""" // ------------------------------------------------- // // // // **DataAnalisys.R :** // // Compares data from two analysed AFM experiment // // // // ------------------------------------------------- // // **Original algorithm :** // // Childerick SEVERAC, Sergio PROA CORONADO // // Etienne DAGUE, Adrian MARTINEZ RIVAS // // // // **Script developer :** // // Sergio PROA CORONADO // // ------------------------------------------------- // ## In case you use the results of this script with your article, please don't forget to cite us: - Séverac, C., Coronado, Proa-Coronado, S., Formosa-Dague, C., Martinez-Rivas A., Dague, E., "*Automation of Bio-Atomic Force Microscope Measurements on Hundreds of C. albicans Cells*". JOVE 2020 ## Purpose: This R script compares data from two analysed AFM experiment. For procedures please refer to the JOVE article cited above. ## Copyrights (C) 2019-2020 CNRS (France) and IPN (Mexico) ## License: Automatip_scan is a free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. https://www.gnu.org/licenses/gpl-3.0.en.html """ # The images will be stored on a folder name Images, this folder should be where the R script # is. By default this script saves images in SVG format but PNG can be used too. # This script analyze two experiments (untreated and treated per experiment) at the same time. #Libraries imported library(tidyverse) library(stringi) library(reticulate) library(ggsignif) #Loading the sets. Treated and untreated cells sets #Here you can select all columns from the file or just a few. The ones presented here #are enough for the analysis Exp1Unt <- subset(Exp1, select = c("UFNR30", "UCPR30", "USlR30", "UWells")) Exp1Tre <- subset(Exp1, select = c("TFNR30", "TCPR30", "TSlR30", "TWells")) Exp2Unt <- subset(Exp1, select = c("UFNR30", "UCPR30", "USlR30", "UWells")) Exp2Tre <- subset(Exp1, select = c("TFNR30", "TCPR30", "TSlR30", "TWells")) #Eliminating NAs Exp1Unt <- na.omit(Exp1Unt) Exp1Tre <- na.omit(Exp1Tre) Exp2Unt <- na.omit(Exp1Unt) Exp2Tre <- na.omit(Exp1Tre) #Converting to pN/nm (slope) and um (contact point) Exp1Unt$USlR30 <- Exp1Unt$USlR30 * 1e3 Exp1Tre$TSlR30 <- Exp1Tre$TSlR30 * 1e3 Exp1Unt$UCPR30 <- Exp1Unt$UCPR30 * 1e6 Exp1Tre$TCPR30 <- Exp1Tre$TCPR30 * 1e6 Exp2Unt$USlR30 <- Exp2Unt$USlR30 * 1e3 Exp2Tre$TSlR30 <- Exp2Tre$TSlR30 * 1e3 Exp2Unt$UCPR30 <- Exp2Unt$UCPR30 * 1e6 Exp2Tre$TCPR30 <- Exp2Tre$TCPR30 * 1e6 #For convenience we separate the number of the wells from the filename and added to a new column fileName = Exp1Unt[["UFNR30"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorWells = c() while(i < length(TempPyNames)){ finS = TempPyNames[i]$find('-') NumberWells = substr(TempPyNames[i], 0, py_to_r(finS)) vectorWells = append(vectorWells, NumberWells, length(vectorWells)) i = i + 1 } #Adding Wells column to the data frames Exp1Unt$UWells <- vectorWells fileName = Exp1Tre[["TFNR30"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorWells = c() while(i < length(TempPyNames)){ finS = TempPyNames[i]$find('-') NumberWells = substr(TempPyNames[i], 0, py_to_r(finS)) vectorWells = append(vectorWells, NumberWells, length(vectorWells)) i = i + 1 } #Adding Wells column to the data frames Exp1Tre$TWells <- vectorWells fileName = Exp2Unt[["UFNR30"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorWells = c() while(i < length(TempPyNames)){ finS = TempPyNames[i]$find('-') NumberWells = substr(TempPyNames[i], 0, py_to_r(finS)) vectorWells = append(vectorWells, NumberWells, length(vectorWells)) i = i + 1 } #Adding Wells column to the data frames Exp2Unt$UWells <- vectorWells fileName = Exp2Tre[["TFNR30"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorWells = c() while(i < length(TempPyNames)){ finS = TempPyNames[i]$find('-') NumberWells = substr(TempPyNames[i], 0, py_to_r(finS)) vectorWells = append(vectorWells, NumberWells, length(vectorWells)) i = i + 1 } #Adding Wells column to the data frames Exp2Tre$TWells <- vectorWells #Filtering by height (contact point) filterU1.byCP <- na.omit(filter(Exp1Unt, UCPR30 > 4.15)) filterT1.byCP <- na.omit(filter(Exp1Tre, TCPR30 > 4.15)) filterU2.byCP <- na.omit(filter(Exp2Unt, UCPR30 > 4.15)) filterT2.byCP <- na.omit(filter(Exp2Tre, TCPR30 > 4.15)) #Filtering negative values (slope) filterU1.byNV <- na.omit(filter(filterU1.byCP, USlR30 > 0)) filterT1.byNV <- filter(filterT1.byCP, TSlR30 > 0) filterU2.byNV <- na.omit(filter(filterU2.byCP, USlR30 > 0)) filterT2.byNV <- filter(filterT2.byCP, TSlR30 > 0) #Filtering by slope values (discarding PDMS) filterU1.Cells <- na.omit(filter(filterU1.byNV, filterU1.byNV$USlR30 < 150)) filterT1.Cells <- filter(filterT1.byNV, filterT1.byNV$TSlR30 < 150) filterU2.Cells <- na.omit(filter(filterU2.byNV, filterU2.byNV$USlR30 < 150)) filterT2.Cells <- filter(filterT2.byNV, filterT2.byNV$TSlR30 < 150) #Dividing the data into clusters, the number of clusters depends on the number of peaks kmU1 <- filterU1.Cells %>% subset(select = c("UCPR30", "USlR30")) %>% kmeans(centers = 3) filterU1.Cells$Clust <-as.factor(kmU1$cluster) kmT1 <- filterT1.Cells %>% subset(select= c("TCPR30", "TSlR30")) %>% kmeans(centers = 3) filterT1.Cells$Clust <- as.factor(kmT1$cluster) kmU2 <- filterU2.Cells %>% subset(select = c("UCPR30", "USlR30")) %>% kmeans(centers = 2) filterU2.Cells$Clust <-as.factor(kmU2$cluster) kmT2 <- filterT2.Cells %>% subset(select= c("TCPR30", "TSlR30")) %>% kmeans(centers = 2) filterT2.Cells$Clust <- as.factor(kmT2$cluster) #Histogram of the untreated cells %------------------------------------------------------- #Determining the binwidth Freddman-Diaconis rule. bw <- (2 * IQR(filterU1.Cells[["USlR30"]]) / length(filterU1.Cells[["USlR30"]]) ^ (1 / 3)) CellSlopeU1_FR <- ggplot(filterU1.Cells, aes(x = USlR30)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#FF9999", fill = "#FF6666") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1500), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/CellSlope_U1_FR.svg", CellSlopeU1_FR, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(filterU2.Cells[["USlR30"]]) / length(filterU2.Cells[["USlR30"]]) ^ (1 / 3)) CellSlopeU2_FR <- ggplot(filterU2.Cells, aes(x = USlR30)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#FF9999", fill = "#FF6666") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1400), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/CellSlope_U2_FR.svg", CellSlopeU2_FR, width = 7.5, height = 5, dpi = 300) #Histogram of the treated cells %-------------------------------------------------------- bw <- (2 * IQR(filterT1.Cells[["TSlR30"]]) / length(filterT1.Cells[["TSlR30"]]) ^ (1 / 3)) CellSlopeT1_FR <- ggplot(filterT1.Cells, aes(x = TSlR30)) + geom_histogram(aes( y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#66CCFF", fill = "#0099FF") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1500), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/CellSlope_T1_FR.svg", CellSlopeT1_FR, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(filterT2.Cells[["TSlR30"]]) / length(filterT2.Cells[["TSlR30"]]) ^ (1 / 3)) CellSlopeT2_FR <- ggplot(filterT2.Cells, aes(x = TSlR30)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#66CCFF", fill = "#0099FF") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1500), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/CellSlope_T2_FR.svg", CellSlopeT2_FR, width = 7.5, height = 5, dpi = 300) #Obtaining statistics of each peak inside the plots------------------------------------ #Assign a variable to each cluster number, the number of clusters is related to the number # of center used in the kmeans function #Exp 1 StatasU1.1 <- filterU1.Cells %>% filter(filterU1.Cells$Clust == 1) StatasU1.2 <- filterU1.Cells %>% filter(filterU1.Cells$Clust == 2) StatasU1.3 <- filterU1.Cells %>% filter(filterU1.Cells$Clust == 3) StatasT1.1 <- filterT1.Cells %>% filter(filterT1.Cells$Clust == 1) StatasT1.2 <- filterT1.Cells %>% filter(filterT1.Cells$Clust == 2) StatasT1.3 <- filterT1.Cells %>% filter(filterT1.Cells$Clust == 3) #Exp2 StatasU2.2 <- filterU2.Cells %>% filter(filterU2.Cells$Clust == 1) StatasU2.2 <- filterU2.Cells %>% filter(filterU2.Cells$Clust == 2) StatasT2.1 <- filterT2.Cells %>% filter(filterT2.Cells$Clust == 1) StatasT2.2 <- filterT2.Cells %>% filter(filterT2.Cells$Clust == 2) #ANOVA test Slope----------------------------------------------------------------------------------- # Data frames must contain the same number of data in order to transform it in large data format Temp <- head(filterU1.Cells, nrow(filterT1.Cells)) Set1.Sl <- cbind(filterT1.Cells,Temp) Temp <- head(filterU2.Cells, nrow(filterT2.Cells)) Set2.Sl <- cbind(filterT2.Cells,Temp) Oneway1.info <- Set1.Sl %>% subset(select = c("USlR30","TSlR30")) %>% gather(SlopeType, SlopeVal, USlR30, TSlR30) Oneway2.info <- Set2.Sl %>% subset(select = c("USlR30","TSlR30")) %>% gather(SlopeType, SlopeVal, USlR30, TSlR30) aov1.out = aov(SlopeVal ~ SlopeType, data = Oneway1.info) aov2.out = aov(SlopeVal ~ SlopeType, data = Oneway2.info) summary(aov1.out) summary(aov2.out) # Renaming values for SlopeType Oneway1.info$SlopeType <- plyr::revalue(Oneway1.info$SlopeType , c("USlR30" = "Native", "TSlR30" = "Treated")) Oneway2.info$SlopeType <- plyr::revalue(Oneway2.info$SlopeType , c("USlR30" = "Native", "TSlR30" = "Treated")) Anova1 <- ggplot(Oneway1.info, aes(x = SlopeType, y = SlopeVal, fill = SlopeType)) + geom_boxplot(colour = "blue", fill = c("#FF6666", "#0099FF")) + scale_x_discrete() + xlab("C. albicans") + ylab("Spring constant [pN/nm]") + scale_fill_discrete(breaks = c("Native", "Treated")) + geom_signif(comparisons = list(c("Native", "Treated")), map_signif_level = TRUE) + theme(legend.title = element_blank(), text = element_text(size = 20)) ggsave("Images/ANOVA1.svg", Anova1, width = 6, height = 4, dpi = 200) Anova2 <- ggplot(Oneway2.info, aes(x = SlopeType, y = SlopeVal, fill = SlopeType)) + geom_boxplot(colour = "blue", fill = c("#FF6666", "#0099FF")) + scale_x_discrete() + xlab("C. albicans") + ylab("Spring constant [pN/nm]") + scale_fill_discrete(breaks = c("Native", "Treated")) + theme(text = element_text(size = 20), legend.position = "none") + geom_signif(comparisons = list(c("Native", "Treated")), map_signif_level = TRUE) ggsave("Images/ANOVA2.svg", Anova2, width = 6, height = 4, dpi = 200) #Obtaining median per cell------------------------------------------------------------------ #Obtaining the number of cells per treatment (Unt or Treat) UstringNumber <- r_to_py(filterU1.Cells$UWells[nrow(filterU1.Cells)], convert = FALSE) UtotWells1 <- strtoi(substr(UstringNumber, 5, stri_length(UstringNumber))) TstringNumber <- r_to_py(filterT1.Cells$TWells[nrow(filterT1.Cells)], convert = FALSE) TtotWells1 <- strtoi(substr(TstringNumber, 5, stri_length(TstringNumber))) j = 0 filterU1.AMxCell <- data.frame(Well = character(), averagUAdh = numeric(), medianUAdh = numeric(), stringsAsFactors = FALSE) while(j <= UtotWells1){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(filterU1.Cells, UWells == compstr)) averageUA = mean(as.numeric(SelectedWell$USlR30)) medianUA = median(as.numeric(SelectedWell$USlR30)) filterU1.AMxCell[nrow(filterU1.AMxCell) + 1,] <- list(compstr, averageUA, medianUA) j = j + 1 } filterU1.AMxCell <- na.omit(filterU1.AMxCell) j = 0 filterT1.AMxCell <- data.frame(Well = character(), averagTAdh = numeric(), medianTAdh = numeric(), stringsAsFactors = FALSE) while(j <= TtotWells1){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(filterT1.Cells, TWells == compstr)) averageTA = mean(as.numeric(SelectedWell$TSlR30)) medianTA = median(as.numeric(SelectedWell$TSlR30)) filterT1.AMxCell[nrow(filterT1.AMxCell) + 1,] <- list(compstr, averageTA, medianTA) j = j + 1 } filterT1.AMxCell <- na.omit(filterT1.AMxCell) #Plots median per cell bw <- (2 * IQR(filterU1.AMxCell[["medianUAdh"]]) / length(filterU1.AMxCell[["medianUAdh"]]) ^ (1 / 3)) MedianSlopeU1_FR <- ggplot(filterU1.AMxCell, aes(x = medianUAdh)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#FF9999", fill = "#FF6666") + stat_density(aes(y = ((..count..)/sum(..count..)) * 2800), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 20)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/MedianSlope_U1_FR.svg", MedianSlopeU1_FR, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(filterT1.AMxCell[["medianTAdh"]]) / length(filterT1.AMxCell[["medianTAdh"]]) ^ (1 / 3)) MedianSlopeT1_FR <- ggplot(filterT1.AMxCell, aes(x = medianTAdh)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#66CCFF", fill = "#0099FF") + stat_density(aes(y = ((..count..)/sum(..count..)) * 2500), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 20)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") ggsave("Images/MedianSlope_T1_FR.svg", MedianSlopeT1_FR, width = 7.5, height = 5, dpi = 300) #Time dependance analysis------------------------------------------------------------------ #Adding a column which will contain the hour of the experiment, the format will NOT be datetime type fileHour = filterU1.Cells[["UFNR30"]] TempPyHours = r_to_py(fileHour, convert = FALSE) i = 0 vectorHours = c() while(i < length(TempPyHours)){ iniS = TempPyHours[i]$find('.') ini = as.numeric(py_to_r(iniS)) + 8 #+8 is because the object changes to r, in r the index starts at 1 and after 8 positions you will be in the first number of the hour finS = ini + 4 WellsHours = substr(TempPyHours[i], ini, finS) vectorHours = append(vectorHours, WellsHours, length(vectorHours)) i = i + 1 } filterU1.Cells$Time <- as.numeric(vectorHours) fileHour = filterU2.Cells[["UFN"]] TempPyHours = r_to_py(fileHour, convert = FALSE) i = 0 vectorHours = c() while(i < length(TempPyHours)){ iniS = TempPyHours[i]$find('.') ini = as.numeric(py_to_r(iniS)) + 8 finS = ini + 4 WellsHours = substr(TempPyHours[i], ini, finS) vectorHours = append(vectorHours, WellsHours, length(vectorHours)) i = i + 1 } filterU2.Cells$Time <- as.numeric(vectorHours) fileHour = filterT1.Cells[["TFNR30"]] TempPyHours = r_to_py(fileHour, convert = FALSE) i = 0 vectorHours = c() while(i < length(TempPyHours)){ iniS = TempPyHours[i]$find('.') ini = as.numeric(py_to_r(iniS)) + 8 #+8 is because the object changes to r, in r the index starts at 1 and after 8 positions you will be in the first number of the hour finS = ini + 4 WellsHours = substr(TempPyHours[i], ini, finS) vectorHours = append(vectorHours, WellsHours, length(vectorHours)) i = i + 1 } filterT1.Cells$Time <- as.numeric(vectorHours) fileHour = filterT2.Cells[["TFN"]] TempPyHours = r_to_py(fileHour, convert = FALSE) i = 0 vectorHours = c() while(i < length(TempPyHours)){ iniS = TempPyHours[i]$find('.') ini = as.numeric(py_to_r(iniS)) + 8 #+8 is because the object changes to r, in r the index starts at 1 and after 8 positions you will be in the first number of the hour finS = ini + 4 WellsHours = substr(TempPyHours[i], ini, finS) vectorHours = append(vectorHours, WellsHours, length(vectorHours)) i = i + 1 } filterT2.Cells$Time <- as.numeric(vectorHours) #Plots showing if there is time dependance------------------------------------------------------------ filterU1.Cells$Time <- filterU1.Cells$Time - 7 filterU2.Cells$Time <- filterU2.Cells$Time - 9 #Adding a variable for grouping the data on each hour filterU1.Cells$Hour <- ifelse(filterU1.Cells$Time <= 1.37, 1, ifelse(filterU1.Cells$Time <= 2.38, 2, ifelse(filterU1.Cells$Time <= 3.39, 3, ifelse(filterU1.Cells$Time <= 4.4, 4, 5)))) filterU2.Cells$Hour <- ifelse(filterU2.Cells$Time <= 1.29, 1, ifelse(filterU2.Cells$Time <= 2.30, 2, ifelse(filterU2.Cells$Time <= 3.31, 3, 4))) #Histograms ploted by hour, the result should show the two populations tPlot <- ggplot(filterU1.Cells, aes(x = filterU1.Cells$USlR30, y = filterU1.Cells$UCPR30, color = Clust)) + geom_point() + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank(), axis.line.y = element_blank(), text = element_text(size = 18), legend.position = "none") + scale_color_manual(values = c("#33FF00","#33CCCC", "#33FF00")) + scale_y_continuous() + xlab("Spring constant [pN/nm]") + ylab("Counts") hours.labs <- c("1 hr","2 hrs","3 hrs","4 hrs", "5 hrs") names(hours.labs) <- c("1","2","3","4","5") timePlotU1 <- tPlot + facet_wrap(~ Hour, ncol = 1, labeller = labeller(Hour = hours.labs)) ggsave("Images/timePlotU1.svg", timePlotU1, width = 5, height = 10, dpi = 600) tPlot <- ggplot(filterU2.Cells, aes(x = filterU2.Cells$Usl, y = filterU2.Cells$UCP, color = Clust)) + geom_point() + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank(), axis.line.y = element_blank(), text = element_text(size = 18), legend.position = "none") + scale_color_manual(values = c("#33CCCC", "#33FF00")) + scale_y_continuous() + xlab("Spring constant [pN/nm]") + ylab("Counts") hours.labs <- c("1 hr","2 hrs","3 hrs","4 hrs") names(hours.labs) <- c("1","2","3","4") timePlotU2 <- tPlot + facet_wrap(~ Hour, ncol = 1, labeller = labeller(Hour = hours.labs)) ggsave("Images/timePlotU2.svg", timePlotU2, width = 5, height = 10, dpi = 600) #Dot plots per position, we assume the same as above pPlot <- ggplot(centralUVals, aes(x = centralUVals$USlR30, y = centralUVals$UCPR30, color = centralUVals$Clust)) + geom_point() + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank(), axis.line.y = element_blank(), text = element_text(size = 18), legend.position = "none") + scale_color_manual(values = c("#33FF00","#33CCCC", "#33FF00")) + scale_y_continuous() + xlab("Spring constant [pN/nm]") + ylab("Counts") posit.labs <- c("Position 1","Position 2","Position 3","Position 4") names(posit.labs) <- c("5","6","9","10") posPlotU1 <- pPlot + facet_wrap(~ Index, ncol = 2, labeller = labeller(Index = posit.labs)) ggsave("Images/posPlotU1.svg", posPlotU1, width = 7.5, height = 5, dpi = 600) pPlotU2 <- ggplot(filterU2.Cells, aes(x = filterU2.Cells$Usl, y = filterU2.Cells$UCP, color = Clust)) + geom_point() + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank(), axis.line.y = element_blank(), text = element_text(size = 18), legend.position = "none") + scale_color_manual(values = c("#33CCCC", "#33FF00")) + scale_y_continuous() + xlab("Spring constant [pN/nm]") + ylab("Counts") posit.labs <- c("Position 1","Position 2","Position 3","Position 4", "Position 5", "Position 6", "Position 7", "Position 8", "Position 9") names(posit.labs) <- c("0","1","2","3", "4", "5", "6", "7", "8") posPlotU2 <- pPlotU2 + facet_wrap(~ UPI, ncol = 3, labeller = labeller(UPI = posit.labs)) ggsave("Images/posPlotU2.svg", posPlotU2, width = 7.5, height = 5, dpi = 600) #Histograms separated by cluster CellSlopeU1_FR <- ggplot(filterU1.Cells, aes(x = USlR30)) + #, fill = filterU1.Cells$Clust )) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = bw, color = "#FF9999", fill = "#FF6666") + #stat_density(aes(y = ((..count..)/sum(..count..)) * 1500), geom = "line", color = "red", size = 1) + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Spring constant [pN/nm]") groupPlotU1 <- CellSlopeU1_FR + annotate("rect", xmin = c(0,33), xmax = c(33,155), ymin = c(0,0), ymax = c(20,12), alpha = 0.2, color = c("#33CCCC", "#33FF00"), fill = c("#33CCCC", "#33FF00")) ggsave("Images/U1GroupedClust.svg", groupPlotU1, width = 7.5, height = 5, dpi = 600) groupPlotU2 <- CellSlopeU2_FR + annotate("rect", xmin = c(0,56.5), xmax = c(56.5,155), ymin = c(0,0), ymax = c(10,5), alpha = 0.2, color = c("#33CCCC", "#33FF00"), fill = c("#33CCCC", "#33FF00")) ggsave("Images/U2GroupedClust.svg", groupPlotU2, width = 7.5, height = 5, dpi = 600) #Adhesion analysis------------------------------------------------------------------ #Untreated #Because Adhesion and Slope analysis were made separately, we need to merge them by the filename #slope results CellsU1Cp <- filterU1.Cells CellsU2Cp <- filterU2.Cells #adhesion results colnames(CellsU1Cp)[1] <- "UFN" colnames(AdhUnt1)[1] <- "UFN" fileName = CellsU1Cp[["UFN"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 10 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } CellsU1Cp$FileNames <- vectorNames fileName = AdhUnt1[["UFN"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 4 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } AdhUnt1$FileNames <- vectorNames fileName = CellsU2Cp[["UFN"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 10 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } CellsU2Cp$FileNames <- vectorNames fileName = AdhUnt2[["FileName"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 4 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } AdhUnt2$FileNames <- vectorNames AdhUn1.Cells <- left_join(CellsU1Cp, AdhUnt1, by = "FileNames") AdhUn2.Cells <-left_join(CellsU2Cp, AdhUnt2, by = "FileNames") AdhUn1Filtered.Cells <- na.omit(AdhUn1.Cells) AdhUn2Filtered.Cells <- na.omit(AdhUn2.Cells) #Treated fileName = AdhTre1[["FileName"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 4 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } AdhTre1$FileNames <- vectorNames fileName = filterT1.Cells[["TFNR30"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 10 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } filterT1.Cells$FileNames <- vectorNames fileName = filterT2.Cells[["TFN"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 10 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } filterT2.Cells$FileNames <- vectorNames fileName = AdhTre2[["FileName"]] TempPyNames = r_to_py(fileName, convert = FALSE) i = 0 vectorNames = c() while(i < length(TempPyNames)){ starS = TempPyNames[i]$find('-') endS = stri_length(TempPyNames[i]) - 4 FilesNam = substr(TempPyNames[i], py_to_r(starS) + 2, endS) vectorNames = append(vectorNames, FilesNam, length(vectorNames)) i = i + 1 } AdhTre2$FileNames <- vectorNames AdhT1.Cells <- left_join(filterT1.Cells, AdhTre1, by = "FileNames") AdhT2.Cells <-left_join(filterT2.Cells, AdhTre2, by = "FileNames") AdhT1Filtered.Cells <- na.omit(AdhT1.Cells) # This dataF contains the values of adhesion corresponding to treated cells AdhT2Filtered.Cells <- na.omit(AdhT2.Cells) #Extracting the number of the well to filter the data UtemStr <- r_to_py(AdhUn1Filtered.Cells$UWells[nrow(AdhUn1Filtered.Cells)], convert = FALSE) UtotWells <- strtoi(substr(UtemStr, 5, stri_length(UtemStr))) TtemStr <- r_to_py(AdhT1Filtered.Cells$TWells[nrow(AdhT1Filtered.Cells)], convert = FALSE) TtotWells <- strtoi(substr(TtemStr, 5, stri_length(TtemStr))) #Obtaining the 4 central points of untreated cells centralUVals = AdhUn1Filtered.Cells[FALSE,] j = 0 while(j <= UtotWells){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(AdhUn1Filtered.Cells, UWells == compstr)) i = 1 while( i <= 4){ h = - (4 * (i - 1) * (i - 2) * (i - 3)) / 6 + (i - 1) * (i - 2) + (i - 1) + 5 selected <- filter(SelectedWell, Index == h) centralUVals <- rbind(centralUVals, selected[1,] ) i = i + 1 } j = j + 1 } #Obtaining the 4 central points of treated cells centralTVals = AdhT1Filtered.Cells[FALSE,] j = 0 while(j <= TtotWells){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(AdhT1Filtered.Cells, TWells == compstr)) i = 1 while( i <= 4){ h = - (4 * (i - 1) * (i - 2) * (i - 3)) / 6 + (i - 1) * (i - 2) + (i - 1) + 5 selected <- filter(SelectedWell, Index == h) centralTVals <- rbind(centralTVals, SelectedWell[1,] ) i = i + 1 } j = j + 1 } #cleaning the central values data frames centralUVals <- na.omit(centralUVals) centralTVals <- na.omit(centralTVals) colnames(centralUVals)[14] <- "Adhesion" colnames(centralTVals)[13] <- "Adhesion" colnames(AdhT2Filtered.Cells)[14] <- "Adhesion" #Converting adhesion values to nN centralUVals$Adhesion <- centralUVals$Adhesion * 1e9 centralTVals$Adhesion <- centralTVals$Adhesion * 1e9 AdhUn2Filtered.Cells$Uadhesion <- AdhUn2Filtered.Cells$Uadhesion * 1e9 AdhT2Filtered.Cells$Adhesion <- AdhT2Filtered.Cells$Adhesion * 1e9 #Ploting adhesion histograms % bw <- (2 * IQR(centralUVals[["Adhesion"]]) / length(centralUVals[["Adhesion"]]) ^ (1 / 3)) CellSlUnAdh1.p <- ggplot(centralUVals, aes(x = Adhesion)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#CCCCCC", fill = "#FFFF00") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1500), geom = "line", color = "red", size = 1) + xlim(0,6) + scale_y_continuous() + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") #+ #labs(title = "Histogram of the cells adhesion", # subtitle = "Untreated c. albicans cells") ggsave("Images/CellSlAdh%_Unt1.svg", CellSlUnAdh1.p, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(centralTVals[["Adhesion"]]) / length(centralTVals[["Adhesion"]]) ^ (1 / 3)) CellSlTrAdh1.p <- ggplot(centralTVals, aes(x = Adhesion)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FFFFCC", fill = "#CCCC00") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1400), geom = "line", color = "red", size = 1) + scale_x_continuous(breaks = c(0,2, 4, 6)) + scale_y_continuous() + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") #+ #labs(title = "Histogram of the cells adhesion", # subtitle = "C. albicans cells + caspofungin (4xMIC)") ggsave("Images/CellSlAdh%_Tre1.svg", CellSlTrAdh1.p, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(AdhUn2Filtered.Cells[["Uadhesion"]]) / length(AdhUn2Filtered.Cells[["Uadhesion"]]) ^ (1 / 3)) CellSlUnAdh2.p <- ggplot(AdhUn2Filtered.Cells, aes(x = Uadhesion)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#CCCCCC", fill = "#FFFF00") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1700), geom = "line", color = "red", size = 1) + scale_x_continuous(breaks = c(0,2, 4, 6)) + scale_y_continuous() + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") #+ #labs(title = "Histogram of the cells adhesion", # subtitle = "Untreated c. albicans cells") ggsave("Images/CellSlAdh%_Unt2.svg", CellSlUnAdh2.p, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(AdhT2Filtered.Cells[["Adhesion"]]) / length(AdhT2Filtered.Cells[["Adhesion"]]) ^ (1 / 3)) CellSlTrAdh2.p <- ggplot(AdhT2Filtered.Cells, aes(x = Adhesion)) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FFFFCC", fill = "#CCCC00") + stat_density(aes(y = ((..count..)/sum(..count..)) * 1400), geom = "line", color = "red", size = 1) + scale_x_continuous(breaks = c(0,2, 4, 6)) + scale_y_continuous() + theme(text = element_text(size = 22)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") #+ #labs(title = "Histogram of the cells adhesion", # subtitle = "C. albicans cells + caspofungin (4xMIC)") ggsave("Images/CellSlAdh%_Tre2.svg", CellSlTrAdh2.p, width = 7.5, height = 5, dpi = 300) #Grouping the data kmU1 <- centralUVals %>% subset(select = c("Adhesion")) %>% kmeans(centers = 2) centralUVals$AClust <-as.factor(kmU1$cluster) kmT1 <- centralTVals %>% subset(select = c("Adhesion")) %>% kmeans(centers = 2) centralTVals$AClust <-as.factor(kmT1$cluster) kmU2 <- AdhUn2Filtered.Cells %>% subset(select = c("Uadhesion")) %>% kmeans(centers = 2) AdhUn2Filtered.Cells$AClust <-as.factor(kmU2$cluster) kmT2 <- AdhT2Filtered.Cells %>% subset(select = c("Adhesion")) %>% kmeans(centers = 3) AdhT2Filtered.Cells$AClust <-as.factor(kmT2$cluster) #Obtaining Adhesion statistics of each peak inside the plots------------------------------------ #Change the variable and number of peak and use sd() function Statas <- centralTVals %>% filter(centralTVals$Clust == 2) #Plots of the subgroups of adhesion bw <- (2 * IQR(centralUVals[["Adhesion"]]) / length(centralUVals[["Adhesion"]]) ^ (1 / 3)) CellAdhU1_Clst <- ggplot(centralUVals, aes(x = Adhesion, fill = centralUVals$AClust )) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FF9999") + xlim(0,6) + theme(text = element_text(size = 18)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") bw <- (2 * IQR(centralTVals[["Adhesion"]]) / length(centralTVals[["Adhesion"]]) ^ (1 / 3)) CellAdhU1_Clst <- ggplot(centralTVals, aes(x = Adhesion, fill = centralTVals$AClust )) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FF9999") + xlim(0,6) + theme(text = element_text(size = 18)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") bw <- (2 * IQR(AdhUn2Filtered.Cells[["Uadhesion"]]) / length(AdhUn2Filtered.Cells[["Uadhesion"]]) ^ (1 / 3)) CellAdhU1_Clst <- ggplot(AdhUn2Filtered.Cells, aes(x = Uadhesion, fill = AdhUn2Filtered.Cells$AClust )) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FF9999") + xlim(0,6) + theme(text = element_text(size = 18)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") bw <- (2 * IQR(AdhT2Filtered.Cells[["Adhesion"]]) / length(AdhT2Filtered.Cells[["Adhesion"]]) ^ (1 / 3)) CellAdhU1_Clst <- ggplot(AdhT2Filtered.Cells, aes(x = Adhesion, fill = AdhT2Filtered.Cells$AClust )) + geom_histogram(aes(y = ((..count..)/sum(..count..) * 100)), binwidth = 0.17, color = "#FF9999") + xlim(0,6) + theme(text = element_text(size = 18)) + ylab("Relative frequency [%]") + xlab("Adhesion [nN]") #Anova adhesion------------------------------------------------------------------------------------- #Fisrt merging U+T into one dataFrame colnames(centralUVals)[14] <- "UAdhesion" colnames(centralTVals)[13] <- "TAdhesion" colnames(centralTVals)[14] <- "AClust" Temp <- head(centralTVals, nrow(centralUVals)) Set1.Ad <- cbind(centralUVals,Temp) Temp2 <- head(AdhT2Filtered.Cells, nrow(AdhUn2Filtered.Cells)) Set2.Ad <- cbind(AdhUn2Filtered.Cells,Temp2) colnames(Set2.Ad)[14] <- "TAdhesion" colnames(Set2.Ad)[7] <- "Wells" #Re-arranging the variables Set1.AnovaAdh1 <- Set1.Ad %>% subset(select = c("UAdhesion", "TAdhesion")) %>% gather(AdhGroup, AdhesionVal, UAdhesion, TAdhesion) Set2.AnovaAdh2 <- Set2.Ad %>% subset(select = c("Wells", "Uadhesion", "Adhesion")) %>% gather(AdhGroup, AdhesionVal, Uadhesion, Adhesion) #Renaming AdhesionGroup Set1.AnovaAdh1$AdhGroup <- plyr::revalue(Set1.AnovaAdh1$AdhGroup, c("UAdhesion" = "Native", "TAdhesion" = "Treated")) Set2.AnovaAdh2$AdhGroup <- plyr::revalue(Set2.AnovaAdh2$AdhGroup, c("Uadhesion" = "Native", "Adhesion" = "Treated")) AnovaAdh1 <- ggplot(Set1.AnovaAdh1, aes(x = AdhGroup, y = AdhesionVal)) + geom_boxplot(colour = "blue", fill = c("#FFFF33", "#CC9900")) + scale_x_discrete() + xlab("C. albicans") + ylab("Adhesion [N]") + scale_fill_discrete(breaks = c("Native", "Treated")) + geom_signif(comparisons = list(c("Native", "Treated")), map_signif_level = TRUE) + theme(legend.title = element_blank(), text = element_text(size = 18)) ggsave("Images/ANOVA-Adh1.svg", AnovaAdh1, width = 6, height = 4, dpi = 200) AnovaAdh2 <- ggplot(Set2.AnovaAdh2, aes(x = AdhGroup, y = AdhesionVal)) + geom_boxplot(colour = "blue", fill = c("#FFFF33", "#CC9900")) + scale_x_discrete() + xlab("C. albicans") + ylab("Adhesion [N]") + scale_fill_discrete(breaks = c("Native", "Treated")) + theme(legend.title = element_blank(), text = element_text(size = 18)) + geom_signif(comparisons = list(c("Native", "Treated")), map_signif_level = TRUE) ggsave("Images/ANOVA-Adh2.svg", AnovaAdh2, width = 6, height = 4, dpi = 200) #Obtaining the number of cells contributing to adhesion----------------------------------- UstringNumber <- r_to_py(centralUVals$UWells[nrow(centralUVals)], convert = FALSE) UtotWells1 <- strtoi(substr(UstringNumber, 5, stri_length(UstringNumber))) TstringNumber <- r_to_py(centralTVals$TWells[nrow(centralTVals)], convert = FALSE) TtotWells1 <- strtoi(substr(TstringNumber, 5, stri_length(TstringNumber))) UstringNumber <- r_to_py(AdhUn2Filtered.Cells$UWells[nrow(AdhUn2Filtered.Cells)], convert = FALSE) UtotWells2 <- strtoi(substr(UstringNumber, 5, stri_length(UstringNumber))) TstringNumber <- r_to_py(AdhT2Filtered.Cells$TWells[nrow(AdhT2Filtered.Cells)], convert = FALSE) TtotWells2 <- strtoi(substr(TstringNumber, 5, stri_length(TstringNumber))) j = 0 AveMedianUAdh1 <- data.frame(Well = character(), averagUAdh = numeric(), medianUAdh = numeric(), stringsAsFactors = FALSE) while(j <= UtotWells1){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(centralUVals, UWells == compstr)) averageUA = mean(as.numeric(SelectedWell$Adhesion)) medianUA = median(as.numeric(SelectedWell$Adhesion)) AveMedianUAdh1[nrow(AveMedianUAdh1) + 1,] <- list(compstr, averageUA, medianUA) j = j + 1 } j = 0 AveMedianTAdh1 <- data.frame(Well = character(), averagTAdh = numeric(), medianTAdh = numeric(), stringsAsFactors = FALSE) while(j <= TtotWells1){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(centralTVals, TWells == compstr)) averageTA = mean(as.numeric(SelectedWell$Adhesion)) medianTA = median(as.numeric(SelectedWell$Adhesion)) AveMedianTAdh1[nrow(AveMedianTAdh1) + 1,] <- list(compstr, averageTA, medianTA) j = j + 1 } j = 0 AveMedianUAdh2 <- data.frame(Well = character(), averagUAdh = numeric(), medianUAdh = numeric(), stringsAsFactors = FALSE) while(j <= UtotWells2){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(AdhUn2Filtered.Cells, UWells == compstr)) averageUA = mean(as.numeric(SelectedWell$Uadhesion)) medianUA = median(as.numeric(SelectedWell$Uadhesion)) AveMedianUAdh2[nrow(AveMedianUAdh2) + 1,] <- list(compstr, averageUA, medianUA) j = j + 1 } AveMedianTAdh2 <- data.frame(Well = character(), averagTAdh = numeric(), medianTAdh = numeric(), stringsAsFactors = FALSE) j = 0 while(j <= TtotWells2){ compstr <- paste("Well", toString(j), sep = "") SelectedWell = na.omit(filter(AdhT2Filtered.Cells, TWells == compstr)) averageTA = mean(as.numeric(SelectedWell$Adhesion)) medianTA = median(as.numeric(SelectedWell$Adhesion)) AveMedianTAdh2[nrow(AveMedianTAdh2) + 1,] <- list(compstr, averageTA, medianTA) j = j + 1 } #Eliminating NA values AveMedianUAdh1 <- na.omit(AveMedianUAdh1) AveMedianTAdh1 <- na.omit(AveMedianTAdh1) AveMedianUAdh2 <- na.omit(AveMedianUAdh2) AveMedianTAdh2 <- na.omit(AveMedianTAdh2) #Ploting median values for adhesion bw <- (2 * IQR(AveMedianUAdh2[["averagUAdh"]]) / length(AveMedianUAdh2[["averagUAdh"]]) ^ (1 / 3)) CellSlAverageUA2 <- ggplot(AveMedianUAdh2, aes(x = averagUAdh)) + geom_bar(aes(y = (..count..)/sum(..count..) * 100), binwidth = bw, color = "#FF3333", fill = "#993300") + theme(text = element_text(size = 16)) + ylab("Frequency %") + xlab("Adhesion [N]") + labs(title = "Histogram of the average adhesion values per cell", subtitle = "Native C. albicans cells") ggsave("Images/CellSlAverage_UA2.png", CellSlAverageUA2, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(AveMedianUAdh2[["medianUAdh"]]) / length(AveMedianUAdh2[["medianUAdh"]]) ^ (1 / 3)) CellSlMedianUA2 <- ggplot(AveMedianUAdh2, aes(x = medianUAdh)) + geom_bar(aes(y = (..count..)/sum(..count..) * 100), binwidth = bw, color = "#99CCFF", fill = "#3333FF") + theme(text = element_text(size = 16)) + ylab("Frequency %") + xlab("Adhesion [N]") + labs(title = "Histogram of the median adhesion values per cell", subtitle = "Native C. albicans cells") ggsave("Images/CellSlMedian_UA2.png", CellSlMedianUA2, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(AveMedianTAdh2[["averagTAdh"]]) / length(AveMedianTAdh2[["averagTAdh"]]) ^ (1 / 3)) CellSlAverageTA2 <- ggplot(AveMedianTAdh2, aes(x = averagTAdh)) + geom_bar(aes(y = (..count..)/sum(..count..) * 100), binwidth = bw, color = "#FF3333", fill = "#993300") + theme(text = element_text(size = 16)) + ylab("Frequency %") + xlab("Adhesion [N]") + labs(title = "Histogram of the average adhesion values per cell", subtitle = "C. albicans cells + caspofungin (4xMIC)") ggsave("Images/CellSlAverage_TA2.png", CellSlAverageTA2, width = 7.5, height = 5, dpi = 300) bw <- (2 * IQR(AveMedianTAdh2[["medianTAdh"]]) / length(AveMedianTAdh2[["medianTAdh"]]) ^ (1 / 3)) CellSlMedianTA2 <- ggplot(AveMedianTAdh2, aes(x = medianTAdh)) + geom_bar(aes(y = (..count..)/sum(..count..) * 100), binwidth = bw, color = "#99CCFF", fill = "#3333FF") + theme(text = element_text(size = 16)) + ylab("Frequency %") + xlab("Adhesion [N]") + labs(title = "Histogram of the median adhesion values per cell", subtitle = "C. albicans cells + caspofungin (4xMIC)") ggsave("Images/CellSlMedian_TA2.png", CellSlMedianTA2, width = 7.5, height = 5, dpi = 300)