## Read in the file that contains the new -omics data of interest, with the
## identifiers in column 1, in tab-delimited format.
## Replace FILEPATH with the local file location. R expects all columns to have
## a header. Fold changes should be in column 2 and adjP in column 3.
# Run each command line by line unless otherwise stated.
file_of_new_data <- read.delim('FILEPATH/Additional File 2.txt', header = TRUE)
file_of_new_data <- as.matrix(file_of_new_data)

## Read in the Fold Changes.txt from the application; replace FILEPATH with the
## local file location.
file_of_fold_changes <- read.delim('FILEPATH/Fold Changes.txt', header = TRUE)

# Remove metadata: the first four rows are split off into `metadata`, the
# remaining rows are kept as the data to merge against.
metadata <- as.matrix(file_of_fold_changes)[c(1:4), ]
# drop = FALSE keeps a matrix even when only a single data row remains.
file_of_fold_changes <- as.matrix(file_of_fold_changes)[-c(1:4), , drop = FALSE]

## Add new data to file_of_fold_changes
############ HIGHLIGHT ALL BEFORE RUN /START ############
# One list slot per existing row; each slot holds the merged row(s) for that
# identifier. Binding once at the end avoids re-copying the matrix every pass.
merged_rows <- vector("list", nrow(file_of_fold_changes))
for (i in seq_len(nrow(file_of_fold_changes))) {
  # Match on the first 10 characters of the identifier, taken literally
  # (fixed = TRUE) so characters such as '.' are not treated as regex syntax.
  id_prefix <- substr(file_of_fold_changes[i, 1], 1, 10)
  positions <- grep(id_prefix, file_of_new_data[, 1], fixed = TRUE)
  if (length(positions) > 0) {
    # Emit one output row per matching row of the new data (every match, not
    # only the last one), each carrying columns 2:3 of the new data.
    merged_rows[[i]] <- cbind(
      file_of_fold_changes[rep(i, length(positions)), , drop = FALSE],
      file_of_new_data[positions, 2:3, drop = FALSE]
    )
  } else {
    # As described in section 3.1 this replaces missing data with 'NA';
    # these NAs can be replaced at user discretion.
    merged_rows[[i]] <- c(file_of_fold_changes[i, ], NA, NA)
  }
}
output_file <- do.call(rbind, merged_rows)
############ HIGHLIGHT ALL BEFORE RUN /END ############

# Change the headers to identify the datasets, replacing NEW_DATA_NAME with a
# user-defined name for the new -omics information.
colnames(output_file) <- c(colnames(file_of_fold_changes), 'NEW_DATA_NAMEFC', 'NEW_DATA_NAMEQ')

# Replace 'NA' in the combined dataset with the column (condition/sample) median.
# Users who wish to use an advanced imputation method to fill in missing data
# should do so on the individual datasets before combining them.
# Locate the fold-change column of the newly added dataset. If NEW_DATA_NAME
# was replaced when the column headers were set, use the same name here.
position1 <- which(colnames(output_file) == 'NEW_DATA_NAMEFC')
if (length(position1) != 1) {
  # Without this check a missing column would silently skip the imputation.
  stop("Expected exactly one column named 'NEW_DATA_NAMEFC' in output_file; ",
       "use the same name here as in the column headers.", call. = FALSE)
}

# Find rows containing 'NA' in the new fold-change column
na_positions <- which(is.na(output_file[, position1]))

############ HIGHLIGHT ALL BEFORE RUN /START ############
if (length(na_positions) > 0) {
  # Median value of the fold change column (computed over the non-NA rows)
  median_val <- output_file[-(na_positions), position1]
  mode(median_val) <- 'numeric'
  output_file[na_positions, position1] <- median(median_val)

  # Median value of the p-value column; the column directly after the
  # fold-change column, filled in for the same rows.
  median_val <- output_file[-(na_positions), (position1 + 1)]
  mode(median_val) <- 'numeric'
  output_file[na_positions, (position1 + 1)] <- median(median_val)
}
############ HIGHLIGHT ALL BEFORE RUN /END ############

# Extract Fold Changes: every column whose header contains 'FC'
Fold_changes <- output_file[, grep('FC', colnames(output_file))]

# Perform quantile normalization on the combined dataset to ensure data from
# different sources are comparable (i.e. making the distribution of
# Log2FoldChange of each single dataset have identical statistical properties).
library(preprocessCore)
mode(Fold_changes) <- 'numeric'
Fold_changes_QN <- normalize.quantiles(Fold_changes)
colnames(Fold_changes_QN) <- colnames(output_file[, grep('FC', colnames(output_file))])

# Create final output file for use in next script: the first three columns,
# the normalized fold changes, then every column whose header contains 'Q'.
output_file2 <- cbind(output_file[, 1:3], Fold_changes_QN, output_file[, grep('Q', colnames(output_file))])

# Replace the values below with the relevant information. Remember to leave the
# inverted commas around words.
# Replace NEWFILE with the name you wish the dataset to appear as.
NEWFILE_FC <- c('COUNTRY', 'EXPOSURE STATUS', 'SPECIES', 'INSECTICIDE')
NEWFILE_Q <- c('COUNTRY', 'EXPOSURE STATUS', 'SPECIES', 'INSECTICIDE')

# Replace NEWFILE with the name you wish the dataset to appear as.
# The metadata columns are assembled in the same order as output_file2.
metadata_out <- cbind(metadata[, 1:3], metadata[, grep('FC', colnames(metadata))], NEWFILE_FC)
metadata_out <- cbind(metadata_out, metadata[, grep('Q', colnames(metadata))], NEWFILE_Q)
output_file2 <- rbind(metadata_out, output_file2)

# Write the output file to a folder of choice by changing FILEPATH below.
# Metadata will need to be added and the FC and Q values placed in the correct
# position as defined in the user guide, and then this file should replace
# Fold Changes.txt in the app folder.
write.table(output_file2, 'FILEPATH/Fold Changes.txt', sep = '\t', row.names = FALSE)