##Read in the tab-delimited file that contains the new -omics data of interest. Identifiers must be in column 1,
##fold changes in column 2 and adjP in column 3. R expects all columns to have a header. Replace FILEPATH with the local file location.
#Run each command line by line unless otherwise stated
# Load the new -omics dataset and coerce it to a matrix so that its rows can
# be combined with the fold-change matrix further down.
file_of_new_data <- as.matrix(
  read.delim('FILEPATH/Additional File 2.txt', header = TRUE)
)

##Read in the Fold Changes.txt from the application, replace FILEPATH with local file location
file_of_fold_changes <- as.matrix(
  read.delim('FILEPATH/Fold Changes.txt', header = TRUE)
)

#Remove metadata
# The first four rows are split off as metadata; the remaining rows are the
# data. drop = FALSE keeps both pieces as matrices even when only a single
# row is left, so later nrow()/column indexing keeps working.
metadata <- file_of_fold_changes[1:4, , drop = FALSE]
file_of_fold_changes <- file_of_fold_changes[-(1:4), , drop = FALSE]


##Add new data to file_of_fold_changes

############ HIGHLIGHT ALL BEFORE RUN /START ############
# Build output_file: every row of file_of_fold_changes followed by columns 2:3
# of the matching row(s) of file_of_new_data. A row with several matches is
# emitted once per match; a row without a match gets NA, NA.
# Rows are collected in a list and bound once at the end instead of calling
# rbind() on a growing matrix in every iteration.
matched_rows <- list()
for (i in seq_len(nrow(file_of_fold_changes))) {
  # Identifiers are compared on their first 10 characters only.
  id_prefix <- substr(file_of_fold_changes[i, 1], 1, 10)

  # fixed = TRUE matches the prefix as literal text, so characters such as '.'
  # or '(' in an identifier are not interpreted as a regular expression.
  # grep() returns NA for every element when the pattern itself is NA, so a
  # missing identifier is treated as "no match" explicitly.
  positions <- if (is.na(id_prefix)) {
    integer(0)
  } else {
    grep(id_prefix, file_of_new_data[, 1], fixed = TRUE)
  }

  if (length(positions) > 0) {
    # seq_along() visits every match (iterating over length(positions) alone
    # would only ever visit the last one).
    for (j in seq_along(positions)) {
      matched_rows[[length(matched_rows) + 1L]] <-
        c(file_of_fold_changes[i, ], file_of_new_data[positions[j], 2:3])
    }
  } else {
    #As described in section 3.1 this replaces missing data with 'NA', these NAs can be replaced at user discretion
    matched_rows[[length(matched_rows) + 1L]] <-
      c(file_of_fold_changes[i, ], NA, NA)
  }
}
# do.call(rbind, list()) is NULL, which is the same value c() gives for an
# empty input.
output_file <- do.call(rbind, matched_rows)

############ HIGHLIGHT ALL BEFORE RUN /END ############


#Change the headers to identify the datasets replacing NEW_DATA_NAME with user defined name for new -omics information
#The name is defined once here so the headers and the imputation step below always refer to the same columns.
new_data_name <- 'NEW_DATA_NAME'
colnames(output_file) <- c(colnames(file_of_fold_changes),
                           paste0(new_data_name, 'FC'),
                           paste0(new_data_name, 'Q'))


#Replace "NA" in the combined dataset with column (condition/sample) median.
#Users who wish to use advanced imputing method to fill in missing data should do so with individual datasets (specifically) before combining of datasets.

#Column holding the new fold changes; the new Q values sit in the column directly after it
position1 <- which(colnames(output_file) == paste0(new_data_name, 'FC'))

#Find rows containing 'NA' in the new fold change column (kept for inspection)
na_positions <- which(is.na(output_file[, position1]))

############ HIGHLIGHT ALL BEFORE RUN /START ############
# Each column is imputed from its own missing rows, so an NA that appears in
# only one of the two columns is still filled and cannot turn the median of
# the other column into NA.
for (impute_col in c(position1, position1 + 1)) {
  missing_rows <- which(is.na(output_file[, impute_col]))
  if (length(missing_rows) > 0) {
    #Median of the observed values of this column
    observed <- output_file[-missing_rows, impute_col]
    mode(observed) <- 'numeric'
    # na.rm = TRUE ignores entries that could not be converted to numbers
    output_file[missing_rows, impute_col] <- median(observed, na.rm = TRUE)
  }
}

############ HIGHLIGHT ALL BEFORE RUN /END ############

#Quantile normalisation is provided by the preprocessCore package
library(preprocessCore)

#Pull out every column whose header contains 'FC' and convert it to numbers
fc_columns <- grep('FC', colnames(output_file))
Fold_changes <- output_file[, fc_columns]
mode(Fold_changes) <- 'numeric'

#Quantile normalise the combined fold changes so that data from different sources are comparable
#(i.e. the Log2FoldChange distribution of every dataset gets identical statistical properties).
#The normalised matrix comes back without headers, so they are restored from output_file.
Fold_changes_QN <- normalize.quantiles(Fold_changes)
colnames(Fold_changes_QN) <- colnames(output_file[, fc_columns])

#Final table for the next script: first three columns, normalised fold changes,
#then every column whose header contains 'Q'
q_columns <- grep('Q', colnames(output_file))
output_file2 <- cbind(output_file[, 1:3], Fold_changes_QN, output_file[, q_columns])

#Fill in the four metadata values for the new dataset below. Keep the inverted commas around words.
#Replace NEWFILE with the name you wish the dataset to appear as: cbind() uses these
#variable names as the column headers of the two new metadata columns.
NEWFILE_FC <- c('COUNTRY', 'EXPOSURE STATUS', 'SPECIES', 'INSECTICIDE')
NEWFILE_Q <- c('COUNTRY', 'EXPOSURE STATUS', 'SPECIES', 'INSECTICIDE')

#Lay the metadata out in the same column order as output_file2:
#first three columns, 'FC' columns, new FC column, 'Q' columns, new Q column
meta_fc_columns <- grep('FC', colnames(metadata))
meta_q_columns <- grep('Q', colnames(metadata))
metadata_out <- cbind(
  metadata[, 1:3],
  metadata[, meta_fc_columns], NEWFILE_FC,
  metadata[, meta_q_columns], NEWFILE_Q
)

#Put the metadata rows on top of the data rows
output_file2 <- rbind(metadata_out, output_file2)

#Write the output file to a folder of choice by changing FILEPATH/Fold Changes.txt. Metadata will need to be added and the FC and Q values
#placed in the correct position as defined in the user guide and then this file should replace Fold Changes.txt in the app folder
write.table(output_file2, 'FILEPATH/Fold Changes.txt', sep = '\t', row.names = FALSE)