library(limma)
library(tidyverse)
library(imputeLCMD)

# Build the input file for the CRAPome tool from a MaxQuant
# proteinGroups-style table.
#
# The targets_for_crapome.txt file is a tab-delimited text file whose rows
# correspond to the sample columns in the MaxQuant proteinGroups.txt file
# that should be selected for analysis.
#
# The file should contain the following columns:
#   samp:       arbitrary row id
#   SampleName: sample replicate names - important, these should correspond
#               to column names in the MaxQuant file
#   prep:       the sample prep name
#   BaitName:   name of the bait
#   APName:     name of the bait group used for differential analysis
crapometargets <- read.table(
  "targets_for_crapome.txt",
  sep = "\t",
  header = TRUE
)
# NOTE: the pattern is a regular expression, so "." matches any character
# following "LFQ", not only a literal dot.
crapometargets$SampleName <- gsub("LFQ.", "", crapometargets$SampleName)

maxquantfile <- "DYRK1A_Interactome_MasterMaxQuantAnalysis_perseusAnnot_imputed.txt"

# Read in the MaxQuant proteinGroups.txt file to get the peptide spectral
# counts.
mqdf <- read.delim(maxquantfile, comment.char = "#", stringsAsFactors = FALSE)

# Return the n-th `split`-delimited field of every element of x
# (NA where an element has fewer than n fields).
nth_field <- function(x, split, n) {
  vapply(strsplit(x, split), function(parts) parts[n], character(1))
}

# Keep only the first entry of the ";"-separated gene / protein lists.
mqdf$Gene.name <- nth_field(mqdf$Gene.name, ";", 1)
# Third "|"-separated field of the first majority protein ID.
mqdf$uniprot.id <- nth_field(mqdf$Majority.protein.IDs, ";", 1)
mqdf$uniprot.id <- nth_field(mqdf$uniprot.id, "\\|", 3)
# Second "|"-separated field of the ID string, with anything from the first
# ";" onwards removed.
mqdf$uniprot.acc <- nth_field(mqdf$Majority.protein.IDs, "\\|", 2)
mqdf$uniprot.acc <- gsub(";.*", "", mqdf$uniprot.acc)

# Rename the per-sample "Razor...unique.peptides.<sample>" columns to
# "<sample>_RZUP". The trailing "." in the pattern requires at least one more
# character, so the overall "Razor...unique.peptides" column is left alone.
razid <- grepl("Razor...unique.peptides.", names(mqdf))
raznms <- names(mqdf)[razid]
newraznms <- gsub("Razor\\.\\.\\.unique\\.peptides\\.(.*)", "\\1_RZUP", raznms)
names(mqdf)[razid] <- newraznms

samps2keep <- paste0(crapometargets$SampleName, "_RZUP")
mqdf$gene.uniprot.acc <- paste(mqdf$Gene.name, mqdf$uniprot.acc, sep = "_")
cols2keep <- c(samps2keep, "uniprot.id")

# Fail early, with a readable message, if a sample listed in the targets file
# (or the overall razor + unique peptide column) is absent. This check has to
# run after the renaming above, otherwise every sample looks missing.
missingcols <- setdiff(c(cols2keep, "Razor...unique.peptides"), names(mqdf))
if (length(missingcols) > 0) {
  stop(
    "Required columns are missing from ", maxquantfile, ": ",
    paste(missingcols, collapse = ", "),
    call. = FALSE
  )
}

# `[[` is used instead of `$` so the column name is matched exactly.
mqdf$razuniqpeps <- mqdf[["Razor...unique.peptides"]]
cntnms <- c(
  "MS.MS.count", "razuniqpeps", "Unique.peptides",
  "Sequence.coverage....", "Unique...razor.sequence.coverage....",
  "Unique.sequence.coverage...."
)

# This deals with cases where multiple isoforms map to the same uniprot.id.
# The isoform with the highest overall razor + unique peptide count is
# selected. Ties are resolved in file order (ties.method = "first") so that
# repeated runs produce the same output; rows with a missing count get an NA
# rank and are dropped by the filter.
mqdf.uniqgene <- mqdf %>%
  dplyr::group_by(uniprot.id) %>%
  dplyr::mutate(the_rank = rank(-razuniqpeps, ties.method = "first")) %>%
  dplyr::filter(the_rank == 1) %>%
  dplyr::ungroup() %>%
  dplyr::select(dplyr::all_of(cols2keep))

# For CRAPome input file generation
newraznms <- raznms <- grep("RZUP", names(mqdf), value = TRUE)

# This command converts the data from "wide" form to the long form that
# CRAPome analysis requires. The multiple sample columns are collapsed into
# key-value pairs (SampleName, SC), duplicating the uniprot.id column.
crapome <- mqdf.uniqgene %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(samps2keep),
    names_to = "SampleName",
    values_to = "SC"
  )
# pivot_longer() emits rows protein by protein; reorder them sample by sample
# (in targets-file order) so the written file keeps the layout the previous
# gather()-based code produced. order() leaves ties in their existing order.
crapome <- crapome[order(match(crapome$SampleName, samps2keep)), ]
crapome <- crapome %>%
  dplyr::select(SampleName, uniprot.id, SC)

crapome$SampleName <- gsub("_RZUP", "", crapome$SampleName)
crapome$SampleName <- gsub("_", "", crapome$SampleName)

# This assumes only 1 bait (DYRK1A in this case) and that the samples whose
# SampleName contains "NucBOSubcell" are the beads-only controls, which have
# to be recoded to "C" (controls) for the CRAPome analysis to work properly.
crapome$SampType <- "DYRK1A"
crapome$SampType[grepl("NucBOSubcell", crapome$SampleName)] <- "C"
crapome <- crapome %>%
  dplyr::select(SampType, SampleName, uniprot.id, SC)

# Write out the file to use as input to the CRAPome tool
# built by the Alexey Nesvizhskii lab:
# http://crapome.org/?q=chooseworkflow
# Use the "Workflow 3: Analyze Your Data" workflow to upload this file.
# You should use only user controls for this analysis. See the help files at
# crapome.org.
write.table(
  crapome,
  file = "DYRK1A_interactomeIPMS_CRAPomeInputData_20190614.txt",
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = FALSE,
  col.names = FALSE
)