# -*- coding: utf-8 -*- ''' This script uses the NormReads dictionaries of 8 (!) samples and generates Enrichment profile for 4 given samples. Codon resolution added + Window Enrichment profile for 4 Sample pairs Total/IP. Gives only one profile assuming replicates. Comment: The part 'data input' creates a dictionary with each gene as keys (=data). The value is a list composed of eight lists (=geneProfile), one for each sample. During the for-loop each file is opened and each gene given in the reference (YeastDB_3.0) is searched. Each position value is added to the list geneProfile which is then (after completeness) added to the respective gene in the dictionary data. ''' import pickle import re import matplotlib.pyplot as plt import pylab import numpy as np def geneProfile(reference, path, pathout, infileList): data = {} length = {} for gene in genelist: data[gene] = [] length[gene] = 0 for file in infileList: # data input dictFile = pickle.load(open(path + file + '/DictReads_' + file + '.pkl', 'rb')) for gene in genelist: geneProfile = [] sys = gene maxi = int(max(re.split('\W+', reference[gene][3].strip(',')))) mini = int(min(re.split('\W+', reference[gene][3].strip(',')))) length[gene] = maxi-mini+1 chrom = str(reference[gene][1]).zfill(2) strand = reference[gene][2] value_pos = 6 if strand == '+' else 8 if strand == '+': for pos in range(-100, maxi-mini + 101): key = chrom + '\t' + str(mini+pos).zfill(7) try: geneProfile.append(dictFile[key][value_pos]) except KeyError: geneProfile.append(0.0) else: for pos in range(-100, maxi-mini + 101): key = chrom + '\t' + str(maxi-pos).zfill(7) try: geneProfile.append(dictFile[key][value_pos]) except KeyError: geneProfile.append(0.0) data[gene].append(geneProfile) for gene in genelist: lista=[] listb=[] listc=[] listd=[] liste=[] listf=[] listg=[] listh=[] #Amino Acid scale for i in range(-99,length[gene]+100,3): a=data[gene][0][i]+ data[gene][0][i+1]+ data[gene][0][i+2] b=data[gene][1][i]+ data[gene][1][i+1]+ data[gene][1][i+2] c=data[gene][2][i]+ data[gene][2][i+1]+ data[gene][2][i+2] d=data[gene][3][i]+ data[gene][3][i+1]+ data[gene][3][i+2] e=data[gene][4][i]+ data[gene][4][i+1]+ data[gene][4][i+2] f=data[gene][5][i]+ data[gene][5][i+1]+ data[gene][5][i+2] g=data[gene][6][i]+ data[gene][6][i+1]+ data[gene][6][i+2] h=data[gene][7][i]+ data[gene][7][i+1]+ data[gene][7][i+2] lista.append(a/3) listb.append(b/3) listc.append(c/3) listd.append(d/3) liste.append(e/3) listf.append(f/3) listg.append(g/3) listh.append(h/3) #WINDOW applied if Window > 2: for j in range(33,int(length[gene]/3 + 33)): k=(j - window_plus) l=(j + window_minus) d=np.sum(listb[k:l]) c=np.sum(lista[k:l]) e=np.sum(listc[k:l]) f=np.sum(listd[k:l]) g=np.sum(liste[k:l]) h=np.sum(listf[k:l]) i=np.sum(listg[k:l]) m=np.sum(listh[k:l]) lista[j]=c/ Window listb[j]=d/ Window listc[j]=e/ Window listd[j]=f/ Window liste[j]=g/ Window listf[j]=h/ Window listg[j]=i/ Window listh[j]=m/ Window else: pass #PLOT geneTitle = 'Gene: ' + gene + ' / ' + reference[gene][0] + ' (' + reference[gene][9] + ')' filename = pathout + gene + '_' + reference[gene][0] + '.png' x = np.arange(-33,length[gene]/3 + 34,1) print(x) print(len(lista)) #TWo replicates sample one y_1 = np.array(np.array(listb)/np.array(lista)) y_2 = np.array(np.array(listd)/np.array(listc)) y_3 = np.array((y_1 + y_2)/2) #two replicates sample two y_4 = np.array(np.array(listf)/np.array(liste)) y_5 = np.array(np.array(listh)/np.array(listg)) y_6 = np.array((y_4 + y_5)/2) print (len(range(-100, length[gene] + 101))) print (len(data[gene][0])) print (len(data[gene][1])) width=(length[gene]/100) plt.figure(figsize=(width,10)) plt.plot(x, y_3, 'b',alpha = 0.5, label='WT') plt.plot(x,y_6,'r', alpha = 0.5, label='Delta NAC') plt.fill_between(x, y_1, y_2, color = 'lightgrey', linewidth=0.0, alpha= 0.5) plt.fill_between(x, y_4, y_5, color = 'lightgrey', linewidth=0.0, alpha= 0.5) plt.grid(True) plt.ylim((0,100)) plt.axhline (y=2, xmin=0, color='darkred',linestyle = '--', alpha = 0.5, label='2-fold enrichment') plt.legend(loc='upper right', fontsize = 3) plt.xlabel('Ribosome Position from TSS [Codons/aa]') plt.ylabel('Enrichment IP/Total Translatome') plt.title(geneTitle) pylab.savefig(filename, dpi= 300) plt.close() if __name__ == '__main__': reference = pickle.load(open('/mnt/DeepSeq_share/Kevin/Sequencing/REFERENCE/YeastDB_3.0.pkl', 'rb')) path = '/mnt/DeepSeq_share/Kevin/Sequencing/01_DATA/' genes=open(path+'GeneList.txt') genelist=[] for line in genes: genelist.append(line.strip('\n')) Window= 11 window_plus=int((Window-1)/2) window_minus=int((Window+1)/2) pathout = '/your_path_of_choice' infileList = ['Sample1', 'Sample2','Sample3','Sample4','Sample5','Sample6','Sample7','Sample8'] #SYNTAX Above --> [total1, ip1, total1, ip1 /// total2 ip2 total2 ip2] geneProfile(reference, path, pathout, infileList)