# -*- coding: utf-8 -*-
'''
This script uses the NormReads dictionaries of 8 (!) samples and generates 
Enrichment profile for 4 given samples. Codon resolution added + Window
Enrichment profile for 4 Sample pairs Total/IP. Gives only one profile assuming replicates.

Comment: The part 'data input' creates a dictionary with each gene as keys 
(=data). The value is a list composed of eight lists (=geneProfile), one for 
each sample. During the for-loop each file is opened and each gene given in the
reference (YeastDB_3.0) is searched. Each position value is added to the list 
geneProfile which is then (after completeness) added to the respective gene 
in the dictionary data. 

'''

import pickle 
import re
import matplotlib.pyplot as plt
import pylab
import numpy as np


def geneProfile(reference, path, pathout, infileList):

    data = {}
    length = {}
    for gene in genelist:
        data[gene] = []
        length[gene] = 0
   
    for file in infileList:                                         # data input
        dictFile = pickle.load(open(path + file + '/DictReads_' + file + '.pkl', 'rb'))
        for gene in genelist:
            geneProfile = []
            sys = gene
            maxi = int(max(re.split('\W+', reference[gene][3].strip(','))))
            mini = int(min(re.split('\W+', reference[gene][3].strip(','))))
            length[gene] = maxi-mini+1
            chrom  = str(reference[gene][1]).zfill(2)
            strand = reference[gene][2]
            value_pos = 6 if strand == '+' else 8
            if strand == '+':
                for pos in range(-100, maxi-mini + 101):
                    key = chrom + '\t' + str(mini+pos).zfill(7)
                    try: 
                        geneProfile.append(dictFile[key][value_pos])
                    except KeyError:
                        geneProfile.append(0.0)
            else: 
                for pos in range(-100, maxi-mini + 101):
                    key = chrom + '\t' + str(maxi-pos).zfill(7)
                    try: 
                        geneProfile.append(dictFile[key][value_pos])
                    except KeyError:
                        geneProfile.append(0.0)
            data[gene].append(geneProfile)
            
       
    for gene in genelist:
        lista=[]
        listb=[]
        listc=[]
        listd=[]
        liste=[]
        listf=[]
        listg=[]
        listh=[]
        
        #Amino Acid scale
        for i in range(-99,length[gene]+100,3):
            a=data[gene][0][i]+ data[gene][0][i+1]+ data[gene][0][i+2]
            b=data[gene][1][i]+ data[gene][1][i+1]+ data[gene][1][i+2]
            c=data[gene][2][i]+ data[gene][2][i+1]+ data[gene][2][i+2]
            d=data[gene][3][i]+ data[gene][3][i+1]+ data[gene][3][i+2]
            e=data[gene][4][i]+ data[gene][4][i+1]+ data[gene][4][i+2]
            f=data[gene][5][i]+ data[gene][5][i+1]+ data[gene][5][i+2]
            g=data[gene][6][i]+ data[gene][6][i+1]+ data[gene][6][i+2]
            h=data[gene][7][i]+ data[gene][7][i+1]+ data[gene][7][i+2]
            lista.append(a/3)
            listb.append(b/3)
            listc.append(c/3)
            listd.append(d/3)
            liste.append(e/3)
            listf.append(f/3)
            listg.append(g/3)
            listh.append(h/3)
        #WINDOW applied
        if Window > 2:    
            for j in range(33,int(length[gene]/3 + 33)):
                k=(j - window_plus)
                l=(j + window_minus)
                d=np.sum(listb[k:l])            
                c=np.sum(lista[k:l])
                e=np.sum(listc[k:l])
                f=np.sum(listd[k:l])
                g=np.sum(liste[k:l])            
                h=np.sum(listf[k:l])
                i=np.sum(listg[k:l])
                m=np.sum(listh[k:l])
                
                lista[j]=c/ Window
                listb[j]=d/ Window
                listc[j]=e/ Window
                listd[j]=f/ Window
                liste[j]=g/ Window
                listf[j]=h/ Window
                listg[j]=i/ Window
                listh[j]=m/ Window
        else:
            pass
       
        #PLOT   
        geneTitle = 'Gene:  ' + gene + ' / ' + reference[gene][0] + '   (' + reference[gene][9] + ')'
        filename = pathout + gene + '_' + reference[gene][0] + '.png'
        x = np.arange(-33,length[gene]/3 + 34,1)
        print(x)
        print(len(lista))
        #TWo replicates sample one     
                 
        y_1 = np.array(np.array(listb)/np.array(lista))
        y_2 = np.array(np.array(listd)/np.array(listc))
        y_3 = np.array((y_1 + y_2)/2)
        #two replicates sample two
        y_4 = np.array(np.array(listf)/np.array(liste))
        y_5 = np.array(np.array(listh)/np.array(listg))
        y_6 = np.array((y_4 + y_5)/2)
        
        print (len(range(-100, length[gene] + 101)))
        print (len(data[gene][0]))
        print (len(data[gene][1]))
        width=(length[gene]/100)
        plt.figure(figsize=(width,10))
        plt.plot(x, y_3, 'b',alpha = 0.5, label='WT')
        plt.plot(x,y_6,'r', alpha = 0.5, label='Delta NAC')
        plt.fill_between(x, y_1, y_2, color = 'lightgrey', linewidth=0.0, alpha= 0.5)
        plt.fill_between(x, y_4, y_5, color = 'lightgrey', linewidth=0.0, alpha= 0.5)
        plt.grid(True)
        plt.ylim((0,100))
        plt.axhline (y=2, xmin=0, color='darkred',linestyle = '--', alpha = 0.5, label='2-fold enrichment')
        plt.legend(loc='upper right', fontsize = 3)
        plt.xlabel('Ribosome Position from TSS [Codons/aa]')
        plt.ylabel('Enrichment IP/Total Translatome')
        plt.title(geneTitle)
        
        
        pylab.savefig(filename, dpi= 300)
        plt.close() 

        
if __name__ == '__main__':
    
    reference = pickle.load(open('/mnt/DeepSeq_share/Kevin/Sequencing/REFERENCE/YeastDB_3.0.pkl', 'rb'))
    path = '/mnt/DeepSeq_share/Kevin/Sequencing/01_DATA/'
    genes=open(path+'GeneList.txt')
    genelist=[]
    for line in genes:
        genelist.append(line.strip('\n'))
    Window= 11
    window_plus=int((Window-1)/2)
    window_minus=int((Window+1)/2)    

    pathout = '/your_path_of_choice'   
    infileList = ['Sample1', 'Sample2','Sample3','Sample4','Sample5','Sample6','Sample7','Sample8']
    #SYNTAX Above --> [total1, ip1, total1, ip1 /// total2 ip2 total2 ip2]
    geneProfile(reference, path, pathout, infileList)