'''
Script that provides two types of univariate feature selection (Variance thresholding
and Select K Best). See program help for more info.
'''

import sys, argparse
import numpy as np
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

def _read_table(stream):
    """Parse a tab-separated table with one header row from *stream*.

    Every line after the header is one sample: the first field is the sample
    name, the last field is the response label, and the fields in between are
    parsed as floats.  Blank lines are skipped.

    Returns a ``(header, names, features, labels)`` tuple of lists.
    """
    header = None
    names = []
    features = []
    labels = []
    for raw_line in stream:
        # NOTE: strip() also removes a leading tab, so an empty first header
        # cell would shift every column name one position to the left.
        stripped = raw_line.strip()
        if not stripped:
            # A blank line (e.g. a trailing newline) carries no sample and
            # would otherwise yield a row with no features.
            continue
        fields = stripped.split('\t')
        if header is None:
            header = fields
            continue
        names.append(fields[0])
        features.append([float(value) for value in fields[1:-1]])
        labels.append(fields[-1])
    return header or [], names, features, labels


def select_features(model_name, k, verbose, surpress_response_column, p_values):
    """Run univariate feature selection on a table read from standard input.

    The reduced table is written to standard output as tab-separated lines,
    one per sample, starting with the sample name.

    Args:
        model_name: ``'vt'`` selects with ``VarianceThreshold``; any other
            value selects with ``SelectKBest`` using the ``chi2`` score.
        k: Number of features to keep for ``SelectKBest``.  Ignored for
            ``'vt'``.
        verbose: When true, print a header line naming the selected features
            and append the response label to every sample line (unless
            suppressed).
        surpress_response_column: When true, leave the response column out of
            the verbose output.
        p_values: When true (and ``verbose`` is set and ``model_name`` is
            ``'skb'``), print one extra line with the p-value of each
            selected feature, aligned under the header.

    Raises:
        SystemExit: With status 1 when ``SelectKBest`` is requested without a
            usable ``k``.
    """
    use_variance_threshold = model_name == 'vt'

    # Validate before consuming stdin, and report the failure on stderr with
    # a non-zero status so that pipelines can detect it.
    if not use_variance_threshold and not k:
        print('Cannot run SelectKBest without a k (number_of_features) parameter.',
              file=sys.stderr)
        sys.exit(1)

    header, names, features, labels = _read_table(sys.stdin)
    X = np.array(features)
    y = np.array(labels)

    if use_variance_threshold:
        # Presumably the p * (1 - p) Bernoulli variance with p = 0.8 used in
        # the scikit-learn feature-selection guide -- confirm if changing.
        model = VarianceThreshold(threshold=0.8 * (1 - 0.8))
    else:
        model = SelectKBest(chi2, k=k)
    model.fit(X, y)

    support = model.get_support()
    ranked_output = model_name == 'skb'
    if ranked_output:
        # Emit the kept columns from the highest score to the lowest.
        order = np.argsort(model.scores_)[::-1]
        selected = order[support[order]]
    else:
        # Keep the columns in their original order.
        selected = np.flatnonzero(support)

    with_response = verbose and not surpress_response_column

    if verbose:
        # Feature j lives in header column j + 1 (column 0 is the name).
        header_cells = [header[0]] + [header[j + 1] for j in selected]
        if with_response:
            header_cells.append(header[-1])
        print('\t'.join(header_cells))

        if ranked_output and p_values:
            # The leading tab keeps the p-values under their feature names.
            scores_p = model.pvalues_
            print('\t%s' % '\t'.join(str(scores_p[j]) for j in selected))

    for name, label, row in zip(names, labels, X[:, selected]):
        cells = [name] + [str(value) for value in row]
        if with_response:
            cells.append(label)
        print('\t'.join(cells))

if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them over to
    # select_features, which reads the table from standard input.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-m', '--model',
        type=str,
        choices=['vt', 'skb'],
        required=True,
        help='The type univariate filtering to apply.',
    )
    parser.add_argument(
        '-k', '--number_of_features',
        type=int,
        required=False,
        help='The number of features to select.',
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Outputs the names of the selected features.',
    )
    parser.add_argument(
        '-s', '--surpress_response_column',
        action='store_true',
        help='Does not output the response column when the -v flag is given.',
    )
    parser.add_argument(
        '-p', '--p_values',
        action='store_true',
        help='Prints p-values for selected features.',
    )
    options = parser.parse_args()

    select_features(
        options.model,
        options.number_of_features,
        options.verbose,
        options.surpress_response_column,
        options.p_values,
    )
