% Supplementary File 2 - Train

% Jonghee Yoon et al. "Label-free optical technique for identifying 
% lymphocyte types using 3D quantitative phase imaging and machine
% learning," Journal of Visualized Experiments

% Written by YoungJu Jo
% KAIST Biomedical Optics Lab & Tomocube

%%

% Start from a clean workspace and close any open figures.
clear;
close all;

% Directory with training data (one sub-folder per class).
train_set = 'D:\lymphocytes\features_split\train';

% Directory and file name used to save the classification model.
save_dir = 'D:\lymphocytes\models';
save_name = 'trained_model';

% Features used for classification, one row per feature:
%   column 1 - RI threshold (available values: 1.340:0.002:1.378)
%   column 2 - feature index
%              1: Surface Area / 2: Cellular Volume / 3: Sphericity Index /
%              4: Protein Density / 5: Dry Mass
feature_combination = [
    1.342 1
    1.368 1
    1.368 2
    1.342 3
    1.368 3
    1.368 4
    1.342 5
    1.368 5
    ];


%%

% read features in training set
%
% Each sub-folder of train_set is treated as one class; every *.mat file in
% a class folder contributes one sample (one row of feature_train).
% Outputs left in the workspace:
%   feature_train - samples x selected features
%   label_train   - class index (1..num_classes) for every sample
%   classes       - class folder names, indexed by class index
%   num_classes   - number of class folders found

% List only real sub-folders. The '.' and '..' entries are removed by name,
% because dir does not guarantee that they are the first two entries, and
% stray files in train_set must not be mistaken for classes.
class_folders = dir(train_set);
is_class_folder = [class_folders.isdir] & ~ismember({class_folders.name}, {'.', '..'});
class_folders = class_folders(is_class_folder);

num_classes = length(class_folders);
if num_classes == 0
    error('train:noClassFolders', 'No class folders found in "%s".', train_set);
end

% Loop-invariant lookup data derived from feature_combination. Thresholds
% are compared as integers (x1000) to avoid floating-point equality issues.
num_features = size(feature_combination,1);
target_keys = round(feature_combination(:,1)*1000);
feature_columns = feature_combination(:,2);

feature_train = [];
label_train = [];
classes = cell(num_classes,1);

for class_idx = 1:num_classes
    
    % Build full paths instead of changing the working directory, so the
    % script also works when train_set is a relative path.
    class_name = class_folders(class_idx).name;
    class_path = fullfile(train_set, class_name);
    exp_files = dir(fullfile(class_path, '*.mat'));
    
    feature_class = zeros(length(exp_files),num_features);
    label_class = class_idx * ones(length(exp_files),1);
    
    for exp_idx = 1:length(exp_files)
        
        % Load into a struct so variables stored in the file cannot
        % overwrite variables of this script.
        file_path = fullfile(class_path, exp_files(exp_idx).name);
        data = load(file_path, 'features', 'RI_threshold');
        if ~isfield(data,'features') || ~isfield(data,'RI_threshold')
            error('train:missingVariable', ...
                'File "%s" must contain the variables "features" and "RI_threshold".', file_path);
        end
        
        % features is indexed as (RI threshold, feature index); the row is
        % the position of the requested threshold in RI_threshold.
        threshold_keys = round(data.RI_threshold(:)*1000);
        for feature_idx = 1:num_features
            row = find(threshold_keys == target_keys(feature_idx));
            if numel(row) ~= 1
                error('train:thresholdNotFound', ...
                    'Expected exactly one RI threshold %.3f in "%s", found %d.', ...
                    feature_combination(feature_idx,1), file_path, numel(row));
            end
            feature_class(exp_idx,feature_idx) = data.features(row, feature_columns(feature_idx));
        end

    end
    
    % Discard samples for which any selected feature is NaN.
    has_nan = any(isnan(feature_class),2);
    label_class(has_nan) = [];
    feature_class(has_nan,:) = [];
    
    feature_train = [feature_train; feature_class]; %#ok<AGROW>
    label_train = [label_train; label_class]; %#ok<AGROW>
    classes(class_idx) = {class_name};
    
end

% Per-feature standardization (z-score); the per-feature mean and standard
% deviation are kept in feature_mu and feature_sigma.
[feature_train_standardized, feature_mu, feature_sigma] = zscore(feature_train);

% Train a k-NN classifier (k = 4) on the standardized features.
classification_model = fitcknn(feature_train_standardized, label_train, 'NumNeighbors', 4);

% Leave-one-out cross-validation: accuracy in percent and per-sample
% predictions.
CV_classification_model = crossval(classification_model, 'leaveout', 'on');
CV_accuracy = 100 * (1 - kfoldLoss(CV_classification_model));
cv_prediction = kfoldPredict(CV_classification_model);

% Full-screen figure comparing the cross-validated prediction (red dots)
% with the ground-truth label (blue circles) for every cell.
figure('units','normalized','outerposition',[0 0 1 1]);
plot(cv_prediction, '.r', 'MarkerSize', 10);
hold on;
plot(label_train, 'ob');
hold off;
xlabel('Individual Cell');
ylabel('Class');
legend('Prediction','Ground Truth');
title([num2str(CV_accuracy) '% (Cross-validation Accuracy)'], 'FontSize', 20);

% Label the y axis with the class names instead of the class indices.
yticks(1:num_classes);
yticklabels(classes);

% save
% Store the trained model together with everything needed to apply it to
% new data (standardization parameters, feature selection, class names) and
% the training data and cross-validation accuracy.
% The output folder is created when it does not exist yet, so a finished
% training run is not lost to a missing directory; the file is written via
% its full path, so the working directory is not changed here.
if exist(save_dir,'dir') ~= 7
    mkdir(save_dir);
end
save(fullfile(save_dir, save_name),'classification_model','feature_mu','feature_sigma','feature_combination','feature_train','label_train','classes','CV_accuracy');

       
        
