% Supplementary File 1 - Feature Extraction

% Jonghee Yoon et al. "Label-free optical technique for identifying 
% lymphocyte types using 3D quantitative phase imaging and machine
% learning," Journal of Visualized Experiments

% Written by YoungJu Jo
% KAIST Biomedical Optics Lab & Tomocube

%%

clear; close all;

% ---- User configuration --------------------------------------------------
source_dir = 'D:\lymphocytes\tomograms';  % directory with tomographic data
target_dir = 'D:\lymphocytes\features';  % directory to save extracted features 

RI_threshold = 1.340:0.002:1.378;  % RI thresholds for feature extraction

% Variables every tomogram .mat file must provide.
%   data             : 3D refractive-index volume
%   resx, resy, resz : voxel size along each axis (must be equal)
%   ri               : reference RI subtracted in the dry-mass formula
%                      (presumably the RI of the surrounding medium - confirm)
required_vars = {'data','resx','resy','resz','ri'};

%%
% Batch feature extraction.
%
% Expected layout: source_dir/<class name>/*.mat
% Output layout  : target_dir/<class name>/features_<tomogram file name>
%
% For each tomogram and each RI threshold, the largest connected region of
% the smoothed, thresholded volume is taken as the cell, and one row of
% 'features' is filled:
%   column 1 : Surface Area [um^2]
%   column 2 : Cellular Volume [fL]
%   column 3 : Sphericity Index
%   column 4 : Protein Density [g/dL]
%   column 5 : Dry Mass [pg]
% Rows for thresholds at which no region is detected are set to NaN.
%
% All paths are built with fullfile instead of cd, so the current working
% directory is never changed by this script.

if exist(source_dir,'dir') ~= 7
    error('Source directory not found: %s', source_dir);
end

% Keep the handle so that only this figure is closed at the end.
fig = figure('units','normalized','outerposition',[0 0 1 1]);

% Enumerate class sub-folders. '.' and '..' are filtered by name because
% their position in the dir() listing is not guaranteed, and plain files in
% source_dir are skipped.
cell_type_folders = dir(source_dir);
cell_type_folders = cell_type_folders([cell_type_folders.isdir]);
cell_type_folders = cell_type_folders(~ismember({cell_type_folders.name},{'.','..'}));

for cell_type_idx = 1:length(cell_type_folders)
    
    class_name = cell_type_folders(cell_type_idx).name;
    class_source_dir = fullfile(source_dir,class_name);
    class_target_dir = fullfile(target_dir,class_name);
    
    % Restrict exist() to folders: without the 'dir' argument it would also
    % match workspace variables, functions and files of the same name.
    if exist(class_target_dir,'dir') ~= 7
        mkdir(class_target_dir);
    end
    
    tomo_files = dir(fullfile(class_source_dir,'*.mat'));
    
    for tomo_idx = 1:length(tomo_files)
        
        tomo_name = tomo_files(tomo_idx).name;
        tomo_path = fullfile(class_source_dir,tomo_name);
        
        % Load into a struct so that a file cannot overwrite script
        % variables, and so that a missing variable is detected instead of
        % silently reusing the value left over from the previous tomogram.
        tomo = load(tomo_path,required_vars{:});
        missing_vars = setdiff(required_vars,fieldnames(tomo));
        if ~isempty(missing_vars)
            error('File %s is missing required variable(s): %s', ...
                tomo_path,strjoin(missing_vars,', '));
        end
        data = tomo.data;
        resx = tomo.resx; resy = tomo.resy; resz = tomo.resz;
        ri = tomo.ri;
        
        if (resx~=resy)||(resx~=resz)
            error('Voxel size should be isotropic.');
        end
        
        % visualization: maximum-intensity projections of the raw tomogram
        subplot(131); imagesc(max(data,[],3),[1.337 1.4]); axis image; colormap jet; title(class_name);
        subplot(132); imagesc(flipud(squeeze(max(data,[],2))'),[1.337 1.4]); axis image; title(num2str(tomo_idx));
        
        data_smooth = imgaussfilt3(data,1);  % temporary smoothing for RI-based detection
        
        features = zeros(length(RI_threshold),5);
        
        for thres_idx = 1:length(RI_threshold)
            
            % selection of voxels at a given RI threshold
            cmap = data_smooth > RI_threshold(thres_idx);  % detecting cell-like regions
            cmap = imfill(cmap,'holes');  % removing hole artifacts
            cmap_CC = bwconncomp(cmap,26);
            
            if cmap_CC.NumObjects == 0
                % nothing detected at this threshold
                features(thres_idx,:) = nan;
                continue;
            end
            
            numPixels = cellfun(@numel,cmap_CC.PixelIdxList);
            [~,idx] = max(numPixels);    % the largest region as cell
            
            % binary mask (stored as double) of the selected region only
            cmap = zeros([size(data,1),size(data,2),size(data,3)]);
            cmap(cmap_CC.PixelIdxList{idx}) = 1;
            
            data_masked = data .* cmap;
            n_voxels = sum(cmap(:));
            voxel_volume = resx*resy*resz;
            
            % Cellular Volume [fL]
            features(thres_idx,2) = n_voxels*voxel_volume;
            % Dry Mass [pg]
            features(thres_idx,5) = (sum(data_masked(:))-ri*n_voxels)*voxel_volume/0.2; % RI increment = 0.2 mL/g (mostly protein)
            % Protein Density [g/dL]
            features(thres_idx,4) = features(thres_idx,5) / features(thres_idx,2) * 100;
            
            % Triangulated surface of the mask; vertex coordinates are in
            % voxel units, hence the scaling by resx*resy below.
            p2 = isosurface(cmap,0.5);
            v = p2.vertices; f = p2.faces;
            if ~isempty(f)
                % area of each triangle = half the norm of the cross
                % product of two of its edge vectors
                a = v(f(:,2),:) - v(f(:,1),:); b = v(f(:,3),:) - v(f(:,1),:);
                c = cross(a,b,2) * 0.5;
                % Surface Area [um^2]
                features(thres_idx,1) = sum(sqrt(sum(c.^2,2)))*resx*resy;
                % Sphericity Index
                features(thres_idx,3) = pi^(1/3)*6^(2/3)*features(thres_idx,2)^(2/3)/features(thres_idx,1); 
            else
                % no surface could be generated for this mask
                features(thres_idx,1) = nan; features(thres_idx,3) = nan;
            end
            
            % visualization: projection of the voxels kept at this threshold
            subplot(133); imagesc(max(data_masked,[],3),[1.337 1.4]); axis image; title(strcat('RI threshold=',num2str(RI_threshold(thres_idx))));
            pause(0.001);
            
        end
        
        % save
        save(fullfile(class_target_dir,strcat('features_',tomo_name)),'features','RI_threshold','class_name');
        
    end

end

% Close only the figure created by this script (it may already have been
% closed by the user).
if ishandle(fig)
    close(fig);
end
