%% Example analysis using MATLAB (MathWorks) and PLS_Toolbox (Eigenvector Research).


%(c) 2017 David Perez Guaita 
% david.perez.guaita@monash.edu miguela.martin@monash.edu 

%% INPUTS
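% X     = sample spectra (one spectrum per row)
% wnm   = wavenumber axis of the spectra
% Y     = reference concentration of each sample
% Water = water spectra (one spectrum per row; averaged in step 2.1.4)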

%% EXAMPLE: PROCESSING OF PARASITEMIA DATA %%

% 2.1.2.) Plot the data and inspect the spectra for water vapour effects,
% which are most clearly observed as short, sharp, narrow peaks along the
% slopes of the amide I and amide II bands. Correct a non-horizontal
% baseline by subtracting a baseline spectrum if appropriate (no baseline
% subtraction is coded in this step; do it beforehand if needed).
plot(wnm,X)

% 2.1.3.) Reduce noise and/or strong water vapour contributions by smoothing
% the sample and water spectra with a 25-point window, or use a water
% vapour correction method instead.
% Build the PLS_Toolbox 'smooth' preprocessing structure and set its window
% width; SMOOTH is reused on the averaged water spectrum in step 2.1.4.
SMOOTH=preprocess('default','smooth');
SMOOTH.userdata.width=25;
X2=preprocess('calibrate',SMOOTH,X);% Smoothed sample spectra (numeric values are read from X2.data in step 2.1.5)

%2.1.4.) Average the water spectra and scale the average to 70% by
% multiplying it by 0.7, then smooth it with the same settings as the
% sample spectra.
% The averaging dimension is given explicitly: mean(Water) without a
% dimension would collapse a single-row water spectrum to a scalar.
Water=mean(Water,1)*0.7;
Water=preprocess('calibrate',SMOOTH,Water);

%2.1.5.) Subtract the scaled average water spectrum from each sample spectrum.
% bsxfun expands the single water row across all sample rows, so the
% subtraction also works on MATLAB releases without implicit expansion
% (before R2016b). The trailing semicolon stops the whole matrix from being
% echoed to the command window. X3 is a plain numeric matrix.
X3=bsxfun(@minus,X2.data,Water.data);

%2.1.6.) Apply a second-derivative Savitzky-Golay filter to the sample set
% using a 25-point window.
% Build the PLS_Toolbox 'savgol' preprocessing structure, then override its
% window width and derivative order before applying it to X3.
SAVGOL=preprocess('default','savgol');
SAVGOL.userdata.width=25;
SAVGOL.userdata.deriv=2;
X4=preprocess('calibrate',SAVGOL,X3);

% 2.1.7.) Normalise the data with the standard normal variate (SNV)
% function and mean centre the data.
% NOTE(review): the code in this step applies SNV only; it does not
% mean-centre X5 itself.
SNV=preprocess('default','snv');
X5=preprocess('calibrate',SNV,X4);


%2.2.)	Data analysis

%2.2.1.)	Principal Component Analysis (PCA)

%2.2.1.1.) Fit a PCA model with 7 principal components (PCs) to the
% preprocessed spectra X5. In a GUI workflow, also set a maximum of 100
% iterations, select cross-validation as the validation method and click Run.
% The protocol (step 2.1.7) asks for mean-centred data, so mean-centring is
% requested through the model's preprocessing options; it is then stored
% with the model and re-applied automatically to any new data.
% NOTE(review): this call does not run cross-validation; use the toolbox's
% cross-validation tools if validated statistics are needed.
% The statement is left without a semicolon so the model summary is shown.
PCAopts=pca('options');
PCAopts.preprocessing={preprocess('default','mean center')};
PCAmodel=pca(X5,7,PCAopts)


%2.2.1.2.)	Apply a 95% confidence limit
%onto the scores plot between PC1 and PC2. 

%2.2.1.3.)	Sample spectra whose scores occur outside the 
%limit should be marked as potential outliers using the select spectra tool

%2.2.1.4.)	If the marked spectra have both high Hotelling's T2
%values and high Q residuals, exclude them from further analysis.

%2.2.2.)	Partial Least Squares Regression (PLS-R)

%2.2.2.1.) Fit a PLS-R model with 7 latent variables, using the matrix of
% sample concentrations (Y) as the reference and the preprocessed sample
% dataset (X5) as the X block. In a GUI workflow, select cross-validation
% as the validation method and click Run.
% The model is built on X5 (smoothed, water-subtracted, second-derivative,
% SNV-normalised spectra) rather than the raw spectra X, so that the
% preprocessing of section 2.1 is actually used by the regression.
% Mean-centring of both the X and Y blocks is requested through the model
% options, so it is stored with the model and re-applied to new data.
% NOTE(review): this call does not run cross-validation; the RMSECV needed
% in step 2.2.2.2 must be obtained with the toolbox's cross-validation tools.
% The statement is left without a semicolon so the model summary is shown.
PLSopts=pls('options');
PLSopts.preprocessing={preprocess('default','mean center') preprocess('default','mean center')};
PLSmodel=pls(X5,Y,7,PLSopts)

%2.2.2.2.)	Analyse the regression model: models with R2 values above 0.80
%and root mean square error of cross-validation (RMSECV) values
%below 0.1% parasitemia are acceptable.

%2.2.2.3.)	Analyse the regression vector and identify biological bands

