%% Shuffle the data set XY = [X,Y]; [lens,lenv]=size(X); rowrank = randperm(size(XY, 1)); XY1 = XY(rowrank, :); X_shuffled=XY1(:,1:lenv); Y_shuffled=XY1(:,lenv+1); %% Split the data set according to the value of 'per' %%(At this time, the data set is in accordance with the training set: test set = 2:1) per=66.7; train_size=round(lens*per/100); train=X_shuffled(1:train_size,:); target=Y_shuffled(1:train_size,1); test_x=X_shuffled(train_size:lens,:); test_y=Y_shuffled(train_size:lens,1); %% Set the number of principal components and establish PLS regression model pn1=15; [XL1,YL1,XS1,YS1,BETA1,PCTVAR1,MSE1,STATE] = plsregress(train,target,pn1,'CV',10); % Get the predicted value yfit1 = [ones(size(test_x,1),1) test_x]*BETA1; %% Calculate the R2 for the model with 15 principal components figure(1); plot(1:pn1,100*cumsum(PCTVAR1(2,:)),'-o','Color',[0.23529 0.70196 0.44314]); title('Determination Coefficient R2'); xlabel('Number of Components'); ylabel('Determination Coefficient R2'); %% Establish the plsregression model for 10 principal components pn2=10; [XL2,YL2,XS2,YS2,BETA2,PCTVAR2,MSE2] = plsregress(train,target,pn2,'CV',10); yfit2=[ones(size(test_x,1),1) test_x]*BETA2; %% Plot Determination Coefficient figure(2); plot(1:pn2,100*cumsum(PCTVAR2(2,:)),'-o','Color',[0.23529 0.70196 0.44314]); title('Determination Coefficient'); xlabel('Number of Components'); ylabel('Determination Coefficient'); %% Plot Percent Variance Explained in NIR spectral data figure(3); plot(1:pn2,100*cumsum(PCTVAR2(1,:)),'-o','Color',[0.23529 0.70196 0.44314]); title('Percentage Variance Explained in NIR spectral data'); xlabel('Number of Components'); ylabel('Percent Variance Explained in NIR spectral data'); axis([0 pn2 -inf inf]) ; legend({'PLSR Variance Explained Curve'},'Location','NW') %% Calculate and plot the residuals residuals = test_y - yfit2; figure(3); a1=zeros(1,600-train_size); plot(residuals,'Color',[0.80392 0.36078 0.36078]); title('Residuals of each sample under PLSR'); xlabel('Order of Samples'); ylabel('Residual'); hold on plot(a1,'k'); axis([0 600-train_size -1.5 1.5]) ; legend({'PLSR Residual','Residual==0'},'Location','NW'); %% Plot the MSPECV of PLS regression model figure(4) plot(0:10,MSE2(2,:),'-o','Color',[0.23529 0.70196 0.44314]); title('MSPECV of PLSR'); xlabel('Number of Components'); ylabel('Estimated Mean Squared Prediction Error'); legend({'PLSR'},'location','NE'); %% Calculate the R2 for the model with 10 principal components TSS_1 = sum((test_y-mean(test_y)).^2); RSS_1 = sum((test_y-yfit2).^2); Rsquared_1 = 1 - RSS_1/TSS_1; %% Calcultate the Rp for the model with 10 principal components coeff_PLSR1 = corr(test_y , yfit2);