% Main script example for running the EOF Pruning algorithm

% Read the example dataset from disk. The MAT-file is expected to provide
% the matrix 'data', whose dimensions are recorded right away.
load('Data/ExampleDataset.mat');
[rows, columns] = size(data);

% ---- Settings chosen by the user ----
nMonteCarlo = 10;           % number of Monte Carlo validation rounds
percentMonteCarlo = 10;     % percentage of data held out per SelectSet call
initialization = 'column';  % initialization method handed to InitMissing
maxeof = 50;                % upper limit on the number of EOF
eofrounds = 20;             % upper limit on the number of EOF Pruning rounds

% Hold out a test set: data2 is what remains for learning, testData and
% testIndex are kept for the test error calculation later in the script.
[data2, testData, testIndex] = SelectSet(data, percentMonteCarlo);

% Normalize the remaining data. The means and stds are stored so that the
% filled result can be DeNormalized before the test error is computed.
% (Per the original note, Normalize leaves the missing values untouched.)
[dataNorm, normMeans, normStds] = Normalize(data2, 'meancols', 'stdcols');


%% Learning part
% One validation split is prepared per Monte Carlo round. The per-round
% variables carry the round number as a suffix (dataMC1, valData1, ...),
% so the statements are generated as text and run through eval.
for mc = 1:nMonteCarlo
  % Declare this round's variables as globals so that EOFPruning can
  % reach them.
  % NOTE(review): mask<mc> is declared here but never assigned in this
  % loop - confirm that EOFPruning fills it in (or does not need it).
  eval(sprintf('global dataMC%d valData%d valIndex%d mask%d', ...
    mc, mc, mc, mc));

  % Draw this round's validation set from the normalized learning data
  eval(sprintf(['[dataMC%d, valData%d, valIndex%d] = ' ...
    'SelectSet(dataNorm, percentMonteCarlo);'], mc, mc, mc));

  % Remember where the NaNs are, then initialize the missing entries
  eval(sprintf('allMissing%d = find(isnan(dataMC%d));', mc, mc));
  eval(sprintf('dataMC%d = InitMissing(dataMC%d, initialization);', ...
    mc, mc));
end

% Run the EOF Pruning over all rounds to obtain the validation errors.
% The datasets, validation data and indexes prepared above are expected to
% be available to it (see the global declarations in the loop).
[valErrors, selectedEOF] = EOFPruning(nMonteCarlo, eofrounds, maxeof);


%% Testing part
% Selecting the pruned EOFs with smallest validation error ...
% valErrors is averaged over its first dimension (presumably the Monte
% Carlo rounds - confirm against EOFPruning) and squeezed. The row-wise
% minimum then gives, for every row, the smallest mean error (aa) and the
% column where it occurs (bestPrunes).
[aa, bestPrunes] = min(squeeze(mean(valErrors,1)), [], 2);
% bestRound is the row with the overall smallest mean validation error;
% the minimum value itself (cc) is not used afterwards in this script.
[cc, bestRound] = min(aa);

% Getting the best set of EOFs
% One row per round up to bestRound; row i holds the first bestPrunes(i)
% entries of selectedEOF(i,:) in ascending order, padded with zeros.
bestEOFs = zeros(bestRound, max(bestPrunes));
for i = 1:bestRound
  bestEOFs(i, 1:bestPrunes(i)) = sort(selectedEOF(i, 1:bestPrunes(i)));
end

% ... and plotting the validation errors for all pruned EOF rounds
figure;
plot(aa);
drawnow;

% Now the test itself
% Initialization
% dataNorm is the normalized data2, i.e. the data left after the test set
% selection at the top of the script.
[dataInit, maskTest] = InitMissing(dataNorm, initialization);

% Filling
dataFilled = EOFCore(dataInit, maskTest, bestEOFs, 1, bestRound);

% Denormalization, in the reverse order of the Normalization steps
dataFilled = DeNormalize(dataFilled, 'std', normStds, 'mean', normMeans);

% Test error calculation
% Mean squared error between the filled values at testIndex and the held
% out testData. No trailing semicolon, so the result is printed.
testError = mean((dataFilled(testIndex) - testData) .^2)


%% Final filling of the data set
% Fills the missing (NaN) entries of the original data using the EOFs
% selected above. dataFilledFinal always ends up defined: it equals data
% when nothing is missing, and data with its NaNs replaced otherwise.

% Locate the missing values once; the mask is reused below.
isMissingFinal = isnan(data);

% Start from the original data so that observed values are never altered
% and no stale dataFilledFinal from an earlier run can survive.
dataFilledFinal = data;

% The mask must be flattened with (:) before any(): on a matrix, any()
% works column by column and returns a row vector, and 'if' is only true
% when ALL of its elements are nonzero. Without (:) the filling would be
% skipped whenever at least one column has no missing value.
if any(isMissingFinal(:))
  % Dataset normalization. Remember to DeNormalize before error calculation!
  % (Per the original note, Normalize leaves the missing values untouched.)
  [dataNorm, normMeans, normStds] = Normalize(data, 'meancols', 'stdcols');
  
  % Initialization
  [dataInitFinal, maskFinal] = InitMissing(dataNorm, initialization);
  
  % Filling
  dataFilled = EOFCore(dataInitFinal, maskFinal, bestEOFs, 1, bestRound);
  
  % DeNormalizing the filled dataset
  dataFilled = ...
    DeNormalize(dataFilled, 'std', normStds, 'mean', normMeans);
  
  % Only the originally missing entries are taken from the filled data
  dataFilledFinal(isMissingFinal) = dataFilled(isMissingFinal);
end

% End

