% Main script example for running the SOM+EOF algorithm

% Read the example dataset from disk. The rest of the script reads the
% variable `data`, which is expected to come from this MAT-file.
load('Data/ExampleDataset.mat');
% Size of the loaded data matrix
[rows, columns] = size(data);

% ---- User-defined parameters ------------------------------------------

% Validation
nMonteCarlo       = 10;   % number of Monte Carlo rounds used in validation
percentMonteCarlo = 10;   % percentage of data used in each validation round

% SOM grid sizes to try
minGridSize = 5;          % smallest SOM grid size
maxGridSize = 30;         % largest SOM grid size

% EOF settings
% NOTE(review): `initialization` is not read anywhere in the visible part
% of this script -- confirm whether it is still needed.
initialization = 'column';  % initialization method for the EOF
maxeof         = 20;        % maximum number of EOF to use
% The EOF process is stopped once the change in the data set drops below
% this value ...
stopping = 0.1;
% ... or after this many EOF rounds, if the stopping value is not reached
rounds = 1000;

% Hold out a test set from the raw data (same percentage as is used for
% the validation sets)
[data2, testData, testIndex] = SelectSet(data, percentMonteCarlo);

% Normalize the remaining data. The returned means and stds are needed to
% DeNormalize again before the test error is calculated!
% Normalization does not do anything to the missing values.
[dataNorm, normMeans, normStds] = Normalize(data2, 'meancols', 'stdcols');

% Validation error tables, pre-filled with Inf so that combinations that
% are never evaluated cannot be picked as the minimum:
%   valErrorsSOM(mc, gridSize), valErrorsEOF(mc, gridSize, nEOF)
valErrorsSOM = inf(nMonteCarlo, maxGridSize);
valErrorsEOF = inf(nMonteCarlo, maxGridSize, maxeof);


%% Learning part
% Monte Carlo validation: for every round and every SOM grid size, record
% the validation error of SOM alone and of SOM followed by EOF.
for mc = 1:nMonteCarlo
  % Draw a new validation set from the normalized data
  [dataMC, valData, valIndex] = SelectSet(dataNorm, percentMonteCarlo);

  % Everything that is NaN now has to be estimated
  % (presumably this includes the validation entries -- TODO confirm)
  mask = isnan(dataMC);
  allMissing = find(mask);

  % SOM estimates of all missing values, for all grid sizes at once
  somInits = SOM(dataMC, allMissing, minGridSize:maxGridSize);

  for r = minGridSize:maxGridSize
    % Write the SOM estimates for grid size r into the data
    dataMC(allMissing) = somInits(r, :);

    % SOM-only validation error (mean squared error)
    valErrorsSOM(mc, r) = mean((dataMC(valIndex) - valData) .^ 2);

    % EOF estimates of the validation values, started from the
    % SOM-initialized data
    outputs = EOFMulti(dataMC, mask, maxeof, 'original', valIndex, ...
      stopping, rounds);

    % SOM+EOF validation error, one value per row of outputs
    valErrorsEOF(mc, r, :) = ...
      mean((outputs - repmat(valData, maxeof, 1)) .^ 2, 2);
  end % For loop r
end % For loop mc


%% Testing part
% Selecting the best combination of SOM grid size and number of EOF ...
% The Monte Carlo mean is reshaped explicitly into a
% (grid size) x (number of EOF) matrix. squeeze is not used here because
% it leaves 2-D arrays untouched (maxeof == 1) and turns a 1-by-1-by-N
% array into a column (maxGridSize == 1); in both cases the minimum below
% would be taken along the wrong dimension.
[aa, bestEOFs] = min(reshape(mean(valErrorsEOF, 1), ...
  size(valErrorsEOF, 2), size(valErrorsEOF, 3)), [], 2);
% aa(g) is the lowest mean validation error for grid size g and
% bestEOFs(g) the number of EOF that reaches it. The two lines below
% intentionally have no semicolon so that the selection is displayed.
[cc, bestSOM] = min(aa)
bestEOF = bestEOFs(bestSOM)

% ... and plotting the validation errors for SOM and SOM+EOF
figure;
plot(mean(valErrorsSOM,1));
hold on;
plot(aa,'r');
drawnow;

% Now the test itself
% Initialization: SOM estimates of every missing value (test set values
% presumably included -- TODO confirm) using the selected grid size
allMissing = find(isnan(dataNorm));
maskTest = isnan(dataNorm);
somInit = SOM(dataNorm, allMissing, bestSOM);
dataNorm(allMissing) = somInit(bestSOM,:);

% Filling with the selected number of EOF
dataFilledEOF = EOFCore(dataNorm, maskTest, bestEOF, stopping, rounds);

% Denormalization, in the reverse order of the Normalization
dataFilledEOF = ...
  DeNormalize(dataFilledEOF, 'std', normStds, 'mean', normMeans);

% Test error calculation (mean squared error on the held-out test values);
% displayed on purpose
testError = mean((dataFilledEOF(testIndex) - testData) .^2)


%% Final filling of the data set
% Result defaults to the data as is, so that dataFilledFinal is defined
% also when there is nothing to fill
dataFilledFinal = data;

% In case there's missing values.
% data(:) is needed here: any() on a matrix works column by column and
% `if` is only true when ALL elements of its condition are nonzero, so
% without it the filling would be skipped unless every column has a NaN.
if any(isnan(data(:)))
  % Dataset normalization. Remember to DeNormalize before error calculation!
  % Normalization does not do anything to the missing values
  [dataNorm, normMeans, normStds] = Normalize(data, 'meancols', 'stdcols');
  
  % Initialization with SOM, using the grid size selected in validation
  allMissing = find(isnan(dataNorm));
  maskFinal = isnan(dataNorm);
  somInitFinal = SOM(dataNorm, allMissing, bestSOM);
  dataNorm(allMissing) = somInitFinal(bestSOM,:);
  
  % Filling with the number of EOF selected in validation
  dataFilled = EOFCore(dataNorm, maskFinal, bestEOF, stopping, rounds);
  
  % DeNormalizing the filled dataset
  dataFilled = ...
    DeNormalize(dataFilled, 'std', normStds, 'mean', normMeans);
  
  % Only the originally missing entries are taken from the filled data;
  % known values are kept untouched
  dataFilledFinal(isnan(data)) = dataFilled(isnan(data));
end

% End

