% Main script example for running the SOM algorithm.
%
% Workflow implemented below:
%   1. Load the dataset and hold out a test set.
%   2. Normalize the remaining data.
%   3. Run Monte Carlo validation rounds to score every candidate SOM grid
%      size between minGridSize and maxGridSize.
%   4. Pick the grid size with the smallest mean validation error and
%      compute the test error with it.
%   5. If the original dataset contains missing values (NaN), fill them
%      using the selected grid size.
%
% SelectSet, Normalize, DeNormalize and SOM are project functions that are
% not defined in this script; notes about their inputs/outputs below are
% inferred from how they are used here and should be confirmed against
% their own documentation.

% Load data file (expected to provide the variable 'data')
load Data/ExampleDataset.mat;
[rows, columns] = size(data);

% Necessary parameters, set by user
% Number of Monte Carlo rounds to use in validation
nMonteCarlo = 10;
% Percentage of data to use in each validation round
percentMonteCarlo = 10;
% Minimum SOM grid size
minGridSize = 5;
% Maximum SOM grid size
maxGridSize = 30;

% Test set selection: data2 is the data left for learning, testData holds
% the held-out values and testIndex their positions in the dataset.
[data2, testData, testIndex] = SelectSet(data, percentMonteCarlo);

% Dataset normalization. Remember to DeNormalize before the test error
% calculation! Normalization does not do anything to the missing values.
[dataNorm, normMeans, normStds] = Normalize(data2, 'meancols', 'stdcols');

% Results: one row per Monte Carlo round, one column per grid size index.
% Initialized to Inf so that grid sizes that are never evaluated cannot be
% selected as the minimum.
valErrors = inf(nMonteCarlo, maxGridSize);

%% Learning part
for mc = 1:nMonteCarlo
  % Validation set selection
  [dataMC, valData, valIndex] = SelectSet(dataNorm, percentMonteCarlo);
  
  % SOM estimation for every candidate grid size.
  % NOTE(review): the code below assumes SOM returns one row per grid size
  % index 1..maxGridSize (rows addressed directly by grid size) and one
  % column per entry of valIndex - confirm against SOM.
  outputs = SOM(dataMC, valIndex, minGridSize:maxGridSize);
  
  % Validation error (mean squared error) for each SOM size
  valErrors(mc, :) = mean((outputs - repmat(valData, maxGridSize, 1)) .^2, 2);
end


%% Testing part
% Selecting the grid size with the smallest mean validation error ...
% (aa is the error value, bb the selected grid size index)
[aa, bb] = min(mean(valErrors,1));

% ... and plotting the mean validation errors for all grid sizes
figure;
plot(mean(valErrors,1));
drawnow;

% Now the test itself, using only the selected grid size
outputsTest = SOM(dataNorm, testIndex, bb);

dataFilled = dataNorm;
dataFilled(testIndex) = outputsTest(bb,:);

% Denormalization, using the reversed order compared to Normalization
dataFilled = DeNormalize(dataFilled, 'std', normStds, 'mean', normMeans);

% Test error (mean squared error) in the original units. Both operands are
% flattened to column vectors so the subtraction is element-wise regardless
% of whether testIndex/testData are stored as rows or columns.
filledTest = dataFilled(testIndex);
testError = mean((filledTest(:) - testData(:)) .^2)


%% Final filling of the data set
% In case there are missing values anywhere in the dataset.
% isnan(data) is a matrix, so it must be flattened before any(): any() on
% a matrix returns one value per column, and 'if' on a vector is true only
% when ALL elements are true, which would skip the filling whenever at
% least one column has no missing values.
if any(isnan(data(:)))
  % Taking all originally missing values in the dataset
  missingMask = isnan(data);
  allMissing = find(missingMask);
  
  % Dataset normalization. Remember to DeNormalize!
  % Normalization does not do anything to the missing values
  [dataNorm, normMeans, normStds] = Normalize(data, 'meancols', 'stdcols');

  % Filling with the grid size selected in the validation
  outputsFinal = SOM(dataNorm, allMissing, bb);
  dataFilled = dataNorm;
  dataFilled(allMissing) = outputsFinal(bb,:);
  
  % DeNormalizing the filled dataset
  dataFilled = ...
    DeNormalize(dataFilled, 'std', normStds, 'mean', normMeans);
  
  % Keep the observed values untouched; only the missing entries are
  % replaced by the SOM estimates.
  dataFilledFinal = data;
  dataFilledFinal(missingMask) = dataFilled(missingMask);
else
  % Nothing to fill: the final dataset is the original one.
  dataFilledFinal = data;
end

% End

