%% AA2_simulate_data
% simulates a data set for a given alternative model (M,H).
% The function takes the true parameters from AA1_para_true and sets some of them to zero, depending on the alternative model.
% The system is solved to give RPKM values, which are then mapped into read counts.
% The data (without initial values) are then simulated from the likelihood, using the experimental dispersions.

function D = AA2_simulate_data(M,H,repeat)
% AA2_SIMULATE_DATA  Simulate one read-count data set for the model (M,H).
%
%   D = AA2_simulate_data(M,H,repeat)
%
%   Inputs
%     M      - alternative mechanism. Cases 1..8 select which of the entries
%              5, 6 and 11 of logtheta are set to log(0) = -Inf (see the
%              switch below); any other value leaves logtheta unchanged.
%     H      - alternative hypothesis: 1 (alpha fixed to 1), 2 (scalar
%              alpha = alpha(1)) or 3 (one alpha per replicate).
%     repeat - repetition index; it only enters the random seed and the
%              name of the output file.
%
%   Output
%     D      - the struct D loaded from data_expt.mat, with rows 2:end and
%              columns 1:2 of D.RC_y and D.RPKM_y replaced by simulated
%              values (row 1, the experimental initial values, is kept).
%
%   Side effects
%     * reseeds the global random number generator,
%     * writes data_simu_M<M>H<H>_r<repeat>.mat to the current folder,
%     * calls AA3_plot_data and refreshes the figures.
%
%   Requires nbinrnd (Statistics and Machine Learning Toolbox).

%% control randomness
seed = 2016 + 100*M + 10*H + repeat;
rng(seed,'twister') % control random number generation

%% call the experimental data file
% Load into a struct and pick D explicitly instead of letting LOAD create
% variables implicitly in the function workspace.
S = load('data_expt.mat');
D = S.D;

%% call the true parameters
[logtheta , alpha] = AA1_para_true;

%% parameter dimension

% alternative mechanism, Table 1
% (log(0) = -Inf, so the corresponding theta = exp(logtheta) is exactly 0)
switch M
    case 1, logtheta([5,6,11]) = -Inf;
    case 2, logtheta([5,  11]) = -Inf;
    case 3, logtheta([  6,11]) = -Inf;
    case 4, logtheta([    11]) = -Inf;
    case 5, logtheta([5,6   ]) = -Inf;
    case 6, logtheta([5     ]) = -Inf;
    case 7, logtheta([  6   ]) = -Inf;
    case 8 % full model, nothing is switched off
end

% alternative hypothesis
switch H
    case 1, alpha = 1;        % pure Th17 population
    case 2, alpha = alpha(1); % 1-dimensional alpha
    case 3                    % 3-dimensional alpha, used as returned by AA1_para_true
    otherwise
        % Without this check RPKM_x would stay undefined and the function
        % would fail further down with a much less helpful message.
        error('AA2_simulate_data:invalidH','H must be 1, 2 or 3.');
end

%% data simulation

n_rep   = 3;                     % number of replicates
n_pts   = size(D.lib_size,1) - 1; % rows of the simulated data (initial values excluded)
theta   = exp(logtheta);         % same parameters for every replicate

% RPKM_x
switch H
    case {1,2} % all replicates share one solution
        x      = A3_MDE_output(theta,D.t,H,[]);        % solve MDEs
        x_mRNA = local_mix_mRNA(x,alpha);              % Eq. (9), (10) and (11)
        RPKM_x = repmat(x_mRNA,[1 1 n_rep]);           % repeat for 3 replicates

    case 3 % each replicate has its own alpha
        RPKM_x = zeros(n_pts,2,n_rep);
        for k = 1:n_rep
            x             = A3_MDE_output(theta,D.t,H,k); % solve MDEs
            RPKM_x(:,:,k) = local_mix_mRNA(x,alpha(k));   % Eq. (9) and (12)
        end
end

% mapping RPKM_x into RC_x
lib_size    = D.lib_size(2:end,:,:);                   % library size, without initial values
lib_size    = repmat(lib_size,[1 2 1]);                % repeat for 2 genes
exon_length = reshape(D.exon_length(1:2),1,2);         % exon length of STAT3 & RORC, forced to a row (genes along dim 2)
exon_length = repmat(exon_length,[n_pts 1 n_rep]);     % repeat for all time pts & replicates
mu          = RPKM_x .* lib_size .* exon_length / 1e9; % RC_x

% RC_y, read count data
phi  = D.phi(2:end,[1,2],:); % dispersion of STAT3 & RORC, without initial values
R    = 1 ./ phi;             % number of successes (element-wise; "1/phi" would be a matrix division)
P    = 1 ./ (1 + mu.*phi);   % probability of success
RC_y = nbinrnd(R,P);         % simulated RC data, RC_y ~ NB(RC_x,phi), Eq. (13)

% RPKM_y
RPKM_y = 1e9 * RC_y ./ lib_size ./ exon_length; % simulated RPKM data

%% output, with the experimental initial values
D.RC_y(2:end,[1,2],:)   = RC_y;
D.RPKM_y(2:end,[1,2],:) = RPKM_y;

%% save
filename = ['data_simu_M',num2str(M),'H',num2str(H),'_r',num2str(repeat),'.mat'];
save(filename,'D')

%% PLOT - system output
AA3_plot_data(logtheta,alpha,D,M,H); drawnow

end

function x_mRNA = local_mix_mRNA(x,a)
% LOCAL_MIX_MRNA  Stack the controlled and variable mRNA outputs.
%   Takes columns 2 and 4 (STAT3_m & RORC_m) of x.Th0 and x.Th17, forms the
%   controlled group (Th0 only) and the variable group (mixture with weight
%   a on Th17), drops the first row (initial values) of each and stacks
%   them vertically.
mRNA_Th0  = [ x.Th0(:,2)  , x.Th0(:,4)  ];         % STAT3_m & RORC_m of Th0
mRNA_Th17 = [ x.Th17(:,2) , x.Th17(:,4) ];         % STAT3_m & RORC_m of Th17
x_cont    = mRNA_Th0;                              % controlled group
x_vari    = (1 - a)*mRNA_Th0 + a*mRNA_Th17;        % variable group
x_mRNA    = [ x_cont(2:end,:) ; x_vari(2:end,:) ]; % without initial values
end