function [X,y_eval,y] = bayes_optim(X,y_eval,LB,UB,f,max_iter,run_name)
% BAYES_OPTIM  Sequential minimisation of f using a GP surrogate and
% expected improvement (EI).
%
% Inputs
%   X        - d x n matrix; each column is an already evaluated point
%   y_eval   - column vector of objective values for the columns of X
%   LB, UB   - lower / upper bounds with d entries each (row or column)
%   f        - function handle, called as f(x) with x a d x 1 column
%   max_iter - maximum number of EI maximisations to perform
%   run_name - suffix of the MAT-file 'bayes_opt_<run_name>' that is
%              rewritten at the start of every iteration
%
% Outputs
%   X, y_eval - inputs extended with every newly evaluated point
%   y         - standardised copy of the returned y_eval
%
% The loop stops when the best EI found drops to EI_th or below, or when
% max_iter EI maximisations have been performed.

d      = size(X,1);
max_EI = -inf;              % holds the minimum of -EI, so -max_EI is the best EI
EI_th  = 1e-46;
count  = 1;
val_EI = zeros(1,max_iter);

% Row copies of the bounds, used only to scale the Latin-hypercube samples.
% Without this, column-vector bounds would expand against the 1 x d sample
% row and produce a d x d start point.
lb_row = LB(:)';
ub_row = UB(:)';

options_ei = optimoptions('fmincon','GradObj','off','Display','off');
A = []; b = []; Aeq = []; beq = [];nonlcon = [];

while(-max_EI > EI_th)
    y                      = standardize_obs(y_eval);
    ymin                   = min(y);
    [~, theta_opt]         = GPopt(X,y);
    save(['bayes_opt_',run_name],'X','y_eval','y','theta_opt','val_EI');
    
    if(count > max_iter)
        break;
    end
    
    % multistart optimization to find the max of EI
    % (expected_impr returns -EI, so fmincon's minimum is the EI maximum)
    EI_fun = @(x)expected_impr(x,X,y,ymin,theta_opt);
    N_ms   = 10*d;
    LHS_X  = lhsdesign(N_ms,d);
    XTEMP  = zeros(d,N_ms);
    VALUE  = zeros(1,N_ms);
    for i = 1:N_ms
        x0                   = (lb_row + (ub_row - lb_row).*LHS_X(i,:))';
        [x_temp, value_temp] = fmincon(EI_fun,x0,A,b,Aeq,beq,LB,UB,nonlcon,options_ei);
        XTEMP(:,i)           = x_temp;
        VALUE(i)             = value_temp;
    end
    [max_EI, index] = min(VALUE);
    x_star          = XTEMP(:,index);
    val_EI(count)   = -max_EI;
    disp(['max EI = ',num2str(-max_EI)]);
    
    % only spend an objective evaluation when some improvement is expected
    if(-max_EI > 0)
        disp('Computing value at point:');
        for i = 1:d
            disp(['x_',int2str(i),' = ',num2str(x_star(i))]);
        end
        X      = [X x_star];
        y_eval = [y_eval; f(x_star)];
    end
    count = count + 1;
end

% The loop can exit through the while-condition right after a point was
% appended; recompute y so it always matches the returned y_eval.
y = standardize_obs(y_eval);

end

function y = standardize_obs(y_eval)
% Centre y_eval and scale it by its standard deviation. A zero standard
% deviation (all values identical) is replaced by 1 to avoid 0/0 = NaN.

s = std(y_eval);
if s == 0
    s = 1;
end
y = (y_eval - mean(y_eval)) / s;

end

function value = expected_impr(x,X,y,ymin,theta_opt)
% EXPECTED_IMPR  Negative expected improvement at x (for use in a minimiser).
%
%   x         - candidate point passed through to GPpred
%   X, y      - training inputs / targets passed through to GPpred
%   ymin      - current best (smallest) target value
%   theta_opt - GP hyperparameters passed through to GPpred
%
% Returns value = -EI(x), where
%   EI = (ymin - mu)*normcdf(z) + s*normpdf(z),  z = (ymin - mu)/s,
% with mu and s^2 the first and second outputs of GPpred.
% NOTE(review): assumes GPpred returns scalars for a single x - confirm.

[f_star, f_star_var] = GPpred(X,y,x,theta_opt);

% Clamp tiny negative variances caused by round-off so sqrt stays real.
s     = sqrt(max(f_star_var, 0));
delta = ymin - f_star;

if s > 0
    z  = delta / s;
    ei = delta*normcdf(z) + s*normpdf(z);
else
    % Zero predictive variance: EI reduces to the deterministic improvement,
    % which avoids 0/0 = NaN in the formula above.
    ei = max(delta, 0);
end

value = -ei;

end

function [logMLH, theta_opt] = GPopt(X,y)
% GPOPT  Fit GP hyperparameters by minimising GPlog_mlh with fminunc.
%
%   X - d x n matrix of inputs (one point per column)
%   y - targets passed through to GPlog_mlh
%
% Returns
%   logMLH    - negated optimal value reported by fminunc
%   theta_opt - optimised hyperparameters on the natural (non-log) scale,
%               ordered [length scales (1..d); sigma_f; sigma_n]

% Start point in log-space: unit length scales, sigma_f = 1, sigma_n = 0.1
n_dims         = size(X,1);
log_theta_init = [log(ones(1,n_dims)), log(1), log(.1)]';

% Trust-region search using the gradient supplied by GPlog_mlh
solver_opts = optimoptions('fminunc','algorithm','trust-region','GradObj','on','Display','off');
objective   = @(theta)GPlog_mlh(X,y,theta);

[log_theta_hat, obj_min] = fminunc(objective,log_theta_init,solver_opts);

% Map back from log-space and flip the sign of the minimised objective
theta_opt = exp(log_theta_hat);
logMLH    = -obj_min;

end

function [f, g] = GPlog_mlh(X,y,theta_log)
% GPLOG_MLH  Negative log marginal likelihood of a GP with a squared-
% exponential ARD kernel, and its gradient w.r.t. the log-hyperparameters.
%
%   X         - p x n matrix of inputs (one point per column)
%   y         - n x 1 column of targets
%   theta_log - (p+2) vector of log-hyperparameters:
%               theta_log(1:p)  log length scales
%               theta_log(p+1)  log sigma_f (signal standard deviation)
%               theta_log(p+2)  log sigma_n (noise standard deviation)
%
% Returns
%   f - negative log marginal likelihood
%   g - (p+2) x 1 gradient of f with respect to theta_log

theta = exp(theta_log(:));

n = size(X,2);
d = length(theta);          % d = p + 2
p = d - 2;

if size(X,1) ~= p
    error('GPlog_mlh:dimMismatch', ...
          'theta_log must have size(X,1)+2 entries (got %d for %d input dimensions).', ...
          d, size(X,1));
end

ell     = theta(1:p);
sigma_f = theta(d-1);
sigma_n = theta(d);

% Squared coordinate differences per input dimension, and the scaled
% squared distance that enters the kernel exponent.
D2 = zeros(n,n,p);
S  = zeros(n);
for k = 1:p
    diff_k    = bsxfun(@minus, X(k,:)', X(k,:));
    D2(:,:,k) = diff_k.^2;
    S         = S + D2(:,:,k) / ell(k)^2;
end
K = sigma_f^2 * exp(-0.5*S);          % noise-free kernel matrix

L     = chol(K + sigma_n^2 * eye(n),'lower');
K_inv = L'\(L\eye(n));
alpha = L'\(L\y);                     % (K + sigma_n^2 I)^{-1} y

% sum(log(diag(L))) equals 0.5*log(det(K + sigma_n^2 I))
logML = -0.5*(y'*alpha) - sum(log(diag(L))) - n*log(2*pi)/2;

% Gradient of the log marginal likelihood. For a symmetric dK,
%   0.5*y'*Kinv*dK*Kinv*y - 0.5*trace(Kinv*dK) = 0.5*sum(sum(W.*dK)),
% with W = alpha*alpha' - Kinv.
W          = alpha*alpha' - K_inv;
logML_grad = zeros(d,1);
for k = 1:p
    dK_k          = K .* D2(:,:,k) / ell(k)^3;       % dK/d ell_k
    logML_grad(k) = 0.5*sum(sum(W .* dK_k));
end
logML_grad(d-1) = 0.5*sum(sum(W .* (2*K/sigma_f)));  % dK/d sigma_f = 2K/sigma_f
% The noise term is sigma_n^2*I, so its derivative is 2*sigma_n*I
% (the sigma_n factor must not be dropped).
logML_grad(d)   = 0.5*trace(W) * 2*sigma_n;

f = -logML;
% chain rule: d/d(log theta) = theta .* d/d(theta)
g = -theta.*logML_grad;

end