% --- Temperature stage control -----------------------------------------
% When the per-temperature budget is spent (itry attempts made, or
% Success acceptances reached), either terminate the annealing or cool
% down and begin a new stage.
% NOTE(review): itry and Success appear to be incremented by the
% enclosing loop, which is not visible in this chunk — confirm there.
IF itry >= MaxTries || Success >= MaxSuccess;
IF T < StopTemp || Consec >= MaxConsRej;
% Frozen: temperature fell below StopTemp, or too many consecutive
% rejections (Consec) occurred.  Mark completion and exit the loop.
Finished = 1;
total_iter = total_iter + itry;
Break;
ELSE
T = CoolSched(T); % Decrease T according to 'CoolSched'.
total_iter = total_iter + itry;
% Reset the per-stage counters for the next (cooler) temperature.
% NOTE(review): counters restart at 1 rather than 0 — presumably
% deliberate; verify against how the enclosing loop increments them.
itry = 1; Success = 1;
END
END
% --- Propose a candidate policy and apply the Metropolis criterion -----
% Draw a random policy index in 1..6 and evaluate its reward.
% NOTE(review): round(1+rand*5) gives indices 1 and 6 only half the
% probability of 2..5; use an integer-uniform draw (e.g. randi(6)) if a
% flat distribution over the six policies is intended.
nPolicy = round(1+(rand(1)*5));
[NewReward] = function_SemiMarkov(nPolicy);
incNewReward = NewReward-OldReward;
% If the new solution is better than the old solution, replace the old solution with the new solution.
IF (incNewReward > 1e-6)
cPolicy = nPolicy; OldReward = NewReward;
Success = Success+1; Consec = 0;
% Otherwise, accept the worse solution with the Boltzmann probability
% exp(incNewReward/(k*T)), which lies in (0,1] here (incNewReward <= 0)
% and shrinks as T decreases.
ELSE
% BUG FIX: the original test was  rand > exp((-incNewReward)/(k*T)).
% With incNewReward <= 0 that right-hand side is >= 1, so the test was
% never true and downhill moves were never accepted, defeating the
% annealing.  The Metropolis rule accepts when a uniform draw falls
% below the Boltzmann factor:
IF (rand < exp(incNewReward/(k*T)));
cPolicy = nPolicy;
OldReward = NewReward;
Success = Success+1;
ELSE
Consec = Consec+1;
END
END
END
% --- Report the optimum ------------------------------------------------
% After the annealing loop ends, the most recently accepted policy and
% its reward are returned as the optimal solution and its value.
OptSol   = cPolicy;
OptValue = OldReward;