WHILE t < T+1
% Select a drug using the ε-greedy method: the greedy (max-Q) drug is chosen when pn < 1-(1/log(n+2)), a threshold that increases with n; otherwise a random drug is chosen.
pn = rand(1);
IF (pn < (1-(1/log(n+2))))
[nil,cDrug] = max(Q{t}(cState,:));
ELSE cDrug = randi([1,size(SS{t},1)]); END
% Update the treatment history.
tHist = [tHist;cDrug];
% Simulate the next state and the reward R(s,a,s') associated with the transition.
nState = randi([1,3]);
IF nState ~= 3 % For the alive patients,
dHist = [dHist,nState]; % Update the disease history.
[~,nStateIdx] = ismember(dHist,HS{t+1},'rows');
% Evaluate the one-step reward for the transition from cState to nState when cDrug is used at t, using EvModelDC.
% fProb is a 1x3 matrix containing the transition probabilities from cState to the next health states.
% fSBP and fSBPSD are 1x2 matrices containing the mean SBPs for the controlled and uncontrolled patients after treatment.
[IR,fProb,fSBP,fSBPSD] = EvModelDC...
(Scenario,t,dHist,cDrug,cProb,cSBP,cSBPSD,cMT,DrugForCVDDM);
% Generate the subsequent health states depending on the future transitions to be considered.
Dostları ilə paylaş: |