% Temporal-difference error for the Q-value of (cStateIdx, cDrug) at stage t.
% The bootstrap term is the mean of the best Q-values over one, two or three
% future stages; FR together with the current stage t selects how many.
% nStateIdx1 (row of the next state in Q{t+1}) is set before this point.
% NOTE(review): IR and DR are presumably the immediate reward and the
% discount rate -- confirm against their definitions.
if FR == 1 || (FR > 1 && t == 3)
    % One future stage: bootstrap on stage t+1 only.
    delta = IR + DR * max(Q{t+1}(nStateIdx1,:)) - Q{t}(cStateIdx,cDrug);
elseif FR == 2 || (FR > 2 && t == 2)
    % Two future stages: draw a stage-(t+2) state uniformly from {1,2},
    % append it to dHist and look the resulting row up in HS{t+2}.
    nState2 = randi([1,2]);
    fdHist = [dHist,nState2];
    [~,nStateIdx2] = ismember(fdHist,HS{t+2},'rows');
    delta = IR + DR * (max(Q{t+1}(nStateIdx1,:)) + max(Q{t+2}(nStateIdx2,:)))/2 - Q{t}(cStateIdx,cDrug);
else
    % Three future stages: extend the history by two random states and look
    % each extended row up in the matching HS table.
    nState2 = randi([1,2]);
    fdHist1 = [dHist,nState2];
    nState3 = randi([1,2]);
    fdHist2 = [fdHist1,nState3];
    [~,nStateIdx2] = ismember(fdHist1,HS{t+2},'rows');
    [~,nStateIdx3] = ismember(fdHist2,HS{t+3},'rows');
    % Stage t+3 must be indexed with nStateIdx3 (the row found in HS{t+3}),
    % not with the stage-(t+2) row index.
    delta = IR + DR * (max(Q{t+1}(nStateIdx1,:)) + max(Q{t+2}(nStateIdx2,:)) + max(Q{t+3}(nStateIdx3,:)))/3 - Q{t}(cStateIdx,cDrug);
end
% Scale the TD error by the step size and apply it to the Q-value.
% NOTE(review): the earlier comment stated the learning rate as
% 1/sqrt(n+1), but the code uses 1/sqrt(n+2). The code is left unchanged --
% confirm which of the two is intended.
dQ = (1/sqrt(n+2))*delta;
Q{t}(cStateIdx,cDrug) = Q{t}(cStateIdx,cDrug)+dQ;
% Record the magnitude of this Q-value change.
Discrepancy = [Discrepancy,abs(dQ)];
% Once 100 changes have been recorded, append their mean to the stage-t
% history and start a new batch.
if size(Discrepancy,2) == 100
    % Append to the stage-t entry itself; concatenating the whole cell array
    % mDiscrepancy here would nest it inside mDiscrepancy{t}.
    % Assumes mDiscrepancy{t} already exists (e.g. initialised to []) --
    % TODO confirm against the initialisation code.
    mDiscrepancy{t} = [mDiscrepancy{t},mean(Discrepancy,2)];
    Discrepancy = [];
end
% Advance to the next stage: increment t and copy the next state's values
% into the "current" (c-prefixed) variables.
t = t+1;
% NOTE(review): in the look-ahead code visible above, fdHist is assigned in
% only one branch; in the other branches this reads whatever fdHist held
% before -- confirm it is always defined and ends with the intended state.
cState = fdHist(1,end);
% NOTE(review): the code above indexes the next state with nStateIdx1, while
% nStateIdx is set outside this excerpt -- confirm this is the intended index.
cStateIdx = nStateIdx;
% Look up the probability, SBP and SBPSD entries selected by nState.
cProb = fProb(1,nState);
cSBP = fSBP(1,nState);
cSBPSD = fSBPSD(1,nState);
% t was already incremented above, so t+1 here is the pre-increment t plus 2.
% NOTE(review): confirm this column offset is intended.
cMT = Maintenance(nState,t+1);
Dostları ilə paylaş: |