ELSE t=5; END % For patients who died.
END % Reach a terminal state.
END % The end of learning in time t.
% Determine the best drug for each health state at each time t based on the Q-values.
FOR t = 1:T
FOR h = 1:size(Q{t},1)
[v,idx] = sort(Q{t}(h,:),'descend');
FOR a = 1:size(v,2)
IF the feasibility assumptions are satisfied,
Break
END
[OptV(:,:),OptSol(:,:)] = max(Q{t},[],2);
END
END
END
|