%% Script version of a previous function (banditE.m).
%
% Epsilon-greedy simulation of an N-trial, 10-armed bandit.
% This file can be used to create plots.

%% Initialization:
N = 100;            % Number of trials
Aq = randn(10,1);   % Mean reward for each of the machines.
E = 0.2;            % Epsilon for the epsilon-greedy algorithm

% Validate epsilon BEFORE it is used (the original checked only after the
% greedy schedule was already built, and never rejected negative values).
if E < 0 || E >= 1
    error('The epsilon should be between 0 and 1\n');
end

numbandits = length(Aq);        % Number of Bandits
ActNum = zeros(numbandits,1);   % Running count of the number of times
                                % each action is selected.
ActVal = zeros(numbandits,1);   % Running sum of the total reward
                                % obtained for each action.
Q  = zeros(1,numbandits);       % Current reward estimates
As = zeros(N,1);                % Storage for action chosen each trial
R  = zeros(N,1);                % Storage for reward received each trial

%*********************************************************************
% Set up a flag so we know when to choose at random (using epsilon)
%*********************************************************************
greedy = zeros(1,N);
if E > 0
    m = round(E*N);             % Total number of times we choose at random
    greedy(1:m) = ones(1,m);
    m = randperm(N);            % Scatter the exploratory trials uniformly
    greedy = greedy(m);
    clear m
end

%********************************************************************
%
% Now we're ready for the main loop
%********************************************************************
for j = 1:N
    % STEP ONE: SELECT AN ACTION (cQ), GET THE REWARD (cR)!
    if greedy(j) > 0
        cQ = randi(numbandits);     % Explore: pick a machine uniformly
        cR = randn + Aq(cQ);
    else
        idx = find(Q == max(Q));    % All machines tied for best estimate
        m = randi(length(idx));     % Choose a max at random (tie-break)
        cQ = idx(m);
        cR = randn + Aq(cQ);
    end

    R(j) = cR;

    % UPDATE FOR NEXT GO AROUND!
    As(j) = cQ;
    ActNum(cQ) = ActNum(cQ) + 1;
    ActVal(cQ) = ActVal(cQ) + cR;
    Q(cQ) = ActVal(cQ)/ActNum(cQ);  % Sample-average reward estimate

    % Plot of current estimates against the true means
    plot(1:numbandits, Aq, 1:numbandits, Q);
    pause(0.2);
end

%% As, Q, R are what we have computed.
% As = Which machine was played.
% Q  = Reward estimates (Recall Aq are the actual means)
% R  = Vector of payouts.
plot(R)