%% Take home exam, Question 3:
%% The following is excerpted from "BanditScript01.m".
% Note that we don't need most of it - mainly just the line that calls
% banditE, so feel free to delete what you don't need.
%
% Averages the per-play reward vector returned by banditE over many
% independent trials and plots the result.

NumPlays=1000;   %Plays per trial; must match the length of R returned by banditE
NumTrials=2000;  %Number of independent trials to average over
E=0.4;           %Third argument to banditE (presumably the epsilon of an
                 %epsilon-greedy strategy - confirm against banditE.m)

Ravg=zeros(NumPlays,1);  %Running sum of R, turned into a mean after the loop
for j=1:NumTrials
    m=randn(10,1);                   %Fresh 10x1 standard-normal vector for each trial
                                     %(presumably the true machine payouts - confirm)
    [As,Q,R]=banditE(NumPlays,m,E);  %Only R is used here; As and Q are ignored
    Ravg=Ravg+R;
    if mod(j,10)==0                  %Progress message every 10th trial
        fprintf('On iterate %d\n',j);
    end
end
Ravg=Ravg./NumTrials;  %Convert the running sum into the mean over trials
plot(Ravg);

%% The rest of the script is softmaxScript01.m
% Feel free to edit the script below. The following commands will clear
% the memory from the previous problem.
clear; clc;

%% Script file to run the N-armed bandit using the softmax strategy

%Initializations are here:
NumMachines=10;              %Number of machines
ActQ=randn(NumMachines,1);   %This creates our actual payouts
NumPlay=1000;                %Number of times to play

%The variable tau controls the "temp" - hot means more randomness, cold
%means lock into the highest paying machine.
Initialtau=10;
Endingtau=0.5;
tau=Initialtau;              %Start at the initial temperature (was a duplicated literal 10)

%Some other variables:
NumPlayed=zeros(NumMachines,1);  %Running count of the times each action is selected
ValPlayed=zeros(NumMachines,1);  %Running sum of the total reward for each action
EstQ=zeros(NumMachines,1);       %Storage space for our estimated payouts
PayoffHistory=zeros(NumPlay,1);  %Record of the payoff received on each play

for i=1:NumPlay
    %Pick a machine to play. softmax is an external helper; its result is
    %used below as an index into ActQ/NumPlayed/ValPlayed/EstQ.
    a=softmax(EstQ,tau);

    %Play the machine: the payoff is the machine's actual payout plus
    %standard-normal noise.
    Payoff=randn+ActQ(a);
    NumPlayed(a)=NumPlayed(a)+1;
    ValPlayed(a)=ValPlayed(a)+Payoff;

    %Update the estimate (sample mean of the rewards seen for machine a)
    %and store the payoff.
    EstQ(a)=ValPlayed(a)/NumPlayed(a);
    PayoffHistory(i)=Payoff;

    %Update tau for the next round: geometric interpolation that moves from
    %Initialtau toward Endingtau and equals Endingtau when i==NumPlay.
    tau=Initialtau*(Endingtau/Initialtau)^(i/NumPlay);
end

[v,winningmachine]=max(ActQ);  %v is the best actual payout, winningmachine its index

%No trailing semicolons: these two lines intentionally echo their values.
winningmachine
NumPlayed

%Open a new figure so this plot does not overwrite the Ravg plot above.
figure;
%Actual payouts in black, estimated payouts in red, one point per machine.
plot(1:NumMachines,ActQ,'k',1:NumMachines,EstQ,'r')