%% Take home exam, Question 3:

%% The following is excerpted from "BanditScript01.m".
% Note that we don't need most of it -- mainly just the line that calls
% banditE -- so feel free to delete what you don't need.

% Average the reward history returned by banditE over many independent
% runs, each with a freshly drawn set of 10 machine means, then plot it.
NumPlays=1000;   %Plays per run (first argument to banditE; also the length of Ravg)
NumRuns=2000;    %Number of independent runs to average over
E=0.4;           %Third argument to banditE (presumably the epsilon of the
                 %epsilon-greedy strategy -- confirm against banditE.m)

Ravg=zeros(NumPlays,1);   %Running sum of the per-play rewards across runs
for j=1:NumRuns
    m=randn(10,1);   %New standard-normal vector of 10 values for this run
    %Only the third output is used here; R is assumed to be NumPlays-by-1
    %so that it adds elementwise to Ravg.
    [~,~,R]=banditE(NumPlays,m,E);
    Ravg=Ravg+R;
    if mod(j,10)==0
        %Progress report every 10th run
        fprintf('On iterate %d\n',j);
    end
end
Ravg=Ravg./NumRuns;   %Turn the running sum into an average over runs
plot(Ravg);


%% The rest of the script is softmaxScript01.m
% Feel free to edit the script below.  The following commands will clear
% the memory from the previous problem.

clear;
clc;

%% Script file to run the N-armed bandit using the softmax strategy

%Initializations are here:
NumMachines=10;            %Number of machines
ActQ=randn(NumMachines,1); %Actual (true) mean payout of each machine
NumPlay=1000;              %Number of times to play

%The variable tau controls the "temperature": hot (large tau) means more
%randomness, cold (small tau) means locking into the highest-paying machine.
%tau is annealed from Initialtau toward Endingtau as play proceeds.

Initialtau=10;
Endingtau=0.5;
tau=Initialtau;   %Start at the initial temperature (kept in sync with Initialtau)

%Some other variables:
NumPlayed=zeros(NumMachines,1); %Running count of the number of times each machine is selected
ValPlayed=zeros(NumMachines,1); %Running sum of the total reward from each machine
EstQ=zeros(NumMachines,1);      %Estimated payout of each machine (starts at zero)
PayoffHistory=zeros(NumPlay,1); %Record of the payoff received on each play


%Main loop: play NumPlay rounds, choosing a machine with softmax each round
%and keeping sample-average estimates of each machine's payout.
for i=1:NumPlay
    %Pick a machine to play. softmax is defined outside this script; it is
    %assumed to return a single machine index in 1..NumMachines.
    a=softmax(EstQ,tau);

    %Play the machine: payoff is the machine's true mean plus unit-variance
    %Gaussian noise.
    Payoff=randn+ActQ(a);
    NumPlayed(a)=NumPlayed(a)+1;
    ValPlayed(a)=ValPlayed(a)+Payoff;

    %Update the estimate (sample average of rewards seen so far on machine a)
    %and store the payoff.
    EstQ(a)=ValPlayed(a)/NumPlayed(a);
    PayoffHistory(i)=Payoff;

    %Update tau for the next round: geometric interpolation from Initialtau
    %(at i=0) to Endingtau (at i=NumPlay).
    tau=Initialtau*(Endingtau/Initialtau)^(i/NumPlay);
end

%Report the truly best machine and how often each machine was played.
%The missing semicolons are deliberate so the values print to the console.
[~,winningmachine]=max(ActQ);
winningmachine
NumPlayed

%Compare actual payouts (black) with estimated payouts (red). The x-axis is
%built from NumMachines so the plot still works if that setting is changed.
plot(1:NumMachines,ActQ,'k',1:NumMachines,EstQ,'r')