%Script file to run the N-armed bandit using the Win-Stay, Lose-Shift strategy
%
%Each machine pays out its true mean (ActQ) plus unit-variance Gaussian noise.
%On every play a machine is chosen by the external function winstay, the
%sample-average estimate of that machine's payoff (EstQ) is updated, and beta
%is decayed geometrically from Initialbeta to Endingbeta.
%At the end the script displays the truly best machine and the play counts,
%and plots the true payoffs (black) against the estimated payoffs (red).

%Initialize actual payoffs, number of times to play, initial value of beta
NumMachines=10;                  %Number of machines (arms)
ActQ=randn(NumMachines,1);       %True mean payoff of each machine
%Fixed payoffs for reproducible runs (only valid when NumMachines is 10):
%ActQ=[0.5453,1.0130,-0.2397,-0.5880,-0.4892,0.6382,0.7758,0.4898,0.3438,-1.2270]';
NumPlay=2000;                    %Total number of plays
Initialbeta=0.01;                %Initial value of beta ("High in beginning")
Endingbeta=0.001;                %Value beta reaches on the final play
beta=Initialbeta;
NumPlayed=zeros(NumMachines,1);  %Keep a running sum of the number of times each action is selected
ValPlayed=zeros(NumMachines,1);  %Keep a running sum of the total reward for each action
EstQ=zeros(NumMachines,1);       %Sample-average payoff estimate for each machine
PayoffHistory=zeros(NumPlay,1);  %Keep a record of our payoffs
%Start from a uniform distribution over the machines; size it from
%NumMachines so that the script still works when the machine count changes.
Probs=(1/NumMachines)*ones(NumMachines,1);

for i=1:NumPlay

    %Pick a machine to play. winstay is defined outside this script; it
    %returns the chosen machine index and the updated Probs vector.
    %NOTE(review): presumably beta controls how strongly Probs is shifted
    %after a win/loss - confirm against winstay.m.
    [a,Probs]=winstay(EstQ,Probs,beta);

    %Play the machine and update EstQ, beta
    Payoff=randn+ActQ(a);                 %Noisy reward around the true mean
    NumPlayed(a)=NumPlayed(a)+1;
    ValPlayed(a)=ValPlayed(a)+Payoff;
    EstQ(a)=ValPlayed(a)/NumPlayed(a);    %Sample average of rewards seen so far
    PayoffHistory(i)=Payoff;

    %Geometric decay: beta equals Endingbeta when i equals NumPlay
    beta=Initialbeta*(Endingbeta/Initialbeta)^(i/NumPlay);
end

%Display the best true payoff and its machine, and how often each was played
[v,winningmachine]=max(ActQ)
NumPlayed

%Compare true (black) and estimated (red) payoffs for every machine
plot(1:NumMachines,ActQ,'k',1:NumMachines,EstQ,'r')