%% Script file to run the N-armed bandit using the softmax strategy
% Each machine pays out its true mean value plus unit-variance Gaussian
% noise. On every play a machine is chosen by softmax(EstQ,tau), the
% sample-average estimate for that machine is updated, and the temperature
% tau is annealed geometrically from Initialtau toward Endingtau.
%
% NOTE(review): softmax(EstQ,tau) is not defined in this file. It is assumed
% to be a user-supplied function on the path that returns the index of the
% machine to play -- confirm it is not shadowed by a toolbox function of the
% same name with a different signature.

%% Initializations
NumMachines = 10;                      % Number of machines (arms)
ActQ        = randn(NumMachines,1);    % Actual mean payoff of each machine
NumPlay     = 1000;                    % Number of plays
Initialtau  = 10;                      % Initial tau ("high in beginning")
Endingtau   = 0.5;                     % Value tau is annealed toward
tau         = Initialtau;              % Current tau; first play uses Initialtau

NumPlayed     = zeros(NumMachines,1);  % Running count of times each machine was selected
ValPlayed     = zeros(NumMachines,1);  % Running sum of the total reward for each machine
EstQ          = zeros(NumMachines,1);  % Estimated mean payoff of each machine
PayoffHistory = zeros(NumPlay,1);      % Record of the payoff received on each play

%% Main Loop
for play = 1:NumPlay
    % Pick a machine to play using the current estimates and temperature.
    a = softmax(EstQ,tau);

    % Play the machine: true mean plus standard-normal noise.
    Payoff = randn + ActQ(a);

    % Update the sample-average estimate for the machine just played.
    NumPlayed(a) = NumPlayed(a) + 1;
    ValPlayed(a) = ValPlayed(a) + Payoff;
    EstQ(a)      = ValPlayed(a)/NumPlayed(a);
    PayoffHistory(play) = Payoff;

    % Geometric annealing: tau equals Endingtau after the final play, so the
    % selection on play k uses the value computed at the end of play k-1.
    tau = Initialtau*(Endingtau/Initialtau)^(play/NumPlay);
end

%% Display results
% Only the index of the best machine is needed, so the max value is discarded.
[~,winningmachine] = max(ActQ);
fprintf('The Winning Machine is %d\n',winningmachine);
fprintf('It was played %d times\n',NumPlayed(winningmachine));

% Compare actual and estimated payoffs across all machines.
machines = 1:NumMachines;
plot(machines,ActQ,'k',machines,EstQ,'r');
legend('Actual','Estimated');