%Script file to show how the error (on the testing set) can depend
%  on how many (and what type) cluster centers we choose.

%There are two loops below: the inner loop changes the number of centers,
%and the outer loop repeats the experiment so that we can average over 30
%different random placements of these centers.

% This takes a moment to run...

% --- Experiment parameters (previously hard-coded throughout the script) ---
numSamples = 1500;   % total number of synthetic data points
numTrain   = 300;    % points used for training; the remainder is the test set
numTrials  = 30;     % outer loop: number of random center placements averaged
maxCenters = 20;     % inner loop: number of centers runs over 1..maxCenters

% Centers are drawn without replacement from the training points, so we
% cannot ask for more centers than there are training points.
if maxCenters > numTrain
    error('maxCenters (%d) cannot exceed numTrain (%d).', maxCenters, numTrain);
end

% Synthetic data: a 2-D Gaussian bump observed with additive Gaussian noise.
X=randn(numSamples,2);
Y=exp(-(X(:,1).^2+X(:,2).^2)./4)+0.5*randn(numSamples,1);  %Actual data

% Random split into training and test sets.
splitPerm=randperm(numSamples);
Xtrain=X(splitPerm(1:numTrain),:);
Ytrain=Y(splitPerm(1:numTrain),:);

Xtest=X(splitPerm(numTrain+1:end),:);
Ytest=Y(splitPerm(numTrain+1:end),:);

% Preallocate the result matrices (rows: trials, columns: number of centers).
% Besides avoiding repeated reallocation, this guarantees that stale, larger
% TrainErr/Err matrices left in the workspace by an earlier run cannot leak
% into the averages plotted below.
TrainErr=zeros(numTrials,maxCenters);
Err=zeros(numTrials,maxCenters);

for k=1:numTrials
for j=1:maxCenters
    NumClusters=j;

    % Pick NumClusters distinct training points at random to act as centers.
    centerPerm=randperm(numTrain);
    C=Xtrain(centerPerm(1:NumClusters),:);

    % edm and rbf1 are helpers defined outside this script. Presumably edm
    % returns the distances between the rows of its two arguments and rbf1
    % applies a radial basis function to them -- confirm against their
    % definitions. The arguments (1,3) are passed through unchanged.
    A=edm(Xtrain,C);
    Phi=rbf1(A,1,3);

    % Least-squares weights via the pseudoinverse.
    alpha=pinv(Phi)*Ytrain;

    % NOTE(review): the error below is norm(residual)/N, not an RMSE (which
    % would be norm(residual)/sqrt(N)). Because the training and test sets
    % have different sizes, the two curves are not on a common scale.
    TrainErr(k,j)=(1/length(Ytrain))*norm(Phi*alpha-Ytrain);

    %Compute the error on the held-out test set, using the same centers
    %and the weights fitted on the training set:
    A=edm(Xtest,C);
    Phi=rbf1(A,1,3);
    Z=Phi*alpha;
    Err(k,j)=(1/length(Ytest))*norm(Ytest-Z);
end
end

% Average over trials (dimension 1). The dimension is given explicitly so
% that the result is still one value per center count when numTrials is 1.
figure(1)
plot(1:maxCenters,mean(TrainErr,1));
title('Training error tends to always decrease...');
xlabel('Number of centers');
ylabel('Average error');
figure(2)
plot(1:maxCenters,mean(Err,1));
title('Average error on test set by number of centers used');
xlabel('Number of centers');
ylabel('Average error');