function net=main()
% Putting this in terms of a function so we can have the subroutines under
% the function. This is an attempt at a neural net in a single file.
% The net is defined by the following, which are indexed 1,2:
%      P, S, dS, Delta, W, b

%% Data:
load irisdata2   % provides the inputs X (4 x numpts) and targets T (3 x numpts)
[Xs,Params]=StandardScaler(X,4);
[Xtrain,Xtest,Ttrain,Ttest]=TrainTestSplit(Xs,T,0.3);

%% Define parameters:
alpha=0.1;       % Learning rate for Stochastic Gradient Descent
NumEpochs=800;   % Number of epochs
[xdim,Numpts]=size(Xtrain);
[tdim,~]=size(Ttrain);
NetNodes=[xdim, 10, tdim];   % input, hidden, and output layer sizes

%% Define/Initialize structures of the network
P{1}=zeros(NetNodes(2),1);      % pre-activations
S{1}=zeros(NetNodes(2),1);      % activations
dS{1}=zeros(NetNodes(2),1);     % derivatives of the activations
Delta{1}=zeros(NetNodes(2),1);  % backpropagated error signals
P{2}=zeros(NetNodes(3),1);
S{2}=zeros(NetNodes(3),1);
dS{2}=zeros(NetNodes(3),1);
Delta{2}=zeros(NetNodes(3),1);

W{1}=randn(NetNodes(2),NetNodes(1));
b{1}=randn(NetNodes(2),1);
W{2}=randn(NetNodes(3),NetNodes(2));
b{2}=randn(NetNodes(3),1);

err=zeros(1,NumEpochs);

%% Main Training Loop
for j=1:NumEpochs
    err(j)=0;
    for k=1:Numpts
        % Forward pass:
        P{1}=W{1}*Xtrain(:,k)+b{1};
        S{1}=activate(P{1});
        dS{1}=dactivate(P{1});
        P{2}=W{2}*S{1}+b{2};
        S{2}=P{2};                  % linear (identity) output layer
        dS{2}=ones(size(P{2}));

        % Backwards pass:
        Delta{2}=Ttrain(:,k)-S{2};
        Delta{1}=(W{2})'*Delta{2}.*dS{1};

        % Update weights and biases:
        dW{1}=Delta{1}*Xtrain(:,k)';
        db{1}=Delta{1};
        dW{2}=Delta{2}*(S{1})';
        db{2}=Delta{2};

        W{1}=W{1}+alpha*dW{1};
        b{1}=b{1}+alpha*db{1};
        W{2}=W{2}+alpha*dW{2};
        b{2}=b{2}+alpha*db{2};

        err(j)=err(j)+norm(Ttrain(:,k)-S{2});   % accumulate epoch error
    end
end

% Although we shouldn't really do this, sometimes it's good to know that you
% have a small error on your training set to be sure your algorithm is
% correct.
Ztrain=W{2}*(activate(W{1}*Xtrain+b{1}))+b{2};
[~,t1]=max(Ztrain,[],1);   % predicted classes
[~,t2]=max(Ttrain,[],1);   % actual classes
[Ct,et]=conf_matrix(t2,t1,3);   % actual first, then predicted, per conf_matrix
Ct

% Now compute our "real" estimate of the error:
Zout=W{2}*(activate(W{1}*Xtest+b{1}))+b{2};
[~,t1]=max(Zout,[],1);
[~,t2]=max(Ttest,[],1);
[C,ee]=conf_matrix(t2,t1,3);
C

% This section is here if you want to save or look at the data:
net.W{1}=W{1};
net.W{2}=W{2};
net.b{1}=b{1};
net.b{2}=b{2};
net.Xtrain=Xtrain;
net.Xtest=Xtest;
net.Ttrain=Ttrain;
net.Ttest=Ttest;
net.err=err;
net.ScalingParams=Params;
end

function y=activate(x)
% Default for now is sigmoidal (Matlab's logsig, but we'll compute it)
y=1./(1+exp(-x));
end

function dy=dactivate(x)
% Derivative of the sigmoid: s'(x)=s(x).*(1-s(x))
y=activate(x);
dy=y.*(1-y);
end

function [C,err]=conf_matrix(actual_vals,predicted_vals,N)
% function [C,err]=conf_matrix(actual_vals,predicted_vals,N)
% Input:  Vector of actual classes and predicted classes (in that order).
%         Also: number of classes, N.
%   ** It is assumed that the classes are 1, 2, 3, ..., N.
% Output: Confusion matrix C (rows = predicted, columns = actual) and
%         overall error rate in err.
p=length(actual_vals);   % total number of data points
C=zeros(N,N);
for j=1:p
    C(predicted_vals(j),actual_vals(j))=C(predicted_vals(j),actual_vals(j))+1;
end
err=1-sum(diag(C))/p;    % fraction misclassified
end
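function Z=netforward(net,X)
% A small convenience sketch, not called by main(): it evaluates the
% trained two-layer network on a whole matrix of (already scaled) inputs
% X, xdim x m, by repeating the two evaluation lines from main() in one
% place. The name netforward is our own addition; nothing above depends
% on it.
Z=net.W{2}*activate(net.W{1}*X+net.b{1})+net.b{2};
end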
function [X,Params]=StandardScaler(X,n)
% function [X,Params]=StandardScaler(X,n)
% Input:  Data matrix X with dimension n (can be n x p or p x n).
% Output: Scaled matrix X (zero mean, unit std in each of the n dimensions)
%         Params.m = mean used for scaling (n-dim vector)
%         Params.s = std used for scaling (n-dim vector)
% Note: the subtraction/division below uses implicit expansion (R2016b+).
[mm,nn]=size(X);
if mm==n
    % Matrix is n x p
    m=mean(X,2);
    s=std(X,0,2);
    X=(X-m)./s;
elseif nn==n
    % Matrix is p x n
    m=mean(X,1);
    s=std(X,0,1);
    X=(X-m)./s;
else
    error('Dimension mismatch in StandardScaler');
end
Params.m=m;
Params.s=s;
end

function [Xtrain,Xtest,Ttrain,Ttest]=TrainTestSplit(X,T,p)
% Input: Data in X, T, and the fraction to hold out for testing, e.g. 0.3.
% We assume that the data is arranged as dim x numpts.
[~,c1]=size(X);
[~,c2]=size(T);
if c1~=c2
    error('Dimension mismatch in TrainTestSplit');
end
NumTest=floor(p*c1);
NumTrain=c1-NumTest;
idx=randperm(c1);          % random shuffle before splitting
tridx=idx(1:NumTrain);
teidx=idx(NumTrain+1:end);
Xtrain=X(:,tridx);
Xtest=X(:,teidx);
Ttrain=T(:,tridx);
Ttest=T(:,teidx);
end
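% Usage sketch (run from the command line or another script; Xnew below is
% a hypothetical matrix of new, UNSCALED measurements, xdim x m). Since
% activate is local to this file, the sigmoid is written out inline:
%
%   net=main();                % train and print both confusion matrices
%   plot(net.err);             % training error per epoch
%   Xs=(Xnew-net.ScalingParams.m)./net.ScalingParams.s;
%   Z=net.W{2}*(1./(1+exp(-(net.W{1}*Xs+net.b{1}))))+net.b{2};
%   [~,labels]=max(Z,[],1);    % predicted classes 1,2,3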