功能描述:
聯(lián)系:highspeedlogic
QQ :1224848052
微信:HuangL1121
郵箱:1224848052@qq.com
網(wǎng)站:http://www.mat7lab.com/
網(wǎng)站:http://www.hslogic.com/
微信掃一掃:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is the main program which does initializations and calls all other programs %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Close figures, start the clock, define constants
clear, close, tic
global nic % these globals are needed in atraingd
global nh1c
global noc
global nia
global nh1a
global noa
dsp=0; % display or not
batch=1; % batch or non-batch training
nst=4; % how many states
twelve=12; % use degrees
N=6000; % maximum number of steps to try
gamma=0.9; % critic discount factor
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% For tracking problems, define ystar here %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%ystar=zeros(1,N+3); for t=1:N+3, ystar(t)=sin(2*pi*(t-1)/100); end
%for t=1:N+3, ystar(t)=(sin(2*pi*(t-1)/50)+sin(2*pi*(t-1)/30)); end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define the critic network; cnet %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
disp(' '); disp('Define the critic network'), disp(' ');
critic_total=1; nc=[5 6 1]; nic=nc(1); nh1c=nc(2); noc=nc(3);
%Mcp=[1.5; 10*twelve/20; 2.4; twelve/4; 1]; mcp=-Mcp; % x_dot theta_dot x theta u
Mcp=[twelve/4; 10*twelve/20; 2.4; 1.5; 1]; mcp=-Mcp; % theta theta_dot x x_dot u
cnet_in=[-ones(nic,1) ones(nic,1)];
%cnet=newff(cnet_in, [nh1c noc],{'tansig','purelin'},'ctraingd');
cnet=newff(cnet_in, [nh1c noc],{'tansig','purelin'},'traingd');
% ctraingd = traingd with display turned off
cnet.trainParam.epochs = 200; cnet.trainParam.show = 500;
cnet.trainParam.goal = 1e-5; cnet.trainParam.lr = 0.15;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define the action network; anet %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
disp(' '); disp('Define the action network'), disp(' ');
action_total=200; na=[4 6 1]; nia=na(1); nh1a=na(2); noa=na(3);
Map=Mcp(1:4); map=-Map;
anet_in=[-ones(nia,1) ones(nia,1)];
anet=newff(anet_in, [nh1a noa],{'tansig','tansig'},'atraingd');
% atraingd = traingd without updating cnet and with no displays
anet.trainParam.epochs = action_total; anet.trainParam.show = 500;
anet.trainParam.goal = 2e-3; anet.trainParam.lr = 0.2;
if batch==0
cnet.trainParam.epochs = 50; anet.trainParam.lr = 0.1; % non-batch
end
anet.performFcn = 'mse';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Call wnet to construct a network for anet-cnet %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wnet
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Continuously train cnet and anet until good ones are obtained %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for run=1:1000
% Every run should use a different seed; can also assign a seed
rn=8241; nc=clock; nc(1)=0; nc(3)=2*nc(3); nc(4)=5*nc(4); nc(6)=60*nc(6);
rn=fix(516*sum(nc)+rn); if rn>1.5e9, rn=fix(rn/5e6); end, rand('seed',rn);
%rand('seed',1551111); % set seed
rseed=rand('seed'); fprintf('Random seed is %d\n\n',rseed);
% Initialize cnet and anet % do not use cnet=init(cnet); anet=init(anet);
cw=getx(cnet); [m,n]=size(cw); cw=2*(0.2*rand(m,n)-0.1); cnet=setx(cnet,cw);
cw=getx(anet); [m,n]=size(cw); cw=2*(0.2*rand(m,n)-0.1); anet=setx(anet,cw);
disp('Initialize cnet and anet'); disp(' ');
if dsp==1
for i=1:27
fprintf('---');
end
fprintf('\n');
end
for trial=1:100
% Empty all vectors and matrices
u=zeros(noa,N); xstar=zeros(nst,N); t=1; fall=0; UU=[];
tarc=1.234; inputct=[]; targetct=[];
% Set plant's initial states & calculate the initial action for t=1
pout=zeros(nst,1); pout(1)=(rand*twelve*2-twelve)/twelve/2;
xstar(1,1)=pout(1); xstar(:,1)=pout;
for i=1:27, fprintf('---'); end, fprintf('\n');
fprintf('Initial states: %g, %g, %g, %g\n',pout);
aino=[pout(4); pout(2); pout(3); pout(1)];
ain=tramnmx(aino,map,Map);
ano=0.1*rand-0.05; u1=0*ano+sim(anet,ain); u(:,t)=u1;
cino=[ain; u1];
% The cycle starts here
while fall==0 && t<=N,
% Apply action to the plant and determine the output
pin=pout; uin=u1;
pout=plant(pin,uin);
if abs(pout(1))>twelve || abs(pout(3))>2.4, fall=1; end
% Increment time by 1 and store states starting from t=2
t=t+1; xstar(:,t)=pout;
% Calculate and store the action from t=2
aino=[pout(4); pout(2); pout(3); pout(1)];
ain=tramnmx(aino,map,Map);
ano=0.1*rand-0.05; u1=0*ano+sim(anet,ain); u(:,t)=u1;
cin=[ain; u1];
% Calculate and store the utility function
U=[]; U=[U fall]; UU=[UU fall]; % U for non-batch and UU for batch
% If it is a success, save the results
if t>N && fall==0,
eval(['save all-' num2str(run) '-' num2str(trial)])
fprintf('The pole has been balanced for %d steps.\n',t);
break
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Cnet non-batch training here
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The input to cnet starting from t=1 and t=2
inputc=tramnmx(cino,mcp,Mcp);
inputct=[inputct inputc];
inputc1=tramnmx(cin,mcp,Mcp); cino=cin;
Qold=sim(cnet,inputc);
Q=sim(cnet,inputc1); targetc=U+gamma*Q;
% Non-batch cnet training
if batch==0,
for c_cycle=1:critic_total
Q=sim(cnet,inputc1); targetc=U+gamma*Q;
[cnet,ctr]=train(cnet,inputc,targetc); % non-batch training
end
targetct=[targetct targetc]; tarc=[tarc targetc];
end
% Displays etc.
if dsp==1
if t==2
fprintf(' %2d-%d-%d, f=%7.4f, tht=%8.4f\n',t-1,trial,run,uin,pin(1));
end
if t<200
fprintf(' %2d-%d-%d, f=%7.4f, tht=%8.4f\n',t,trial,run,u1,pout(1));
end
%fprintf(', tarc=%6.3f, oldc=%6.3f\n',targetc,Qold); end
end %dsp
if t==fix(t/500)*500
fprintf('Trial %d has lasted for %d steps.\n',trial,t);
end
end % while
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training of cnet and anet here; batch mode only
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training only if failure and small norm
if fall~=0
fprintf('Trial %d was %d steps (in run %d).\n',trial,t,run);
% Weight scaling before training to prevent local minima
cw=getx(anet); norma=norm(cw,inf);
if norma>=0.9
norma=norm(cw,1); cw=cw/norma;
[m,n]=size(cw); cw=1.5*(0.2*rand(m,n)-0.1);
anet=setx(anet,cw); norma=norm(cw,inf);
cw=getx(cnet); normc=norm(cw,1); cw=cw/normc;
[m,n]=size(cw); cw=1.5*(0.2*rand(m,n)-0.1);
cnet=setx(cnet,cw); normc=norm(cw,inf);
else
if batch==1,
% Cnet batch training
inputb=[xstar(4,1:t-1); xstar(2,1:t-1); xstar(3,1:t-1); xstar(1:t-1); ...
u(:,1:t-1)];
inputc=tramnmx(inputb,mcp,Mcp);
inputb=[xstar(4,2:t); xstar(2,2:t); xstar(3,2:t); xstar(1,2:t); u(:,2:t)];
inputc1=tramnmx(inputb,mcp,Mcp);
for c_cycle=1:critic_total
Qc=sim(cnet,inputc1); targetc=UU+gamma*Qc;
for ic=1:t-1, targetc(ic)=gamma^(t-ic+1); end
[cnet,ctr]=train(cnet,inputc,targetc);
end
tarc=[targetc 0];
end
fprintf('Rescale: anet norm =');
fprintf(' %6.4f, cnet = %6.4f, gamma = %g.\n',norma,normc,gamma);
% Assign cnet to part of wnet
for i=1:nia, wnet.iw{3,nia+i}=cnet.iw{1,1}(:,i); end
wnet.lw{3,2}=cnet.iw{1,1}(:,nia+1:nic); wnet.b{3}=cnet.b{1};
wnet.lw{4,3}=cnet.lw{2,1}; wnet.b{4}=cnet.b{2};
% Assign anet to part of wnet
for l=1:2, wnet.b{l}=anet.b{l}; end
for i=1:nia, wnet.iw{1,i}=anet.iw{1,1}(:,i); end
wnet.lw{2,1}=anet.lw{2,1};
% Determine the inputs and taregts for anet training
xb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t)];
xa=tramnmx(xb,map,Map); inpa=[]; inpa=[inpa [xa; xa]];
inputa=mat2cell(inpa,ones(1,2*nia),t);
targetb=0*ones(1,t); targeta={targetb};
[wnet,wtr]=train(wnet,inputa,targeta);
% Read the anet out from wnet
for l=1:2, anet.b{l}=wnet.b{l}; end
amat=[]; for i=1:nia, amat=[amat wnet.iw{1,i}]; end
anet.iw{1,1}=amat; anet.lw{2,1}=wnet.lw{2,1};
cw=getx(cnet); normc=norm(cw,inf);
cw=getx(anet); norma=norm(cw,inf);
fprintf('Trained: anet norm =');
fprintf(' %6.4f, cnet = %6.4f, gamma = %g.\n',norma,normc,gamma);
% Summary displays here
if dsp==1 && fall~=0
inputb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t); u(:,1:t)];
inputc=tramnmx(inputb,mcp,Mcp);
Qc=sim(cnet,inputc); Q=[Q Qc];
xb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t)];
xa=tramnmx(xb,map,Map); inpa=[]; inpa=[inpa [xa; xa]];
inputa=mat2cell(inpa,ones(1,2*nia),t);
new1=sim(wnet,inputa); new2=cell2mat(new1);
neww=new2(2,:);
for i=1:t
fprintf(' %2d-%d-%d, f=%4.1f, tht=%5.1f, tarc=%6.3f',...
i,trial,run,u(1,i),xstar(1,i),tarc(i));
fprintf(', newc=%6.3f, neww=%6.3f\n',Q(i),neww(i));
end
end % dsp and fall
end % norma
end % fall
%if fall~=0, fprintf('Trial %d was %d steps.\n',trial,t); end
if t>N,
fprintf('The pole was balanced in trial %d.\n',trial);
cw=getx(anet); norma=norm(cw,inf);
fprintf(' .... anet norm = %g\n',norma);
fprintf(' .... aino = %g, %g, %g, %g\n',aino);
fprintf(' .... ain = %g, %g, %g, %g\n',ain); break
end
end % trial
if t>N, fprintf('The pole balancing was successful in run %d.\n',run); end
fprintf('\n'); fprintf('\n'); for i=1:28, fprintf('###'); end, fprintf('\n');
end % run
hr=fix(toc/3600); mint=fix(toc/60)-hr*60; sec=toc-hr*3600-mint*60;
disp([' Time used =',int2str(hr),':',int2str(mint),':',int2str(sec)]);
disp([' Program finished at :',int2str(fix(clock))]);