Study notes on the companion simulation code for "视觉机器学习20讲" (Visual Machine Learning in 20 Lectures) --- Reinforcement Learning

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Purpose: demonstrate a reinforcement learning algorithm applied to computer vision
%          (target classification based on reinforcement learning)
% Environment: Win7, Matlab 2012b
% Modi: NUDT-VAP
% Date: 2014-02-04
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA


global3 % runs the global3.m script, which initializes the global parameters (see the parameter blocks below)


stat=struct('Q',zeros(NS,NA),'iter',0,'old_action',1,'old_state',1,'current_state',1,'rimm',0,'total_reward',0);
done=0; % Simulation flag: 1 means the learning run has ended,
        % 0 means it should continue
while 0==done
    [stat,done]=jump_learn(stat);
end
policy=pol_finder(stat);
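
For context, the driver above repeatedly simulates transitions and applies Q-learning until ITERMAX iterations have elapsed, then extracts the greedy policy. As a sketch (this equation is not in the original code), the Q-factors it estimates are the fixed point of the Bellman optimality equation for the discounted MDP defined by the TPM, TRM and LAMBDA parameters initialized below:

\[
Q^*(i,a)=\sum_{j=1}^{NS}\mathrm{TPM}(i,j,a)\,\bigl[\mathrm{TRM}(i,j,a)+\lambda\,\max_{a'}Q^*(j,a')\bigr],\qquad \lambda=\mathrm{LAMBDA}.
\]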




&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function action=action_selector(stat)


global NA 




ran=rand(1);


candidate=1;


cum_prob=1/NA; % cumulative probability covered by the candidate actions so far


complete=0;


% Selecting each action with equal probability 


while 0==complete
        if ran<cum_prob
            % action selected
            action=candidate;
            complete=1;
        else
            % test whether ran falls in the next action's interval
            candidate=candidate+1;
            cum_prob=cum_prob+(1/NA);
        end
end
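
The loop above selects each of the NA actions with equal probability (pure exploration, no greediness). A minimal equivalent sketch in MATLAB, assuming only that NA is the number of actions:

% Equivalent one-liner (sketch, not part of the original code):
% draw an action index uniformly at random from 1..NA
action = randi(NA);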


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function [stat,done]=jump_learn(stat)


global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM


% This function simulates a jump and also updates the learning stats


old_state=stat.old_state;


old_action=stat.old_action;


% Determine current state 


current_state=state_finder(stat);


% Record Feedback in stat 


stat.current_state=current_state;


stat.rimm=TRM(old_state,current_state,old_action);


% DO LEARNING 


stat=qlearn(stat);


% Select next action 


next_action=action_selector(stat);


% Get ready to get out of this function 


stat.old_state=current_state;


stat.old_action=next_action;


if stat.iter>=ITERMAX
    % Learning should end
    done=1;
else
    done=0;
end














&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function policy=pol_finder(stat)


global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM




for state=1:NS


[maxQfactor,index]=max(stat.Q(state,:));


policy(state)=index;


value_function(state)=maxQfactor;


end


% Display the greedy policy and the corresponding value function
policy

value_function

% Print every Q-factor for inspection
for state=1:NS
    for action=1:NA
        state
        action
        stat.Q(state,action)
    end
end
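
The per-state loop above just takes a row-wise maximum over the Q-table. A minimal vectorized sketch, assuming stat.Q is the NS-by-NA Q-factor matrix used throughout this code:

% Vectorized equivalent (sketch, not part of the original code)
[value_function, policy] = max(stat.Q, [], 2); % max over actions, one row per state
policy = policy';                              % greedy action for each state
value_function = value_function';              % value of each state under that policy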










&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function stat=qlearn(stat)


global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA


% Q-Learning 


% Finding the Max factor in the current state 


q_next=max(stat.Q(stat.current_state,:));




stat.iter=stat.iter+1;




%learn_rate=1/(stat.iter);


learn_rate=log(stat.iter+1)/(stat.iter+1);


%learn_rate=0.5*300/(300+stat.iter);


q=stat.Q(stat.old_state,stat.old_action);


q=q*(1-learn_rate)+(learn_rate*(stat.rimm+(LAMBDA*q_next)));


stat.Q(stat.old_state,stat.old_action)=q;
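
For reference, the update implemented above is the standard tabular Q-learning rule with discount factor LAMBDA and the decaying step size chosen in the code:

\[
Q(s_t,a_t)\leftarrow(1-\alpha_t)\,Q(s_t,a_t)+\alpha_t\Bigl[r_{t+1}+\lambda\,\max_{a'}Q(s_{t+1},a')\Bigr],\qquad \alpha_t=\frac{\ln(t+1)}{t+1},\quad \lambda=\mathrm{LAMBDA}.
\]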








&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&


function candidate=state_finder(stat)
% Sample the next state, given stat.old_state and stat.old_action, from the transition matrix TPM


global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM


ran=rand(1);


old_action=stat.old_action;
old_state=stat.old_state;


cum_prob=TPM(old_state,1,old_action); % cumulative probability of the candidate next states


candidate=1;


complete=0;


while 0==complete
        if ran<cum_prob
            % next state found
            complete=1;
        else
            % test the next candidate state
            candidate=candidate+1;
            cum_prob=cum_prob+TPM(old_state,candidate,old_action);
        end
end
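
The loop above performs inverse-transform sampling on the row TPM(old_state,:,old_action). An equivalent vectorized sketch under the same indexing assumptions:

% Vectorized equivalent (sketch, not part of the original code)
probs = squeeze(TPM(old_state,:,old_action));           % transition probabilities out of old_state
candidate = find(rand(1) < cumsum(probs), 1, 'first');  % sampled index of the next state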
                  


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA




NO_REPLICATIONS=30; % No of replications of simulation 
ITERMAX=10000; % No of iterations of learning 
NA=2; % Number of actions in each state 
NS=2; % Number of states 


LAMBDA=0.8; % discount factor


SMALL=-1000000; 


TPM(:,:,1)=[0.7,0.3;0.4,0.6];


TPM(:,:,2)=[0.9,0.1;0.2,0.8];


TRM(:,:,1)=[6,-5;7,12];
    
TRM(:,:,2)=[10,17;-14,13];
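
Reading state_finder and jump_learn, the indexing convention is TPM(i,j,a) = P(next state j | current state i, action a) and TRM(i,j,a) = immediate reward for that transition, so every row of each TPM slice must sum to 1. A minimal sanity-check sketch under that assumption (not part of the original code):

% Sanity check: each row of TPM(:,:,a) must be a probability distribution
for a = 1:NA
    assert(all(abs(sum(TPM(:,:,a),2) - 1) < 1e-12));
end

The near-identical blocks that follow appear to be alternative parameter files (the driver script runs the one named global3); they differ only in the reward matrices TRM.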


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA




NO_REPLICATIONS=30; % No of replications of simulation 
ITERMAX=10000; % No of iterations of learning 
NA=2; % Number of actions in each state 
NS=2; % Number of states 


LAMBDA=0.8; % discount factor


SMALL=-1000000; 


TPM(:,:,1)=[0.7,0.3;0.4,0.6];


TPM(:,:,2)=[0.9,0.1;0.2,0.8];


TRM(:,:,1)=[6,5;7,12];
    
TRM(:,:,2)=[10,17;14,13];


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA




NO_REPLICATIONS=30; % No of replications of simulation 
ITERMAX=10000; % No of iterations of learning 
NA=2; % Number of actions in each state 
NS=2; % Number of states 


LAMBDA=0.8; % discount factor


SMALL=-1000000; 


TPM(:,:,1)=[0.7,0.3;0.4,0.6];


TPM(:,:,2)=[0.9,0.1;0.2,0.8];


TRM(:,:,1)=[6,-5;7,12];
    
TRM(:,:,2)=[12,17;-14,13];


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA




NO_REPLICATIONS=30; % No of replications of simulation 
ITERMAX=10000; % No of iterations of learning 
NA=2; % Number of actions in each state 
NS=2; % Number of states 


LAMBDA=0.8; % discount factor


SMALL=-1000000; 


TPM(:,:,1)=[0.7,0.3;0.4,0.6];


TPM(:,:,2)=[0.9,0.1;0.2,0.8];


TRM(:,:,1)=[16,-5;7,12];
    
TRM(:,:,2)=[0,17;-14,13];


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

