Machine Learning 学习 之 Q-learning 学习
来源:互联网 发布:金蝶引出数据失败原因 编辑:程序博客网 时间:2024/05/17 02:07
学习改编自 莫烦Python Qlearning学习教程
# Tabular Q-learning / Sarsa demo on a 1-D corridor world ('----T'):
# the agent starts at the left end and is rewarded (+1) for reaching the
# treasure 'T' at the right end.  Adapted from the MorvanZhou Q-learning tutorial.
import numpy as np
import time

np.random.seed(2)   # reproducible action sampling

N_STATES = 6        # length of the 1-dimensional world
ACTIONS = ['left', 'right']   # available actions
EPSILON = 0.9       # greedy policy threshold (act greedily with prob. EPSILON)
ALPHA = 0.1         # learning rate
GAMMA = 0.9         # discount factor
MAX_EPISODES = 53   # maximum episodes
FRESH_TIME = 0.05   # render delay (seconds) for one move


def build_q_table(n_states, actions):
    """Return an n_states x len(actions) Q-table of zeros (plain nested lists)."""
    # RL() prints the table right after building it, so no debug print here
    # (the original printed it in both places).
    return [[0 for _ in actions] for _ in range(n_states)]


def list_Zero(lis):
    """Return True iff every entry of lis is zero (state has no learned value)."""
    return all(li == 0 for li in lis)


def max_Index(lis):
    """Return the index of the first maximum element of lis."""
    # max() with an index key keeps the first occurrence on ties,
    # matching the original strict '>' scan.
    return max(range(len(lis)), key=lis.__getitem__)


def max_Value(lis):
    """Return the maximum element of lis."""
    return max(lis)


def choose_action(state, q_table):
    """Epsilon-greedy action selection.

    Explores (uniform random action) when a random draw exceeds EPSILON or
    when the state's action-values are all zero; otherwise exploits the
    greedy action.  Returns (action_name, action_index).
    """
    state_actions = q_table[state]
    if np.random.uniform() > EPSILON or list_Zero(state_actions):
        action_num = np.random.randint(0, len(ACTIONS))
    else:
        action_num = max_Index(state_actions)
    return ACTIONS[action_num], action_num


def get_feedback(S, A):
    """Environment step: apply action A in state S.

    Returns (S_, R): the next state (or the string 'terminal' with reward 1
    when the agent steps onto the treasure) and the reward (0 otherwise).
    Moving left at state 0 hits the wall and stays put.
    """
    if A == 'right':
        if S == N_STATES - 2:   # stepping right from here reaches 'T'
            return 'terminal', 1
        return S + 1, 0
    # move left
    if S == 0:
        return S, 0             # reached the wall
    return S - 1, 0


def update(S, episode, step_counter):
    """Render the environment: '---o--T' style line, or an episode summary."""
    env_list = ['-'] * (N_STATES - 1) + ['T']   # '-----T' our environment
    if S == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode + 1, step_counter)
        print('\r{}'.format(interaction))
        time.sleep(1)
        print('\r')
    else:
        env_list[S] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction))
        time.sleep(FRESH_TIME)


def RL(method):
    """Main RL loop over the corridor world.

    method: 'Qlearning' (bootstrap on the max next action-value) or
            'Sarsa' (bootstrap on a sampled next action-value).
    Returns the learned Q-table (nested lists).
    Raises ValueError for an unknown method (the original fell through to a
    NameError on the first non-terminal update instead).
    """
    if method not in ('Qlearning', 'Sarsa'):
        raise ValueError('unknown method: %r' % (method,))
    q_table = build_q_table(N_STATES, ACTIONS)
    print(q_table)
    for episode in range(MAX_EPISODES):
        step_counter = 0
        S = 0
        is_terminated = False
        update(S, episode, step_counter)
        while not is_terminated:
            A, Anum = choose_action(S, q_table)
            S_, R = get_feedback(S, A)   # take action, observe next state & reward
            if S_ != 'terminal':
                if method == 'Qlearning':
                    q_reward = R + GAMMA * max_Value(q_table[S_])
                else:   # Sarsa
                    # NOTE(review): this samples a next action only for the
                    # update and does not reuse it as the action actually taken
                    # next step, so it is not textbook on-policy Sarsa — kept
                    # as in the original tutorial.
                    A_, Anum_ = choose_action(S_, q_table)
                    q_reward = R + GAMMA * q_table[S_][Anum_]
            else:
                q_reward = R            # next state is terminal: no bootstrap
                is_terminated = True    # terminate this episode
            q_table[S][Anum] = (1 - ALPHA) * q_table[S][Anum] + ALPHA * q_reward
            S = S_                      # move to next state
            update(S, episode, step_counter + 1)
            step_counter += 1
    return q_table


if __name__ == "__main__":
    # q_table = RL('Qlearning')
    q_table = RL('Sarsa')
    print('\r\nQ-table:\n')
    print(q_table)
阅读全文
0 0
- Machine Learning 学习 之 Qleaning 学习
- 系统学习Machine Learning之路漫漫
- Machine Learning 学习 之 Kmeans 笔记
- Machine Learning 学习 之 RBF网络
- Machine Learning 学习之朴素贝叶斯
- Machine Learning学习之岭回归
- Machine Learning 学习之神经网络(1)
- Machine Learning 学习之决策树 ID3树
- Machine Learning 学习 之 C4.5
- Machine Learning 机器学习
- machine learning 学习网站
- 机器学习(Machine Learning)
- Machine learning学习(1)
- Machine Learning 学习笔记
- 机器学习 Machine Learning
- Machine Learning学习路线
- 机器学习(Machine Learning)
- machine learning 学习笔记
- CSS3 pointer-events:none应用举例及扩展
- 人工智能算法--KNN算法(C++实现)
- 字符串的选择性拷贝
- 在一个有序的旋转数组中,查找给定值
- Bribe the Prisoners(Easy)
- Machine Learning 学习 之 Qleaning 学习
- 基本运算符
- Android--(14)--通过安卓选择器来修改actionbar的样式与字体样式
- sizeof的特点和用法
- c语言之最大数/阶乘求和、加密
- Machine Learning 学习之朴素贝叶斯
- 初学C语言感想。
- test engineer
- MySQL 高性能编程学习(1)—架构与特性