{"id":3384,"date":"2022-04-01T16:48:39","date_gmt":"2022-04-01T08:48:39","guid":{"rendered":"http:\/\/139.9.1.231\/?p=3384"},"modified":"2022-04-01T19:03:07","modified_gmt":"2022-04-01T11:03:07","slug":"rl_learing","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2022\/04\/01\/rl_learing\/","title":{"rendered":"\u5f3a\u5316\u5b66\u4e60\uff1a"},"content":{"rendered":"\n<p>\u4ee3\u7801\u5b66\u4e60\u7f51\u7ad9\uff1a<\/p>\n\n\n\n<p class=\"has-light-pink-background-color has-background\">\u6559\u7a0b\uff1a<a href=\"https:\/\/stable-baselines.readthedocs.io\/en\/master\/guide\/examples.html\">https:\/\/stable-baselines.readthedocs.io\/en\/master\/guide\/examples.html<\/a><\/p>\n\n\n\n<h2>gym\u4f7f\u7528<\/h2>\n\n\n\n<p class=\"has-light-pink-background-color has-background\"><\/p>\n\n\n\n<p>\u5728\u505arl\u65f6\u5019 \uff0c\u5982\u4f55\u5229\u7528gym\u5c06\u52a8\u753b\u52a8\u8d77\u6765\uff0c\u8ba9\u6bcf\u4e00\u6b65\u8bad\u7ec3\u8fc7\u7a0b\u53ef\u89c6\u5316\uff1a<\/p>\n\n\n\n<p>\u4f8b\u7a0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import gym\n\nfrom stable_baselines import DQN\nfrom stable_baselines.common.evaluation import evaluate_policy\n\n\n# Create environment\nenv = gym.make('LunarLander-v2')\n\n# Instantiate the agent\nmodel = DQN('MlpPolicy', env, learning_rate=1e-3, prioritized_replay=True, verbose=1)\n# Train the agent\nmodel.learn(total_timesteps=int(2e5))\n# Save the agent\nmodel.save(\"dqn_lunar\")\ndel model  # delete trained model to demonstrate loading\n\n# Load the trained agent\nmodel = DQN.load(\"dqn_lunar\")\n\n# Evaluate the agent\nmean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)\n\n# Enjoy trained agent\nobs = env.reset()\nfor i in range(1000):\n    action, _states = model.predict(obs)\n    obs, rewards, dones, info = env.step(action)\n    env.render()<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img 
src=\"https:\/\/cdn-images-1.medium.com\/max\/960\/1*h4WTQNVIsvMXJTCpXm_TAw.gif\" alt=\"https:\/\/cdn-images-1.medium.com\/max\/960\/1*h4WTQNVIsvMXJTCpXm_TAw.gif\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\"><strong>from<\/strong> <strong>stable_baselines.common.cmd_util<\/strong> <strong>import<\/strong> make_atari_env\n<strong>from<\/strong> <strong>stable_baselines.common.vec_env<\/strong> <strong>import<\/strong> VecFrameStack\n<strong>from<\/strong> <strong>stable_baselines<\/strong> <strong>import<\/strong> ACER\n\n<em># There already exists an environment generator<\/em>\n<em># that will make and wrap atari environments correctly.<\/em>\n<em># Here we are also multiprocessing training (num_env=4 =&gt; 4 processes)<\/em>\nenv = make_atari_env('PongNoFrameskip-v4', num_env=4, seed=0)\n<em># Frame-stacking with 4 frames<\/em>\nenv = VecFrameStack(env, n_stack=4)\n\nmodel = ACER('CnnPolicy', env, verbose=1)\nmodel.learn(total_timesteps=25000)\n\nobs = env.reset()\n<strong>while<\/strong> <strong>True<\/strong>:\n    action, _states = model.predict(obs)\n    obs, rewards, dones, info = env.step(action)\n    env.render()<\/pre>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img src=\"https:\/\/cdn-images-1.medium.com\/max\/960\/1*UHYJE7lF8IDZS_U5SsAFUQ.gif\" alt=\"https:\/\/cdn-images-1.medium.com\/max\/960\/1*UHYJE7lF8IDZS_U5SsAFUQ.gif\"\/><\/figure><\/div>\n\n\n\n<h2>bug\u89e3\u51b3\uff1a<\/h2>\n\n\n\n<p>\u5728\u6267\u884c\u65f6\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import gym\nenv = gym.make('ALE\/Pong-v5')\nenv.reset()\n\nfor i in range(1000):\n    env.step(env.action_space.sample())\n    env.render()\nenv.close()<\/code><\/pre>\n\n\n\n<p>\u8f93\u51fa\uff0c\u65e0\u6cd5\u5bf9frame\u8fdb\u884c\u6e32\u67d3\u3002<\/p>\n\n\n\n<p><strong>ImportError: cannot import name &#8216;rendering&#8217; from 
&#8216;gym.envs.classic_control&#8217;.<\/strong><\/p>\n\n\n\n<p>\u89e3\u51b3\u529e\u6cd5\uff1a<\/p>\n\n\n\n<p>\u6253\u5f00 \u5305<strong>gym.envs.classic_control\uff0c\u53d1\u73b0\u6ca1\u6709<\/strong>  <strong>rendering<\/strong> .py\u6587\u4ef6\uff0c\u53bb<a href=\"https:\/\/github.com\/openai\/gym\/blob\/3.10_attempt2\/gym\/envs\/classic_control\/rendering.py\">github<\/a>\uff0c\u53d1\u73b0\uff0cmain\u5206\u652f\u786e\u5b9e\u5df2\u7ecf\u6ca1\u6709\u8fd9\u4e2a\u6587\u4ef6\u4e86\uff0c\u5e94\u8be5\u662f\u7248\u672c\u7684\u95ee\u9898\uff0c\u6700\u65b0\u7248\u672c\u5df2\u7ecf\u53bb\u6389\u4e86\u8be5\u6587\u4ef6\uff0c\u7136\u800c\u5176\u4ed6\u5206\u652f\u662f\u6709\u7684\uff0c\u6240\u4ee5\u5c06\u8be5\u6587\u4ef6\u4e0b\u8f7d\u5e76\u653e\u5728\u5305\u5bf9\u5e94\u4f4d\u7f6e\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0c\u8fd8\u9700\u8981 \u5728\u4ee3\u7801\u4e2d\u52a0\u5165<\/p>\n\n\n\n<p class=\"has-light-pink-background-color has-background\">from gym.envs.classic_control import rendering<\/p>\n\n\n\n<p>\u5bfc\u5165rendering.py<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"682\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/04\/image-1-1024x682.png\" alt=\"\" class=\"wp-image-3399\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/04\/image-1-1024x682.png 1024w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/04\/image-1-300x200.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/04\/image-1-768x511.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/04\/image-1.png 1265w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4ee3\u7801\u5b66\u4e60\u7f51\u7ad9\uff1a \u6559\u7a0b\uff1ahttps:\/\/stable-baselines.readthedocs.io\/en\/m &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2022\/04\/01\/rl_learing\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span 
class=\"screen-reader-text\">\u5f3a\u5316\u5b66\u4e60\uff1a<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[11,4],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/3384"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=3384"}],"version-history":[{"count":13,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/3384\/revisions"}],"predecessor-version":[{"id":3400,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/3384\/revisions\/3400"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=3384"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=3384"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=3384"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}