{"id":7272,"date":"2022-09-03T17:14:34","date_gmt":"2022-09-03T09:14:34","guid":{"rendered":"http:\/\/139.9.1.231\/?p=7272"},"modified":"2022-09-03T17:14:35","modified_gmt":"2022-09-03T09:14:35","slug":"python-baidu","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2022\/09\/03\/python-baidu\/","title":{"rendered":"python \u722c\u53d6\u7f51\u7ad9\u56fe\u7247"},"content":{"rendered":"\n<p>   \u5bf9\u4e8e\u505a\u4eba\u5de5\u667a\u80fd\u6765\u8bf4\uff0c\u6700\u4e3b\u8981\u7684\u722c\u53d6\u76ee\u6807\u662f\u56fe\u7247\uff0c\u9700\u8981\u5728\u7f51\u4e0a\u83b7\u53d6\u5927\u91cf\u7684\u56fe\u7247\u6570\u636e\u7528\u4e8e\u6a21\u578b\u8bad\u7ec3\u3002\u8fd9\u91cc\u53c2\u8003\u7f51\u4e0a\u8d44\u6599\uff0c\u81ea\u5df1\u5199\u4e00\u4e2a\u7b80\u5355\u7684\u722c\u866b\u7a0b\u5e8f\u3002<\/p>\n\n\n\n<p>1\u3001\u722c\u53d6\u767e\u5ea6\u56fe\u7247\uff1a<\/p>\n\n\n\n<p>\u767e\u5ea6\u56fe\u7247\u6bd4\u8f83\u7b80\u5355\uff0c\u901a\u8fc7\u4e00\u4e2aajax\u8bf7\u6c42\uff0c\u6765\u83b7\u53d6\u56fe\u7247\u7684url\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"966\" height=\"668\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-26.png\" alt=\"\" class=\"wp-image-7281\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-26.png 966w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-26-300x207.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-26-768x531.png 768w\" sizes=\"(max-width: 966px) 100vw, 966px\" \/><\/figure>\n\n\n\n<p>\u53c2\u6570\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"583\" height=\"718\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-27.png\" alt=\"\" class=\"wp-image-7282\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-27.png 583w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-27-244x300.png 244w\" sizes=\"(max-width: 583px) 100vw, 583px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"572\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-28-1024x572.png\" alt=\"\" class=\"wp-image-7287\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-28-1024x572.png 1024w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-28-300x167.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-28-768x429.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-28.png 1410w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"714\" height=\"316\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-29.png\" alt=\"\" class=\"wp-image-7288\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-29.png 714w, http:\/\/139.9.1.231\/wp-content\/uploads\/2022\/09\/image-29-300x133.png 300w\" sizes=\"(max-width: 714px) 100vw, 714px\" \/><\/figure>\n\n\n\n<p>2\u3001\u722c\u53d6 \u8c37\u6b4c\u56fe\u7247\uff1a<\/p>\n\n\n\n<p>\u8c37\u6b4c\u8ddf\u767e\u5ea6\u4e0d\u540c\uff0c\u9700\u8981\u4f7f\u7528 selenium <\/p>\n\n\n\n<p>\u7531\u4e8egoogle\u56fe\u7247\u754c\u9762\u662f\u5c5e\u4e8e\u90a3\u79cd\u5f80\u4e0b\u5212\u4f1a\u5728\u672c\u9875\u9762\u4e2d\u52a0\u8f7d\u51fa\u66f4\u591a\u4fe1\u606f\uff0c\u4f46\u672a\u5237\u65b0\u7684\u673a\u5236\uff0c\u4f46\u662f\u5b83\u53c8\u5e76\u672a\u4f7f\u7528ajax\u3002<br>\u6240\u4ee5\u8fd9\u91cc\u6211\u4eec\u4f7f\u7528selenium\u3002selenium\u662f\u4e00\u4e2a\u80fd\u591f\u6a21\u62df\u6d4f\u89c8\u5668\u7684\u5de5\u5177\uff0c\u5982\u679c\u4f60\u6ca1\u6709\u5b89\u88c5\uff0c\u8bf7pip install \u4e00\u4e0b\u3002<br>\u7136\u540e\u662f\u4e0b\u8f7d\u7b26\u5408\u4f60\u7684\u6d4f\u89c8\u5668\u7684\u9a71\u52a8\uff0c\u6211\u8fd9\u91cc\u7528\u7684\u662fChrome\uff0c\u6240\u4ee5\u4e0b\u8f7d\u4e86ChromeDriver\uff0c\u5c06\u5176\u653e\u5728D:\\python\\Scripts\uff08\u4f60\u7684python\u5b89\u88c5\u76ee\u5f55\uff09\u3002<br>\u7528\u8fd9\u4e24\u4e2a\u6765\u6a21\u62df\u7528\u6237\u7684\u6d4f\u89c8\u5668\u64cd\u4f5c\u3002<br><\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>\rfrom selenium import webdriver\r\nfrom selenium.webdriver.common.keys import Keys\r\nfrom selenium.webdriver.chrome.options import Options\r\nimport time\r\nimport os\r\nimport urllib.request\r\nimport uuid\r\n\r\ndef download_pic(url, name, path):\r\n\r\n    if not os.path.exists(path):\r\n        os.makedirs(path)\r\n    res = urllib.request.urlopen(url, timeout=3).read()\r\n    with open(path + name +'.jpg', 'wb') as file:\r\n        file.write(res)\r\n        file.close()\r\n\r\ndef get_image_url(num, key_word):\r\n\r\n    box = driver.find_element_by_xpath('\/html\/body\/div&#91;1]\/div&#91;3]\/form\/div&#91;1]\/div&#91;1]\/div&#91;1]\/div\/div&#91;2]\/input')\r\n    box.send_keys(key_word)\r\n    box.send_keys(Keys.ENTER)\r\n    box = driver.find_element_by_xpath('\/\/*&#91;@id=\"hdtb-msb\"]\/div&#91;1]\/div\/div&#91;2]\/a').click()\r\n\r\n    # \u6eda\u52a8\u9875\u9762\r\n    last_height = driver.execute_script('return document.body.scrollHeight')\r\n    while True:\r\n        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')\r\n        time.sleep(2)\r\n        new_height = driver.execute_script('return document.body.scrollHeight')\r\n        try:\r\n            driver.find_elements_by_xpath('\/\/*&#91;@id=\"islmp\"]\/div\/div\/div\/div\/div&#91;5]\/input').click()\r\n        except:\r\n            pass\r\n        if new_height == last_height:\r\n            # \u70b9\u51fb\u663e\u793a\u66f4\u591a\u7ed3\u679c\r\n            try:\r\n                box = driver.find_element_by_xpath('\/\/*&#91;@id=\"islmp\"]\/div\/div\/div\/div&#91;1]\/div&#91;2]\/div&#91;2]\/input').click()\r\n            except:\r\n                break\r\n        last_height = new_height\r\n\r\n    image_urls = &#91;]\r\n\r\n    for i in range(1, num):\r\n        try:\r\n            image = driver.find_element_by_xpath('\/\/*&#91;@id=\"islrg\"]\/div&#91;1]\/div&#91;' + str(i) + ']\/a&#91;1]\/div&#91;1]\/img')\r\n            # \u6b64\u9009\u9879\u4e3a\u4e0b\u8f7d\u7f29\u7565\u56fe\r\n            # image_src = image.get_attribute(\"src\")\r\n            image.click() # \u70b9\u5f00\u5927\u56fe\r\n            time.sleep(4)  # \u56e0\u4e3a\u8c37\u6b4c\u9875\u9762\u662f\u52a8\u6001\u52a0\u8f7d\u7684\uff0c\u9700\u8981\u7ed9\u4e88\u9875\u9762\u52a0\u8f7d\u65f6\u95f4\uff0c\u5426\u5219\u65e0\u6cd5\u83b7\u53d6\u539f\u56feurl\uff0c\u5982\u679c\u4f60\u7684\u7f51\u7edc\u72b6\u51b5\u4e00\u822c\u8bf7\u9002\u5f53\u5ef6\u957f\r\n            # \u83b7\u53d6\u539f\u56fe\u7684url\r\n            image_real = driver.find_element_by_xpath('\/\/*&#91;@id=\"Sva75c\"]\/div\/div\/div&#91;3]\/div&#91;2]\/c-wiz\/div\/div&#91;1]\/div&#91;1]\/div&#91;2]\/div&#91;1]\/a\/img')\r\n            image_url = image_real.get_attribute(\"src\")\r\n            image_urls.append(image_url)\r\n            print(str(i) + ': ' + image_url)\r\n        except:\r\n            print(str(i) + ': error')\r\n            pass\r\n    return image_urls\r\nif __name__ == '__main__':\r\n    # \u521b\u5efa\u4e00\u4e2a\u53c2\u6570\u5bf9\u8c61\uff0c\u7528\u6765\u63a7\u5236chrome\u662f\u5426\u4ee5\u65e0\u754c\u9762\u6a21\u5f0f\u6253\u5f00\r\n    ch_op = Options()\r\n    # \u8bbe\u7f6e\u8c37\u6b4c\u6d4f\u89c8\u5668\u7684\u9875\u9762\u65e0\u53ef\u89c6\u5316\uff0c\u5982\u679c\u9700\u8981\u53ef\u89c6\u5316\u8bf7\u6ce8\u91ca\u8fd9\u4e24\u884c\u4ee3\u7801\r\n    ch_op.add_argument('--headless')\r\n    ch_op.add_argument('--disable-gpu')\r\n\r\n    url = \"https:\/\/www.google.com\/\"\r\n    driver = webdriver.Chrome(r'D:\\anconda3\\chromedriver.exe', options=ch_op)\r\n    driver.get(url)\r\n\r\n    key_word = input('\u8bf7\u8f93\u5165\u5173\u952e\u8bcd\uff1a')\r\n    num = int(input('\u8bf7\u8f93\u5165\u9700\u8981\u4e0b\u8f7d\u7684\u56fe\u7247\u6570\uff1a'))\r\n    _path = input('\u8bf7\u8f93\u5165\u56fe\u7247\u4fdd\u5b58\u8def\u5f84,\u4f8b\u5982G:\\\\\\\\google\\\\\\\\images\\\\\\\\ :')\r\n\r\n    # path = \"G:\\\\google\\\\images_download\\\\\" + key_word + \"\\\\\"  # \u56fe\u7247\u4fdd\u5b58\u8def\u5f84\u6539\u4e3a\u81ea\u5df1\u7684\u8def\u5f84\r\n    path = _path + key_word + \"\\\\\"\r\n    print('\u6b63\u5728\u83b7\u53d6\u56fe\u7247url...')\r\n    image_urls = get_image_url(num, key_word)\r\n    for index, url in enumerate(image_urls):\r\n        try:\r\n            print('\u7b2c' + str(index) + '\u5f20\u56fe\u7247\u5f00\u59cb\u4e0b\u8f7d...')\r\n            download_pic(url, str(uuid.uuid1()), path)\r\n        except Exception as e:\r\n            print(e)\r\n            print('\u7b2c' + str(index) + '\u5f20\u56fe\u7247\u4e0b\u8f7d\u5931\u8d25')\r\n            continue\r\n    driver.quit()<\/code><\/pre>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5bf9\u4e8e\u505a\u4eba\u5de5\u667a\u80fd\u6765\u8bf4\uff0c\u6700\u4e3b\u8981\u7684\u722c\u53d6\u76ee\u6807\u662f\u56fe\u7247\uff0c\u9700\u8981\u5728\u7f51\u4e0a\u83b7\u53d6\u5927\u91cf\u7684\u56fe\u7247\u6570\u636e\u7528\u4e8e\u6a21\u578b\u8bad\u7ec3\u3002\u8fd9\u91cc\u53c2\u8003\u7f51\u4e0a\u8d44\u6599\uff0c\u81ea\u5df1\u5199 &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2022\/09\/03\/python-baidu\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">python \u722c\u53d6\u7f51\u7ad9\u56fe\u7247<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[28],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/7272"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=7272"}],"version-history":[{"count":14,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/7272\/revisions"}],"predecessor-version":[{"id":7294,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/7272\/revisions\/7294"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=7272"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=7272"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=7272"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}