1. 正常使用 Selenium 访问百度:由 Selenium 直接启动的浏览器处于自动化(调试)模式,会被网站检测到。

import time

from selenium import webdriver

from selenium.webdriver.chrome.service import Service as ChromeService

from webdriver_manager.chrome import ChromeDriverManager

if __name__ == '__main__':
    # Basic example: let Selenium start its own Chrome instance, open Baidu,
    # keep the page up for a few seconds, then shut everything down.

    # Build the driver service from the chromedriver path that
    # webdriver_manager returns.
    service = ChromeService(executable_path=ChromeDriverManager().install())

    # Default Chrome options; nothing is customised in this example.
    options = webdriver.ChromeOptions()

    # Create the browser instance.
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Visit Baidu and pause so the page can be observed.
        driver.get("https://www.baidu.com/")
        time.sleep(5)
    finally:
        # Always release the browser/driver, even if navigation raised.
        driver.quit()

 

2. 使用代理浏览器(先以远程调试端口启动本地 Chrome,再让 Selenium 接管)访问百度:相当于真正的用户操作

import os

import time

from selenium import webdriver

from selenium.webdriver.chrome.service import Service as ChromeService

from webdriver_manager.chrome import ChromeDriverManager

if __name__ == '__main__':
    # Attach-mode example: start a local Chrome with a remote-debugging port,
    # then have Selenium connect to that already-running browser.

    # Path of the locally installed Chrome executable. A raw string keeps the
    # Windows backslashes literal instead of relying on unrecognised escape
    # sequences (which newer Python versions warn about).
    chrome_path = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"

    # Launch Chrome listening on debugging port 9222 with its own profile dir.
    os.popen(rf'"{chrome_path}" --remote-debugging-port=9222 --user-data-dir="C:\selenum\AutomationProfile"')

    # Build the driver service from the chromedriver path that
    # webdriver_manager returns.
    service = ChromeService(executable_path=ChromeDriverManager().install())

    # Point Selenium at the browser started above instead of spawning a new one.
    options = webdriver.ChromeOptions()
    options.add_experimental_option('debuggerAddress', 'localhost:9222')

    # Create the driver session bound to the running browser.
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Visit Baidu and pause so the page can be observed.
        driver.get("https://www.baidu.com/")
        time.sleep(5)
    finally:
        # Always end the driver session, even if navigation raised.
        driver.quit()

3.selenium本地代理中间件:

class SeleniumMiddleware:
    """Downloader middleware that re-fetches each request in a local Chrome.

    On construction a Chrome process is started with remote debugging enabled
    on port 9222. For every response, a Selenium session attaches to that
    browser, loads the request URL, performs a keyword search on the page and
    returns the resulting page source as a new HTML response.

    NOTE(review): ``WebDriverWait``, ``By``, ``random`` and ``scrapy`` are used
    below but are not imported anywhere in the visible source; the enclosing
    module must import them.
    """

    def __init__(self):
        super().__init__()
        # Path of the locally installed Chrome executable.
        chrome_path = r"C:\Users\Lenovo\AppData\Local\Google\Chrome\Application\chrome.exe"
        # Launch Chrome listening on debugging port 9222 with its own profile dir.
        os.popen(rf'"{chrome_path}" --remote-debugging-port=9222 --user-data-dir="C:\selenum\AutomationProfile"')

    def process_response(self, request, response, spider):
        """Load ``request.url`` in the attached browser and return its HTML.

        Args:
            request: The request being processed; only ``request.url`` is read,
                and the object is attached to the returned response.
            response: The response already downloaded; it is not used.
            spider: The running spider; it is not used.

        Returns:
            A ``scrapy.http.HtmlResponse`` whose body is the UTF-8 encoded
            page source taken from the browser after the search was submitted.

        Any exception raised by Selenium (for example while waiting for an
        element) propagates to the caller after the driver has been shut down.
        """
        # Build the driver service from the chromedriver path that
        # webdriver_manager returns.
        service = ChromeService(executable_path=ChromeDriverManager().install())

        # Attach to the browser started in __init__ instead of spawning one.
        options = webdriver.ChromeOptions()
        options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')

        driver = webdriver.Chrome(service=service, options=options)
        try:
            # Open the target page.
            driver.get(request.url)

            # Window handling demo: minimise, maximise, then set an explicit
            # position and size.
            driver.minimize_window()
            driver.maximize_window()
            driver.set_window_rect(0, 0, 1000, 500)

            # Wait up to 3 seconds for the keyword input box, then type the
            # search term with a random digit appended.
            input_element = WebDriverWait(driver, timeout=3).until(lambda d: d.find_element(By.ID, "kw"))
            input_element.send_keys("苍穹之跃" + str(random.randint(0, 9)))

            # Wait up to 3 seconds for the search button, then click it.
            search_button_element = WebDriverWait(driver, timeout=3).until(lambda d: d.find_element(By.ID, "su"))
            search_button_element.click()

            # NOTE(review): the page source is read right after the click with
            # no wait for the results page — confirm this captures the
            # intended content.
            html = driver.page_source
        finally:
            # Always end the driver session, even if a wait or navigation
            # raised, so sessions do not pile up across responses.
            driver.quit()

        return scrapy.http.HtmlResponse(url=request.url, body=html.encode('utf-8'), encoding='utf-8', request=request)

开启中间件:

# Register the Selenium middleware with the downloader, using order value 543.
DOWNLOADER_MIDDLEWARES = {"testproject.middlewares.SeleniumMiddleware": 543}

4.selenium本地代理中间件-多线程并发:

未实现

相关文章

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: