1. 正常使用 Selenium 访问百度:浏览器由 Selenium 直接启动,处于自动化(调试)模式,容易被网站检测到。
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
if __name__ == '__main__':
    # Resolve the chromedriver binary through webdriver_manager and wrap it
    # in a Service object.
    service = ChromeService(executable_path=ChromeDriverManager().install())
    # Default Chrome options; no extra flags are configured.
    options = webdriver.ChromeOptions()
    # Start a Chrome instance driven by chromedriver.
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Open the Baidu home page.
        driver.get("https://www.baidu.com/")
        # Pause for 5 seconds before shutting down.
        time.sleep(5)
    finally:
        # Always release the driver, even if navigation raised.
        driver.quit()
2. 使用“代理浏览器”(先以远程调试端口启动本地 Chrome,再由 Selenium 接管)访问百度:更接近真实用户操作。
import os
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
if __name__ == '__main__':
    # Imported locally so this script needs no change to the import header.
    import subprocess

    # Path of the locally installed Chrome executable. A raw string keeps the
    # Windows backslashes from being parsed as escape sequences.
    chrome_path = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
    # Launch Chrome with a remote-debugging port and a dedicated profile
    # directory. An argument list is used so no shell quoting is involved.
    # The handle is kept so the child process object is not discarded while
    # the browser is still running.
    browser_process = subprocess.Popen([
        chrome_path,
        "--remote-debugging-port=9222",
        r"--user-data-dir=C:\selenum\AutomationProfile",
    ])
    # Resolve the chromedriver binary through webdriver_manager.
    service = ChromeService(executable_path=ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    # Attach to the already-running Chrome instead of starting a new one.
    options.add_experimental_option('debuggerAddress', 'localhost:9222')
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Open the Baidu home page.
        driver.get("https://www.baidu.com/")
        # Pause for 5 seconds before shutting down.
        time.sleep(5)
    finally:
        # Always release the driver session, even if navigation raised.
        driver.quit()
3.selenium本地代理中间件:
class SeleniumMiddleware:
    """Scrapy downloader middleware that re-renders pages in a local Chrome.

    On construction a Chrome process is started with remote debugging enabled
    on port 9222. For every response, ``process_response`` attaches a
    Selenium driver to that browser, loads the request URL, performs a search
    through the page's ``kw`` / ``su`` elements and returns the resulting page
    source as a new ``HtmlResponse``.
    """

    def __init__(self):
        super().__init__()
        # Imported locally so the module's import header needs no change.
        import subprocess

        # Path of the locally installed Chrome executable.
        chrome_path = r"C:\Users\Lenovo\AppData\Local\Google\Chrome\Application\chrome.exe"
        # Launch Chrome with a remote-debugging port and a dedicated profile
        # directory. An argument list is used so no shell quoting is involved.
        self._browser_process = subprocess.Popen([
            chrome_path,
            "--remote-debugging-port=9222",
            r"--user-data-dir=C:\selenum\AutomationProfile",
        ])
        # Resolve the chromedriver binary once instead of on every response.
        self._driver_path = ChromeDriverManager().install()

    def process_response(self, request, response, spider):
        """Render ``request.url`` in the attached browser and return its HTML.

        Args:
            request: The request whose URL is loaded in the browser.
            response: The downloaded response; it is not used.
            spider: The running spider; it is not used.

        Returns:
            A ``scrapy.http.HtmlResponse`` built from the browser's page
            source, encoded as UTF-8.
        """
        service = ChromeService(executable_path=self._driver_path)
        options = webdriver.ChromeOptions()
        # Attach to the Chrome instance started in __init__.
        options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
        driver = webdriver.Chrome(service=service, options=options)
        try:
            # Load the page.
            driver.get(request.url)
            # Window handling: minimize, then maximize, then set an explicit
            # position and size.
            driver.minimize_window()
            driver.maximize_window()
            driver.set_window_rect(0, 0, 1000, 500)
            # Wait up to 3 seconds for the keyword input box.
            input_element = WebDriverWait(driver, timeout=3).until(
                lambda d: d.find_element(By.ID, "kw")
            )
            # Type the search keyword followed by a random digit.
            input_element.send_keys("苍穹之跃" + str(random.randint(0, 9)))
            # Wait up to 3 seconds for the search button, then click it.
            search_button_element = WebDriverWait(driver, timeout=3).until(
                lambda d: d.find_element(By.ID, "su")
            )
            search_button_element.click()
            # NOTE(review): the page source is read right after the click,
            # with no wait for the results to load - confirm this is intended.
            html = driver.page_source
        finally:
            # Always release the driver session, even when a wait times out.
            driver.quit()
        return scrapy.http.HtmlResponse(
            url=request.url,
            body=html.encode('utf-8'),
            encoding='utf-8',
            request=request,
        )
在项目的 settings.py 中开启中间件:
# Enable the Selenium downloader middleware. The integer is the order value
# Scrapy uses to position it among the other downloader middlewares.
DOWNLOADER_MIDDLEWARES = {
    "testproject.middlewares.SeleniumMiddleware": 543,
}
4.selenium本地代理中间件-多线程并发:
未实现
相关文章
发表评论