1.导包请求

import requests

2.获取古诗文网登录接口 

# Login page of gushiwen.cn. The "?" starts the query string; without it the
# "from=..." text would be treated as part of the path.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

# Request headers: present a regular browser User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

 3.发送请求,获取登录页面源码

# Download the login page; its HTML is parsed in the following steps.
response = requests.get(url, headers=headers)
content = response.text

 4.导包获取页面元素

from lxml import etree

 5.使用xpath获取隐藏域值和验证码图片地址

# Parse the login page HTML so it can be queried with XPath.
tree = etree.HTML(content)

# Hidden form field values; they are sent back in the login POST data.
# Each lookup takes the first match and raises IndexError if there is none.
viewstate = tree.xpath('//input[@id="__VIEWSTATE"]/@value')[0]

viewgenerator = tree.xpath('//input[@id="__VIEWSTATEGENERATOR"]/@value')[0]

# Captcha image: the src attribute of the <img id="imgCode"> element.

code = tree.xpath('//img[@id="imgCode"]/@src')[0]

 5.将获取验证码的地址拼接成完整的网址

# Prefix the site origin to the captcha src to get a full URL.
code_url = f'https://so.gushiwen.cn{code}'

 6.建立会话,请求验证码

# One Session object is used for both the captcha download and the login POST,
# so cookies received with the captcha are sent along with the login.
session = requests.Session()
response_code = session.get(code_url)
# Raw bytes of the captcha image.
content_code = response_code.content

 7.保存验证码图片

# Binary mode: content_code holds the raw bytes of the captcha response.
with open('code.jpg','wb') as fp:
    fp.write(content_code)

 8.用于人工查看验证码后手动输入

# Wait for the user to look at the saved code.jpg and type the captcha text.
code_name = input('输入验证码')

 9.将获取的隐藏域数据和验证码拼接到post请求的data中

post_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

# Form fields for the login POST: the hidden values scraped from the login
# page plus the credentials and the captcha text typed by the user.
post_data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewgenerator,
    # The field is named "from"; its value is the same redirect URL that
    # appears percent-encoded in the query string of post_url.
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': '123',  # replace with your own account name and password
    'pwd': '123',
    'code': code_name,
    'denglu': '登录',
}

10.发送请求

# Submit the login form through the same session that fetched the captcha.
response_post = session.post(post_url, data=post_data, headers=headers)

 11.接收响应内容

# Text body of the login response.
content_post = response_post.text

 12.将响应的内容保存为html格式,手动打开,跳过验证码登录

# Save the response page so it can be opened in a browser to check the result.
with open('gushiwen.html','w',encoding='utf-8') as fp:
    fp.write(content_post)

 13.将保存的html文件使用浏览器打开

 14.源码

# Log in to gushiwen.cn: scrape the hidden form fields and the captcha image
# from the login page, let the user read the captcha, then POST the login form
# and save the response page to gushiwen.html.

# HTTP client and HTML parser
import requests
from lxml import etree

# Login page of gushiwen.cn
url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

# Request headers: present a regular browser User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# Send the request and get the login page source.
response = requests.get(url=url, headers=headers)
content = response.text
print(content)

# Parse the page so it can be queried with XPath.
tree = etree.HTML(content)

# Hidden form field values; each lookup takes the first match and raises
# IndexError if the element is not on the page.
viewstate = tree.xpath('//input[@id="__VIEWSTATE"]/@value')[0]
print(viewstate)

viewgenerator = tree.xpath('//input[@id="__VIEWSTATEGENERATOR"]/@value')[0]
print(viewgenerator)

# Address of the captcha image (src of <img id="imgCode">).
code = tree.xpath('//img[@id="imgCode"]/@src')[0]
print(code)

# Prefix the site origin to the captcha src to get a full URL.
code_url = 'https://so.gushiwen.cn' + code
print(code_url)

# Create a session; the captcha download and the login POST both go through
# it, so cookies received with the captcha are sent along with the login.
session = requests.session()

# Request the captcha through the session.
response_code = session.get(code_url)
content_code = response_code.content

# Save the captcha; binary mode because content_code is raw bytes.
with open('code.jpg','wb') as fp:
    fp.write(content_code)

# The user looks at code.jpg and types the captcha text.
code_name = input('输入验证码')

post_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

# Put the scraped hidden field values and the captcha into the POST data.
post_data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewgenerator,
    # The field is named "from"; its value is the same redirect URL that
    # appears percent-encoded in the query string of post_url.
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': '123',  # replace with your own account name and password
    'pwd': '123',
    'code': code_name,
    'denglu': '登录',
}

# Send the login request.
response_post = session.post(url=post_url,data=post_data,headers=headers)

# Read the response body.
content_post = response_post.text

# Save the response as an HTML file that can be opened by hand in a browser.
with open('gushiwen.html','w',encoding='utf-8') as fp:
    fp.write(content_post)

推荐阅读

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: