import scrapy
class Git2Spider(scrapy.Spider):
    """Sign in to GitHub through its HTML login form, then fetch a profile page.

    Flow: ``parse`` receives the login page, extracts the hidden
    ``authenticity_token`` field and POSTs the sign-in form to ``/session``;
    ``after_login`` then requests the user's profile page; ``check_login``
    prints that page's ``<title>`` text.

    ``username`` and ``password`` are class-level defaults. Scrapy copies
    ``-a key=value`` spider arguments onto the instance as attributes, so
    they can be overridden without editing this file, e.g.
    ``scrapy crawl git2 -a username=me -a password=secret``.
    """

    name = 'git2'
    allowed_domains = ['github.com']
    # Use HTTPS like every other URL in this spider, so the login page is
    # not first requested over plain HTTP.
    start_urls = ['https://github.com/login']

    # NOTE(review): credentials committed to source control are a liability;
    # these defaults are kept only for backward compatibility and should be
    # supplied via ``-a username=... -a password=...`` instead.
    username = 'GitLqr'
    password = 'balabala'

    def parse(self, response):
        """Build the sign-in form from the login page and submit it.

        Yields a single ``scrapy.FormRequest`` to ``https://github.com/session``
        whose callback is ``after_login``. Yields nothing if the login page
        does not contain an ``authenticity_token`` input.
        """
        # Parse the POST data out of the login page response.
        token = response.xpath(
            '//input[@name="authenticity_token"]/@value'
        ).extract_first()
        if token is None:
            # extract_first() returns None when nothing matches; passing
            # that None on as a form value would fail later with an
            # unhelpful error, so stop here with a clear message.
            self.logger.error(
                'authenticity_token not found on %s; aborting login',
                response.url,
            )
            return

        post_data = {
            'commit': 'Sign in',
            'authenticity_token': token,
            'login': self.username,
            'password': self.password,
            'webauthn-support': 'supported',
        }
        # Show the payload for debugging, but never echo the real password.
        print(dict(post_data, password='***'))

        # Send a POST request to the login URL.
        yield scrapy.FormRequest(
            url='https://github.com/session',
            callback=self.after_login,
            formdata=post_data,
        )

    def after_login(self, response):
        """Request the user's profile page once the sign-in POST has returned.

        The sign-in response itself is not inspected here; the profile page
        is handed to ``check_login``.
        """
        yield scrapy.Request(
            'https://github.com/{}'.format(self.username),
            callback=self.check_login,
        )

    def check_login(self, response):
        """Print the ``<title>`` text of the profile page.

        Prints ``None`` when the page has no title text.
        """
        print(response.xpath('/html/head/title/text()').extract_first())