# -*- coding: utf-8 -*-
import scrapy
import re
class GithubLoginSpider(scrapy.Spider):
    """Spider that signs in to GitHub by submitting the login form.

    It downloads the login page, copies the hidden form fields from it,
    POSTs them together with the credentials to the session endpoint and
    then checks the resulting page for a marker string.

    The ``login`` and ``password`` class attributes hold the default
    credentials. They can be overridden per run with spider arguments
    (``scrapy crawl github_login -a login=... -a password=...``).
    """

    name = 'github_login'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    # Default credentials, kept for backward compatibility.
    # NOTE(review): credentials should not live in source control; prefer
    # supplying them via spider arguments or settings.
    login = 'pengjunlee@163.com'
    password = '123456'

    # Hidden inputs copied verbatim from the login page into the POST body.
    _HIDDEN_FIELDS = ('authenticity_token', 'utf8', 'commit')

    def parse(self, response):
        """Build and send the login POST request.

        Args:
            response: The downloaded login page.

        Yields:
            A ``scrapy.FormRequest`` to the session endpoint whose response
            is handled by ``after_login``.
        """
        form_data = {
            'login': self.login,
            'password': self.password,
            'webauthn-support': 'supported',
        }
        for field in self._HIDDEN_FIELDS:
            value = response.xpath(
                '//input[@name="%s"]/@value' % field
            ).extract_first()
            # extract_first() returns None when the page has no such input.
            # Skip it instead of handing None to FormRequest, which expects
            # string form values.
            if value is not None:
                form_data[field] = value

        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=form_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print every occurrence of the marker text in the response.

        A non-empty list means the marker was found in the page; an empty
        list means it was not.

        Args:
            response: The response to the login POST.
        """
        # response.text decodes the body with the response's own encoding
        # rather than assuming UTF-8.
        print(re.findall(
            'Learn Git and GitHub without any code!',
            response.text,
        ))