import requests
from lxml import etree
# Send the HTTP request for the book's comment page.
url = 'https://book.douban.com/subject/1054917/comments/'
# A browser-like User-Agent header is sent with the request.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
}
# An explicit timeout keeps the script from hanging forever on an
# unresponsive server (requests applies no timeout unless one is given).
r = requests.get(url, headers=head, timeout=10)
# Fail loudly on a 4xx/5xx response instead of parsing an error page and
# silently saving an empty result.
r.raise_for_status()

# Parse the HTML and collect the text nodes of every <p> that is a direct
# child of a <div class="comment">.
s = etree.HTML(r.text)
comments = s.xpath('//div[@class="comment"]/p/text()')
# print(str(comments))  # handy while developing: dump what was scraped
# Disabled alternative: save the comments to a plain-text file with open().
# Kept for reference only; none of the lines below are executed.
#
# with open('D:/PythonWorkSpace/TestData/pinglun.txt', 'w', encoding='utf-8') as f:
#     # "with open()" binds the newly created file object to f
#     for i in comments:
#         print(i)
#         # Write one comment per line into the file at the path above;
#         # the trailing \n makes the output easier to read.
#         f.write(i + '\n')
# Save the data with pandas: to CSV (the Excel export is left disabled).
import pandas as pd

csv_path = 'D:/PythonWorkSpace/TestData/PandasNumpyCSV.csv'
df = pd.DataFrame(data=comments)
# print(df.head())  # head() returns the first 5 rows by default
df.to_csv(path_or_buf=csv_path)
# df.to_excel('D:/PythonWorkSpace/TestData/PandasNumpyEx.xlsx')