美文网首页
python 爬取简书首页文章标题和简介

python 爬取简书首页文章标题和简介

作者: fan12 | 来源:发表于2018-12-12 20:51 被阅读0次

import urllib3

# Silence urllib3's warning output for this script.
urllib3.disable_warnings()

import re

# Fetch the Jianshu home page with urllib3 and print every article title
# together with its abstract.
url = 'https://www.jianshu.com'

# Browser-like User-Agent sent with the request
# (presumably required for the site to serve the regular page -- confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
}

pool = urllib3.PoolManager()
resp = pool.request('GET', url, headers=headers)

# resp.data holds the raw body bytes; decode() uses UTF-8 by default.
url_content = resp.data.decode()

# Titles are the text of <a class="title" ...> anchors.
title = re.findall(r'<a class="title" target="_blank.*?">(.*?)</a>', url_content)

# Abstracts may span several lines, hence re.S so "." also matches newlines.
content = re.findall(r'<p class="abstract">(.*?)</p>', url_content, re.S)

# Pair titles with abstracts positionally.  zip() stops at the shorter list,
# so a page with fewer abstracts than titles no longer raises IndexError.
for i, abstract in zip(title, content):
    print(i)
    print(abstract)
    print('=============================================================')

urllib

from urllib import request

import re

# Fetch the Jianshu home page with the standard-library urllib and print
# every article title together with its abstract.
url = "https://www.jianshu.com"

# Browser-like User-Agent sent with the request
# (presumably required for the site to serve the regular page -- confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
}

req = request.Request(url, headers=headers)

# The context manager closes the HTTP response once the body has been read.
with request.urlopen(req) as resp:
    # decode() uses UTF-8 by default.
    page = resp.read().decode()

# One pattern captures (title, abstract) pairs in a single pass; re.S lets
# ".*?" bridge the newlines between the title anchor and its abstract.
res = re.findall(r'<a class="title" target="_blank" .*?>(.*?)</a>.*?<p class="abstract">(.*?)</p>', page, re.S)

for title, article in res:
    print(title)
    print(article)
    print('=====================================')

相关文章

网友评论

      本文标题:python 爬取简书首页文章标题和简介

      本文链接:https://www.haomeiwen.com/subject/wfxmhqtx.html