美文网首页python热爱者
免密,python通过关键字自动获取PDF网盘下载地址!

免密,python通过关键字自动获取PDF网盘下载地址!

作者: 48e0a32026ae | 来源:发表于2018-09-27 15:21 被阅读0次

PDF其实就是电子书籍,但是PDF这个格式允许插图,更加适合人们观看!

有人会说,这是黑客技术?这么强大!

NO,一点都不强大。就比如上次发布的Python无需百度积分下载百度文库一样,不过是一个API的调用!

好了,下面看效果:

源码:

# python3 代码

# 需要安装 requests 和 BeautifulSoup

# 从若兰格提取 pdf

import requests

from bs4 import BeautifulSoup as bp4

import json

# API endpoint that maps a Baidu pan share id to its share metadata
# (including the access code); {0} is the id from pan.baidu.com/s/1<id>.
BaiDuAPI_URL = "http://ypsuperkey.meek.com.cn/api/v1/items/BDY-{0}?client_version=2018.11"

# 设置请求头(原注释“设置代{过}{滤}理”系论坛过滤符干扰,应为“设置请求头”)

def req(url_para):
    """Query the ypsuperkey API for a Baidu pan share's metadata.

    Parameters
    ----------
    url_para : str
        The share id extracted from a ``pan.baidu.com/s/1<id>`` URL.

    Returns
    -------
    str
        The raw JSON response body on HTTP 200, otherwise ``''``.
    """
    headers = {
        'Host': 'ypsuperkey.meek.com.cn',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'close',
    }
    # 'resp', not 'req': the original shadowed this function's own name.
    # A timeout keeps the script from hanging forever on a dead endpoint.
    resp = requests.get(BaiDuAPI_URL.format(url_para), headers=headers, timeout=15)
    if resp.status_code == 200:
        return resp.text
    return ''

# =============================================================================
# ifblue.net (若兰格) endpoints: the site home page and the keyword search
# URL template ({0} = search keyword, {1} = result page number).
RUGE_HOME = "http://www.ifblue.net/"
RUGE_SEARCH_URL = "http://www.ifblue.net/search/{0}/page/{1}"

# Browser-like request headers shared by every ifblue.net page fetch,
# so the scraper looks like an ordinary Chrome visit.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'DNT': '1',
    'Host': 'www.ifblue.net',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

def req_html(url):
    """Fetch a page from www.ifblue.net using the shared browser headers.

    Parameters
    ----------
    url : str
        Absolute URL of the page to fetch.

    Returns
    -------
    str
        The response body on HTTP 200, otherwise ``""``.
    """
    # 'resp', not 'req': the original local shadowed the module-level req().
    # A timeout keeps the script from hanging forever on a dead endpoint.
    resp = requests.get(url, headers=headers, timeout=15)
    if resp.status_code == 200:
        return resp.text
    return ""

def main():
    """Search ifblue.net for PDF e-books by keyword, resolve each hit to
    its Baidu pan share URL and access code, print them, and save the
    results to ``down.txt`` (one entry per line).
    """
    key = input('搜索关键词:')
    page = int(input('查询页面:'))
    items = []
    print('当前搜索为 ', key)
    count = 1

    # Phase 1: collect {title, url} for every article on the requested pages.
    for p_ in range(page):
        pp_ = p_ + 1
        print('当前页面 : ', pp_)
        html = req_html(RUGE_SEARCH_URL.format(key, pp_))
        html_obj = bp4(html, "lxml")
        articles = html_obj.find_all(name='article', attrs={'class': "excerpt"})
        if len(articles) < 1:
            # No more results: stop paging rather than exit()-ing the whole
            # process, so hits from earlier pages are still processed below.
            break
        for art_ in articles:
            item = {
                'title': art_.header.h2.a['title'],
                'url': art_.header.h2.a['href'],
            }
            items.append(item)
            print('{0:3} {1:40} {2}'.format(count, item['title'], item['url']))
            count += 1

    # Phase 2: resolve each article to its Baidu pan link and access code.
    PdfItems = []
    c_ = 1
    for d_ in items:
        print('当前下载', d_['title'], end=' ')
        # Article URL http://www.ifblue.net/<pid>.html maps to the
        # download page http://www.ifblue.net/download.html?pid=<pid>.
        downurl = d_['url'].replace('.html', '').replace(
            'http://www.ifblue.net/', 'http://www.ifblue.net/download.html?pid=')
        html = req_html(downurl)
        html_obj = bp4(html, "lxml")
        dd_ = html_obj.find('center')
        try:
            # dd_ may be None or lack an <a>; keep the lookup inside the
            # try so a malformed page reports 获取失败 instead of crashing.
            baiduurl = dd_.a['href']
            # Strip the URL down to the bare share id expected by req().
            share_id = (baiduurl.replace('https://', '')
                                .replace('http://', '')
                                .replace('pan.baidu.com/s/1', ''))
            j_obj = json.loads(req(share_id))
            PdfItems.append({
                'id': c_,
                'title': d_['title'],
                'url': baiduurl,
                'code': j_obj['access_code'],
            })
            print('{0} {1}'.format(baiduurl, j_obj['access_code']))
            c_ += 1
        except Exception:
            # Best-effort scrape: a missing link, bad JSON, or absent
            # access_code just skips this entry.
            print(' 获取失败 ')

    # Phase 3: persist the resolved links, one per line (the original
    # format string lacked '\n', concatenating every entry onto one line).
    with open('down.txt', 'w', encoding='utf-8') as pf:
        for t_ in PdfItems:
            pf.write('{0:3} 名称: {1} 链接: {2} 密码: {3}\n'.format(
                t_['id'], t_['title'], t_['url'], t_['code']))


if __name__ == '__main__':
    main()

相关文章

网友评论

    本文标题:免密,python通过关键字自动获取PDF网盘下载地址!

    本文链接:https://www.haomeiwen.com/subject/teveoftx.html