美文网首页
下载美拍(舞蹈类)视频--Python

下载美拍(舞蹈类)视频--Python

作者: 52_St | 来源:发表于2018-05-17 00:33 被阅读98次

分析暂时不介绍

import base64
import re
import time

import requests

'''抓取美拍 指定分类 视频'''


class MeiPai:
    """Download videos of one Meipai (meipai.com) category.

    Typical use: create an instance, set ``category_name`` to one of the keys
    of ``category``, set ``DEBUG = False`` if files should really be written,
    then call ``start()``.
    """

    # Seconds to wait on the server for every HTTP request. Without a timeout
    # ``requests`` may block forever on a stalled connection.
    TIMEOUT = 10
    # Number of items requested per listing page; a shorter page is treated
    # as the last one.
    PAGE_SIZE = 24

    def __init__(self):
        self.home_url = 'http://www.meipai.com'
        # category name -> [category id, path of the listing endpoint]
        self.category = {
            '搞笑': ['13', '/squares/new_timeline'],
            '爱豆': ['16', '/squares/new_timeline'],
            '高颜值': ['474', '/squares/new_timeline'],
            '舞蹈': ['63', '/topics/hot_timeline'],
            '精选': ['488', '/squares/new_timeline'],
            '音乐': ['62', '/topics/hot_timeline'],
            '美食': ['59', '/topics/hot_timeline'],
            '美妆': ['27', '/squares/new_timeline'],
            '吃秀': ['423', '/squares/new_timeline'],
            '宝宝': ['18', '/topics/hot_timeline'],
            '宠物': ['6', '/topics/hot_timeline'],
            '手工': ['450', '/topics/hot_timeline'],
            '游戏': ['480', '/topics/hot_timeline'],
            '运动': ['487', '/topics/hot_timeline'],
            '穿秀': ['460', '/topics/hot_timeline'],
        }
        self.category_name = None  # must be set to a key of ``category``
        self.video_id = None  # id of the video currently being processed
        self.total = 0  # number of videos handled so far
        self.page = 1  # listing page to start from
        self.DEBUG = True  # debug mode (default): nothing is written to disk

    def tid(self):
        """Return the ``tid`` query value for the selected category.

        The value is taken from ``interested_id: <digits>`` in the source of
        the category page. When that value is ``0`` or cannot be found, the
        category id itself is used.

        :raises KeyError: if ``category_name`` is not a key of ``category``.
        :raises requests.RequestException: on network or HTTP errors.
        """
        category_id = self.category[self.category_name][0]
        url = self.home_url + '/square/{}'.format(category_id)
        response = requests.get(url, timeout=self.TIMEOUT)
        response.raise_for_status()
        return self._extract_tid(response.text, category_id)

    @staticmethod
    def _extract_tid(html, fallback):
        """Return the first ``interested_id`` found in *html*, else *fallback*.

        *fallback* is also returned when the id on the page is ``'0'``.
        """
        match = re.search(r'interested_id: (\d+)', html)
        if match is None or match.group(1) == '0':
            return fallback
        return match.group(1)

    def ajax(self, tid, total_page=None):
        """Yield the encoded ``video`` field of every listed media item.

        Pages are requested one after another, starting at ``self.page``.
        ``self.video_id`` is set to the item's ``id`` before each value is
        yielded; items without an id are skipped.

        :param tid: value returned by ``tid()``.
        :param total_page: number of the last page to fetch; ``None``
            (default) means keep going until a page comes back short.
        :raises requests.RequestException: on network or HTTP errors.
        """
        url = self.home_url + self.category[self.category_name][1]
        while True:
            print('正在下载第{}页...'.format(self.page))
            params = {
                'page': self.page,
                'count': self.PAGE_SIZE,
                'tid': tid,
            }
            response = requests.get(url, params=params, timeout=self.TIMEOUT)
            response.raise_for_status()
            # A missing or null "medias" entry is treated as an empty page
            # instead of failing on iteration.
            medias = response.json().get('medias') or []
            for media in medias:  # at most PAGE_SIZE entries per page
                self.video_id = media.get('id')
                if self.video_id:
                    yield media.get('video')
            # ">=" so the limit still applies when the start page is
            # already past ``total_page``.
            reached_limit = total_page is not None and self.page >= total_page
            if reached_limit or len(medias) < self.PAGE_SIZE:
                print('共下载了{}页,{}个视频'.format(self.page, self.total))
                break
            time.sleep(1)  # pause between listing requests
            self.page += 1

    # ------------------------------------------------------------------
    @staticmethod
    def decode(code):
        """Return the plain video URL hidden in the obfuscated string *code*.

        The first four characters, reversed and read as a hexadecimal
        number, give a decimal number whose digits steer the clean-up:
        the first two digits are an offset and a length of characters to
        drop near the front, the remaining digits describe a second run to
        drop near the end. What is left is base64-decoded.

        :param code: obfuscated video link as delivered by the listing.
        :raises TypeError, ValueError, IndexError: if *code* is not a
            string in the expected layout.
        """
        first_4 = str(int(code[:4][::-1], 16))
        pre = [int(x) for x in first_4[:2]]
        tail = [int(x) for x in first_4[2:]]
        code = code[4:]
        # Drop pre[1] characters starting at offset pre[0].
        code = code[:pre[0]] + code[pre[0]:].replace(code[pre[0]:pre[0] + pre[1]], '', 1)
        # The second offset is counted from the end of the remaining text.
        tail[0] = len(code) - sum(tail)
        code = code[:tail[0]] + code[tail[0]:].replace(code[tail[0]:tail[0] + tail[1]], '', 1)
        return base64.b64decode(code).decode()

    def download(self, video_url):
        """Save the video at *video_url* as ``<video_id>.mp4``.

        In debug mode (``self.DEBUG`` true, the default) the URL is only
        printed and nothing is fetched or written.

        :param video_url: decoded download address of the video.
        :raises requests.RequestException: on network or HTTP errors.
        :raises OSError: if the file cannot be written.
        """
        print('正在下载...{}'.format(video_url))
        if self.DEBUG:
            return
        response = requests.get(video_url, timeout=self.TIMEOUT)
        # Do not store an HTTP error page under a .mp4 name.
        response.raise_for_status()
        with open('{}.mp4'.format(self.video_id), 'wb') as f:
            f.write(response.content)

    def start(self, total_page=None):
        """Fetch, decode and download the videos of the selected category.

        Items that cannot be decoded or downloaded are reported and
        skipped; ``self.total`` counts the items handled successfully.

        :param total_page: number of the last page to fetch, each page
            holding up to ``PAGE_SIZE`` videos; ``None`` means no limit.
        """
        tid = self.tid()
        for encoded in self.ajax(tid, total_page):
            try:
                video_url = self.decode(encoded)
            except (TypeError, ValueError, LookupError) as e:
                print(e, self.video_id, '解密失败!')
                continue
            try:
                self.download(video_url)
            except (requests.RequestException, OSError) as e:
                # One broken video must not abort the whole run.
                print(e, self.video_id, '下载失败!')
                continue
            self.total += 1


if __name__ == '__main__':
    # Really download (debug mode off) the dance category, stopping after
    # page 1; see the start() docstring for the meaning of the argument.
    downloader = MeiPai()
    downloader.DEBUG = False
    downloader.category_name = '舞蹈'
    downloader.start(total_page=1)

相关文章

网友评论

      本文标题:下载美拍(舞蹈类)视频--Python

      本文链接:https://www.haomeiwen.com/subject/zojvdftx.html