爬取清风网vvvdj.com播放的mp3到文件

10世界杯决赛

爬取清风网播放mp3到文件

#coding=utf-8

import contextlib

import os

import tempfile

import requests

from urllib import request

import re

import execjs

def Myurlretrieve(url, filename=None, reporthook=None, data=None, mode="ab"):

(url_type, path) = url.split(':',1)

with contextlib.closing(request.urlopen(url=url,data=data)) as fp:

headers = fp.info()

# Just return the local path and the "headers" for file://

# URLs. No sense in performing a copy unless requested.

if url_type == "file" and not filename:

return os.path.normpath(path), headers

# Handle temporary file setup.

if filename:

tfp = open(filename, mode)

else:

tfp = tempfile.NamedTemporaryFile(delete=False)

filename = tfp.name

with tfp:

result = filename, headers

bs = 1024 * 8

size = -1

read = 0

blocknum = 0

if "content-length" in headers:

size = int(headers["Content-Length"])

if reporthook:

reporthook(blocknum, bs, size)

while True:

block = fp.read(bs)

if not block:

break

read += len(block)

tfp.write(block)

blocknum += 1

if reporthook:

reporthook(blocknum, bs, size)

if size >= 0 and read < size:

#pass

raise Exception(

"retrieval incomplete: got only %i out of %i bytes"

% (read, size), result)

return result

class Url_ts():

"""

通过一条url获取m3u8文件并下载其ts文件合并到.MP4

"""

def __init__(self,url,filename="defualt.mp4"):

"""

:param url: 下载m3u8文件地址

:param filename: 文件名如."defualt.mp4"

"""

url='https:'+url if 'http' != url[:4] else url

self.url=url

self.server=self.url.rsplit("/", 2)[0] + "/"

info=re.findall('(.*?)(\d+).m3u8\?.*?',url)

self.head=info[0][0]

self.id=info[0][1]

os.makedirs('download',exist_ok=True)

if filename:

self.filename=filename

else:

self.filename = 'download/'+str(self.id)+'.mp3'

def getVideoTsUrl(self):

"""

通过url 下载m3u8文件,读取m3u8文件获取.ts文件链接

:param url: 下载m3u8文件的地址

:return: yeild :ts文件地址

"""

all_content = requests.get(url=self.url).text# 获取M3U8的文件内容

file_line = all_content.split("\n") # 读取文件里的每一行

# 通过判断文件头来确定是否是M3U8文件

if file_line[0] != "#EXTM3U":

raise BaseException(u"非M3U8的链接")

else:

unknow = True # 用来判断是否找到了下载的地址

for index, line in enumerate(file_line):

#print(index,line)

if "EXT" in line:

unknow = False

else:

if line != "":

pd_url =self.head+line

yield pd_url

if unknow:

raise BaseException("未找到对应的下载链接")

def download_ts(self):

"""

下载ts文件

:return:

"""

for url in self.getVideoTsUrl():

print(url)

Myurlretrieve(url=url,filename=self.filename, reporthook=self.Schedule, mode="ab")

def Schedule(self,a, b, c):

"""

进度条

:param a:

:param b:

:param c:

:return:

"""

per = 100.0 * a * b / c

if per > 100:

per = 1

#print(" " + "%.2f%% 已经下载的大小:%ld 文件大小:%ld" % (per, a * b, c) + '\r')

def run(self):

self.download_ts()

class vvvdj:

headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6821.400 QQBrowser/10.3.3040.400',

'Cookie':'PLAYSTYLE=0; bf=1; cnzz_eid=21216903-1317289499-; musicls=%7C171140%7C%2C%7C164533%7C%2C%7C74038%7C%2C%7C171136%7C%2C; qf_musiclog=%7C171140%7C%2C%7C164533%7C%2C%7C74038%7C%2C%7C171136%7C%2C; sin44053=; rtime=2; Hm_lvt_d89009fe03c70c3c98531373f1b90625=1547208642,1547274108,1547466115; Hm_lvt_597685a72dadb90d39ad0191f13b72af=1547208642,1547274108,1547466115; Hm_lpvt_d89009fe03c70c3c98531373f1b90625=1547466130; Hm_lpvt_597685a72dadb90d39ad0191f13b72af=1547466130; cnzz_a44053=6; ltime=1547467897127',

'Connection':'keep-alive',

'Upgrade-Insecure-Requests':'1',}

def __init__(self):

pass

def js(self,s):

ctx=execjs.compile(s)

x=ctx.call('getUrl')

return x

def getHtml(self,url):

try:

html=requests.get(url=url,headers=self.headers)

html=html.content.decode('utf-8')

self.title=re.findall('(.*?)',html)[0].split(',',-1)[0]

jsxStr = html.find(r'function DeCode() {')

jsyStr = html.find('function Playall(num)')

funcCode = html[jsxStr:jsyStr]

s='function getUrl(){ var playurl=""; var XCODE=new DeCode();'+funcCode+ ';return playurl;}'

urlString=self.js(s)

bl=True

return urlString,bl

except Exception as e:

bl=False

return e.args[0],bl

while True:

url=input('输入地址:').strip()

#url='https://www.vvvdj.com/play/210585.html'

_id=re.findall('play/(\d+).html',url)[0]

vvv=vvvdj()

url,bl=vvv.getHtml(url)

os.makedirs('download',exist_ok=True)

fileName='download/{}.mp3'.format(vvv.title)

if os.path.exists(fileName):

if v:=input('已经存在,是否覆盖(y/n):'):

if v=='n':

continue

x=Url_ts(url,filename=fileName)

x.run()

print('下载{}完成'.format(fileName))