javhd高清python爬虫源码

javhd不解释,懂的人自然懂,这是python版爬虫,默认1080P高清,需要账号才能爬取
使用centos6.0+python3.0
需要requests库
pip3 install requests
账户设置
请打开源码文件,在第二行设置账户的 cert
也就是你注册javhd后收到的Welcome to JAVHD邮件里面的Login URL中的credentials后面一段字符串
如:secure.javhd.com/login/?credentials=[这里是字符串]&lang=en
视频的清晰度设置
第三行为要爬取视频的清晰度

爬虫源码:

import requests,re,threading,os
# Account credential string appended to the login URL as the ``credentials``
# query parameter (see spider.req).
# NOTE(review): this is a hard-coded secret; consider loading it from the
# environment or a config file instead of committing it to source.
cert='YWsxM240YWprOHxhZ3RrZWRjaXdo'
# Video resolution to download; compared against each source's ``res`` value.
quality=1080
class spider:
    """Build listing URLs, log in, and resolve per-video download URLs.

    The instance only stores the wanted resolution; every network call is
    made through ``requests`` or through a session passed in by the caller.
    """

    def __init__(self, sp):
        """Remember the resolution to select.

        Args:
            sp: Resolution compared (as an int) against each source's
                ``res`` field in :meth:`sources_mp4`.
        """
        self.sp = sp

    def page(self, flag):
        """Return the URL of listing page number *flag*."""
        page_url = 'https://javhd.com/zh/japanese-porn-videos/justadded/all/' + str(flag)
        return page_url

    def req(self):
        """Log in with the module-level ``cert`` and return the session.

        Returns:
            A ``requests.Session`` that has requested the login URL and then
            the URL from that response's ``location`` header.

        Raises:
            KeyError: If the login response carries no ``location`` header.
        """
        req = requests.Session()
        response = req.get('https://secure.javhd.com/login/index/direct?credentials=' + cert + '&back=javhd.com&lang=zh', allow_redirects=False)
        # The redirect is followed by hand on the same session object --
        # presumably so the session picks up the login state; verify.
        req.get(response.headers['location'])
        return req

    def find_info(self, page_url):
        """Fetch a listing page and extract its video entries.

        Args:
            page_url: URL of the listing page, e.g. from :meth:`page`.

        Returns:
            A list of 2-tuples of regex captures: the ``clickitem`` attribute
            value and the text line following the ``t ">`` marker.
        """
        req = requests.get(page_url)
        info = re.findall(r'clickitem="(.*?)".*?t ">\n(.*?)\n.*?</span>', str(req.text), re.M)
        return info

    def find_mp4(self, id, reqget):
        """Return the decoded JSON player description for video *id*.

        Args:
            id: Video identifier inserted into the player URL.
            reqget: Session-like object whose ``get`` performs the request.
        """
        url = 'https://javhd.com/zh/player/' + str(id) + '?type=vjs'
        req = reqget.get(url)
        return req.json()

    def sources_mp4(self, dict, reqget):
        """Resolve the download URL of the source matching ``self.sp``.

        Args:
            dict: Player description containing a ``sources`` list whose
                items have ``res`` and ``src`` keys.
            reqget: Session-like object whose ``get`` performs the request.

        Returns:
            The ``location`` header of the (unfollowed) response for the
            first source whose ``res`` equals ``self.sp``, or ``None`` when
            no source has that resolution.
        """
        for i in dict['sources']:
            if int(i['res']) == self.sp:
                w = reqget.get(i['src'], allow_redirects=False)
                return w.headers['location']
        # Made explicit: the original fell off the end of the function here.
        return None
def Handler(start, end, url, filename):
    """Download one byte range of *url* into ``<filename>.mp4``.

    Args:
        start: First byte offset requested; also the file offset at which
            the received data is written.
        end: Last byte offset placed in the ``Range`` header.
        url: Address of the resource to fetch.
        filename: Target path without the ``.mp4`` suffix. The file must
            already exist because it is opened in ``r+b`` mode.
    """
    headers = {'Range': 'bytes=%d-%d' % (start, end)}
    with requests.get(url, headers=headers, stream=True) as r:
        with open(filename + '.mp4', "r+b") as fp:
            fp.seek(start)
            # Write chunk by chunk so a large range is never held in memory
            # as a whole (``r.content`` would buffer all of it first).
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    fp.write(chunk)
def download(url, tittle, num_thread=10):
    """Download *url* to ``<tittle>.mp4`` using several range requests.

    The size is read from a HEAD request, the target file is pre-allocated,
    and each worker thread runs ``Handler`` on its own byte range.

    Args:
        url: Address of the file to download.
        tittle: Output file name without the ``.mp4`` suffix.
        num_thread: Maximum number of worker threads.

    Returns:
        None. Prints a message and returns early when the HEAD response has
        no usable ``content-length`` header.
    """
    r = requests.head(url)
    try:
        file_name = tittle
        file_size = int(r.headers['content-length'])
    except (KeyError, TypeError, ValueError):
        print("检查URL,或不支持对线程下载")
        return

    # Pre-allocate the file so every worker can seek to its own offset.
    with open(file_name + '.mp4', "wb") as fp:
        fp.truncate(file_size)

    workers = []
    if file_size > 0:
        # Never use more workers than there are bytes to fetch.
        count = max(1, min(num_thread, file_size))
        part = file_size // count
        for i in range(count):
            start = part * i
            # HTTP byte ranges are inclusive, so each range ends one byte
            # before the next begins; the last one takes the remainder.
            if i == count - 1:
                end = file_size - 1
            else:
                end = start + part - 1
            t = threading.Thread(
                target=Handler,
                kwargs={'start': start, 'end': end, 'url': url, 'filename': file_name},
                daemon=True,
            )
            t.start()
            workers.append(t)

    # Wait for all download threads started here to finish.
    for t in workers:
        t.join()
    print('%s 下载完成' % file_name)
def run():
    """Walk the listing pages and download every video not yet on disk.

    Logs in once, then iterates listing pages starting at 1. For each entry
    the title is printed; if ``<title>.mp4`` does not exist, the download URL
    is resolved, printed and passed to ``download``. The loop ends at the
    first page that yields no entries.
    """
    s = spider(quality)
    reqget = s.req()
    flag = 1
    while True:
        page = s.page(flag)
        info = s.find_info(page)
        if not info:
            # An empty page means there is nothing further to list; without
            # this check the loop would request pages forever.
            break
        for i in info:
            tittle = i[1].strip()
            print(tittle)
            if os.path.exists(str(tittle) + '.mp4'):
                # Already downloaded: skip before issuing any request.
                continue
            mp4_dict = s.find_mp4(i[0], reqget)
            # Resolve once; each call performs a network request.
            mp4_url = s.sources_mp4(mp4_dict, reqget)
            print(mp4_url)
            if mp4_url is None:
                # No source at the configured resolution.
                continue
            download(mp4_url, tittle)
        flag += 1
# Start crawling only when executed as a script, not when imported.
if __name__ == '__main__':
    run()

 

郑重声明:
本站所有内容均由互联网收集整理、网友上传,并且以计算机技术研究交流为目的,仅供大家参考、学习,不存在任何商业目的与商业用途。若您需要商业运营或用于其他商业活动,请您购买正版授权并合法使用。 我们不承担任何技术及版权问题,且不对任何资源负法律责任。如链接失效或侵犯版权,请给我们来信:66553826#qq.com

11

发表评论