昨天看了一位B站UP主的视频,内容是利用解析接口爬取并下载指定的电影,于是突发奇想,想爬一下我平时常用的动漫网站上的视频。随后在CSDN上找到一篇教程,稍作修改就能用了。
参考资料:樱花动漫中的视频下载分析
话不多说上代码:
import ast
import json
import os
import re

import requests as rq
from bs4 import BeautifulSoup
# ---------------------------------------------------------------------------
# Search the site for a show, open the first hit, open its first episode's
# player page, and extract the play list published by the player script.
#
# Names consumed by the code further down: ``info`` (search hits) and
# ``urllist`` (play list).
# ---------------------------------------------------------------------------

# Every request targets this single mirror. Previously the search went to
# imomoe.io while the Host/Origin/Referer headers and the later requests named
# imomoe.in; keeping one constant prevents the two from drifting apart.
BASE_URL = 'http://www.imomoe.in'

# Seconds to wait for the server; requests never times out unless told to.
REQUEST_TIMEOUT = 30

url = BASE_URL + '/search.asp'
# The search form is submitted with the keyword encoded as GBK bytes.
data = {'searchword': '心理测量者'.encode('gbk')}
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # No hard-coded Content-Length: requests computes it from the encoded
    # form body, so a fixed value only goes stale when the keyword changes.
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': 'UM_distinctid=170eb510b7d0-0ea64ec9866c75-4313f6a-144000-170eb510b7e2b4; CNZZDATA1260742008=448438639-1584494107-%7C1584494107; Hm_lvt_38c112aee0c8dc4d8d4127bb172cc197=1584495924; bdshare_firstime=1584495927443; ASPSESSIONIDSQBSRRDA=EAOODOKAOHCDOLDJLLPKPLDD; first_h=1584498564507; count_h=1; first_m=1584498564511; count_m=1; __music_index__=2; qike123=%u5FC3%u7406%u6D4B%u91CF%u8005%u7B2C%u4E09%u5B63%20%u7B2C01%u96C6^http%3A//www.imomoe.in/player/7656-0-0.html_$_|; Hm_lpvt_38c112aee0c8dc4d8d4127bb172cc197=1584498837',
    'Host': BASE_URL.split('//', 1)[1],
    'Origin': BASE_URL,
    'Referer': url,
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
}

# Step 1: run the search and collect (relative link, title) pairs.
response = rq.post(
    url, data=data, headers=headers, timeout=REQUEST_TIMEOUT
).content.decode('gbk')
pattern = re.compile(r'<h2><a href="(.+?)" target="_blank" title=".+?">(.+?)</a></h2>')
info = pattern.findall(response)
print(info)
print('这是一个搜索结果')
if not info:
    # Stop with a readable message instead of an IndexError on info[0].
    raise SystemExit('No search results were found; check the keyword and BASE_URL.')

# Step 2: open the detail page of the first search hit.
url2 = BASE_URL + info[0][0]
print(url2)
print('这是其中一个地址')
opensearch = rq.get(url2, timeout=REQUEST_TIMEOUT).content.decode('gbk')

# Step 3: collect the episode links inside the <div id="play_0"> element.
# Naming the parser keeps the result independent of which optional parsers
# happen to be installed and silences bs4's "no parser specified" warning.
soup = BeautifulSoup(opensearch, 'html.parser')
src = str(soup.find("div", id="play_0"))
pattern2 = re.compile(r'<a href="(.+?)" target="_blank" title=".+?">(.+?)</a>')
addr = pattern2.findall(src)
print(addr)
if not addr:
    raise SystemExit('No episode links were found in the "play_0" element.')

# Step 4: open the player page of the first episode and locate the script
# referenced inside its <div class="player"> element.
url3 = BASE_URL + addr[0][0]
print(url3)
bofangye = rq.get(url3, timeout=REQUEST_TIMEOUT).content.decode('gbk')
soup2 = BeautifulSoup(bofangye, 'html.parser')
player_div = soup2.find('div', class_="player")
# Non-greedy capture, like the two patterns above; the former `(.+)?` was a
# greedy optional group that could swallow several tags on one line.
pattern3 = re.compile(r'<script src="(.+?)" type="text/javascript"></script>')
script_sources = pattern3.findall(str(player_div))
if not script_sources:
    raise SystemExit('No play-list script was found in the "player" element.')
playdata = script_sources[0]
print(playdata)

# Step 5: download that script and cut the list literal out of it.
url3 = BASE_URL + playdata
addrurl = rq.get(url3, timeout=REQUEST_TIMEOUT).content.decode('gbk')
print(addrurl)
# Drop the first 18 characters and everything from ',urlinfo' onwards.
# NOTE(review): the 18 presumably skips a `var VideoListJson=` prefix -
# confirm against the payload printed above.
src2 = addrurl[18::].split(',urlinfo')[0]
# The text comes from a remote server, so it is parsed strictly as a literal
# rather than executed with eval().
urllist = ast.literal_eval(src2)[0]
# Remove the first element so that urllist[0] is the list of episode entries
# that geturl() iterates over.
urllist.pop(0)
def geturl(entries=None):
    """Return ``(episode_name, video_url)`` pairs parsed from play-list entries.

    Each entry is a ``$``-separated string. The first field is taken as the
    episode name and the second as the video URL; any further fields are
    ignored.

    Args:
        entries: Iterable of entry strings. When omitted, the module-level
            ``urllist[0]`` is used, which is what the original zero-argument
            call did.

    Returns:
        A list of ``(name, url)`` tuples in input order. Entries with fewer
        than two ``$``-separated fields carry no URL and are skipped
        (previously they raised ``IndexError``).
    """
    if entries is None:
        entries = urllist[0]

    pairs = []
    for entry in entries:
        # Split once per entry instead of once per field.
        fields = entry.split('$')
        if len(fields) < 2:
            continue
        pairs.append((fields[0], fields[1]))
    return pairs
# Header set sent with every video download request: a desktop Chrome
# User-Agent and nothing else.
headers2 = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
}

# Show the parsed (episode name, video URL) pairs before downloading starts.
print(geturl())
def download(save_dir=r'E:\动漫'):
    """Download every episode returned by ``geturl()`` into *save_dir*.

    Each file is named ``<title of the first search hit><episode name>.mp4``.
    A failed episode is reported on stdout and skipped, so one bad link does
    not abort the remaining downloads.

    Args:
        save_dir: Target directory; created when missing. Defaults to the
            path the original script had hard-coded.
    """
    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError as exc:
        # Without the directory every single write would fail, so say why
        # once instead of fetching each video only to discard it.
        print('cannot create {}: {}'.format(save_dir, exc))
        return

    for episode_name, video_url in geturl():
        # Replace characters that Windows does not allow in file names.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', info[0][1] + episode_name)
        target = os.path.join(save_dir, safe_name + '.mp4')
        partial = target + '.part'
        try:
            # Stream to disk in chunks instead of holding the whole video in
            # memory; the timeout keeps a dead link from hanging the run.
            with rq.get(video_url, headers=headers2, stream=True, timeout=30) as resp:
                # Do not save an HTTP error page under an .mp4 name.
                resp.raise_for_status()
                with open(partial, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            # Rename only after a complete transfer, so a finished-looking
            # .mp4 is never a truncated download.
            os.replace(partial, target)
        except (rq.RequestException, OSError) as exc:
            # Only network and file-system errors are expected here; anything
            # else (including Ctrl+C) is allowed to propagate.
            print('failed: {} ({})'.format(safe_name, exc))
            continue
        print('success')
# If this fails, check that the site address is correct: this script uses
# imomoe.in, whereas the article it is based on used imomoe.io (the site is
# served under several domain names).
download()
行云博客 - 免责声明 本站提供的一切软件、教程和内容信息仅限用于学习和研究目的;不得将上述内容用于商业或者非法用途,否则,一切后果请用户自负。本站信息来自网络,版权争议与本站无关。您必须在下载后的24个小时之内,从您的电脑、手机中彻底删除上述内容。如果您喜欢该程序,请支持正版,购买注册,得到更好的正版服务。如有侵权请邮件与我联系处理。敬请谅解!
本文链接:https://www.xy586.top/472.html
转载请注明文章来源:行云博客 » python3爬取樱花动漫的视频