首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >Python 3 while True:循环

Python 3 while True:循环
EN

Stack Overflow用户
提问于 2016-05-26 08:37:25
回答 1查看 1K关注 0票数 0

我在代码中实现 while True: 循环时遇到了困难。

当代码执行到 if file_removed in bs: 时,如果页面上出现了 file_removed 对应的内容(即文件已被删除),我需要让用户从已列出的链接中重新输入一个整数编号,也就是回到 which_link = input(lb + "\n|Which vodlocker link should we use?\n|\n| --> ") 这一步。

代码语言:python
复制
#!/usr/bin/python3
# -*- coding: utf-8 -*-

from GoogleScraper import scrape_with_config, GoogleSearchError
from urllib.request import urlretrieve
from urllib.request import URLError
from urllib.request import urlopen
from hurry.filesize import size
from sys import getsizeof
from bs4 import BeautifulSoup as BS
import subprocess as sp
import requests as r
import pandas as pd
from pyprind import ProgBar
import psutil
import string
import tqdm
import time
import sys
import re
import os    

def _title_noise_pattern():
    """Return a compiled, case-insensitive regex matching release tags to strip from titles."""
    noise_tokens = [
        "HDTV", "hdtv", "BDRip", "BRrip", "HDRip", "HDTS", "hdts",  # disc type
        "720p", "1080p",                                            # dimensions
        "XviD", "Watch",
        "mp4", "MP4", "X264",
        "mkv", "MKV",                                               # video types
        "avi", "AVI",
        "-LOL", "LOL",
        "ac3", "AC3",
        "-playnow", "playnow", "VoDLocker", "-", "_",
        "AC3-PLAYNOW",
        "EVO", "evo",
        "GECKOS", "FASTSUB",                                        # tags
        "DIMENSION", "-DIMENSION",
        "REPACK", "Vostfr",
        "VOSTFR", "libre",
        "fum", "-fum",
        "on 4vid tv online", "(", ")",
    ]
    # Escape every token: unescaped, "(" and ")" form an empty regex group
    # instead of matching literal parentheses.
    return re.compile("|".join(re.escape(token) for token in noise_tokens), flags=re.I)


def _download_film(session, film_link, headers, target_path, file_name, lb):
    """Stream film_link to target_path in 1 KiB chunks, showing progress when the size is known."""
    u = session.get(film_link, headers=headers, stream=True)  # open the file stream
    # The content-length header is optional, so look it up without raising.
    content_length = u.headers.get("content-length")
    file_size = int(content_length) if content_length is not None else None
    print(lb + "\n|File Path and name:\n|\n|" + target_path)
    if file_size is None:
        print(lb + "\n|File Size: unknown")
        bar = None  # no total to measure progress against
    else:
        print(lb + "\n|File Size: " + size(file_size))
        bar = ProgBar(file_size / 1024, title=lb + "\n|Downloading:\n|\n|" + file_name + "\n" + lb, stream=sys.stdout, bar_char='█', update_interval=1)
    with open(target_path, 'wb') as f:
        for chunk in u.iter_content(1024):
            f.write(chunk)
            if bar is not None:
                bar.update(item_id=file_name)


def the_link():
    """Search Google for a film on vodlocker.com and download the chosen result.

    Interactive flow:
      1. Prompt for a search term and scrape Google (restricted to
         vodlocker.com); the scraper is configured to save to results.csv.
      2. Read results.csv, print the result titles and prompt for the index
         of the link to use.
      3. Fetch that page. If it reports that the file was removed, or no
         video source / title can be found on it, print a message and stop.
      4. Derive a file name from the page title and stream the video into
         /home/jack/server.files/mp4/films/.

    Returns:
        None. All results are reported through stdout and the written file.
    """
    lb = "+--------------------------------------------------------------+"
    site = "site:vodlocker.com "  # prefix of search term
    keywords = site + input(lb + "\n|Film to search\n|\n| --> ")  # ask user for search term
    config = {
        'use_own_ip': True,
        'keyword': keywords,
        'search_engines': ['google'],           # various configuration settings
        'num_pages_for_keyword': 1,
        'scrape_method': 'http',
        #'sel_browser': 'phantomjs',            # this makes scraping with browsers headless and quite fast.
        'do_caching': True,
        'num_results_per_page': 50,
        'log_level': 'CRITICAL',
        'output_filename': 'results.csv',       # file to save links to
    }
    try:
        scrape_with_config(config)
    except GoogleSearchError as e:
        # NOTE(review): after a failed search the flow still reads results.csv,
        # which may be stale or missing -- confirm this best-effort is intended.
        print(e)

    csv_file = "results.csv"
    df = pd.read_csv(csv_file)
    vodlocker_link = df['link']  # the vodlocker links from the file
    link_id = df['title']
    results = df['num_results_for_query']
    results_lower = results[0].lower()
    print(lb + "\nWe have found " + results_lower + "\n")
    print(link_id)

    s = r.Session()  # one requests session for the page fetch and the download
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) Safari/9537.53",
        "Accept": "text/q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en",
    }
    which_link = input(lb + "\n|Which vodlocker link should we use?\n|\n| --> ")
    req = s.get(vodlocker_link[int(which_link)], headers=headers)  # get the page from link
    bs = BS(req.text, "lxml")  # create a soup object

    # `text in soup` only compares against the soup's direct children, so it
    # never finds markup nested in the page; inspect the <h3> headings instead.
    removed_notice = "The file was removed by administrator"
    if any(removed_notice in heading.get_text() for heading in bs.find_all("h3")):
        print(removed_notice)
        return

    film = bs.find(type="video/mp4")  # find the film link
    title_tag = bs.find(id="file_title")
    if film is None or title_tag is None:
        # Without this guard a page lacking either element crashes on None.
        print(lb + "\n|Could not find a video link or title on the vodlocker page")
        return

    film_link = film["src"]  # get the actual film link
    title = title_tag.get_text()
    fixed_title = _title_noise_pattern().sub("", title)
    title_hyphen = fixed_title.lower().strip().replace(" ", "-")
    print(lb + "\n|The title of the film is:\n|" + fixed_title)
    print(lb + "\n|We found a video link on the vodlocker page:\n|" + film_link)

    prfx = "/home/jack/server.files/mp4/films/"  # prefix for file_name location
    ext = film_link[-4:]  # assumes the link ends in a 4-character extension such as ".mp4"
    file_name = title_hyphen + ext
    print(lb + "\n|We will name the file:\n|\n|" + file_name)
    # Pause for confirmation only; the answer is not used.
    # TODO: prompt user for a name if we can't guess it
    input(lb + "\n|We have attempted to name the file from the title. Is our guess O.K?\n|\n|[Any key to cotinue]--> ")

    _download_film(s, film_link, headers, prfx + file_name, file_name, lb)
    print(lb)
    print("\n|Finished downloading " + file_name)
    print(lb)

# Start the interactive search-and-download flow as soon as this module runs.
the_link()

我知道我的代码比较杂乱,格式也需要好好整理,所以如果在这方面有任何建议,我将不胜感激。

EN

回答 1

Stack Overflow用户

发布于 2016-05-26 08:46:07

把需要重复执行的部分放进循环里。当满足成功的退出条件时,用 break 跳出循环:

代码语言:python
复制
...
headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) Safari/9537.53", "Accept":"text/q=0.9,image/webp,*/*;q=0.8", "Accept-Language":"en-US,en"} # set the user agent and stuff in the header
removed_notice = "The file was removed by administrator"
# Keep asking for a link until one leads to a page that still hosts the file.
while True:
    which_link = input(lb + "\n|Which vodlocker link should we use?\n|\n| --> ")
    try:
        page_url = vodlocker_link[int(which_link)]
    except (ValueError, KeyError, IndexError):
        # Non-numeric or out-of-range answer: ask again instead of crashing.
        print("Please enter one of the listed link numbers")
        continue
    req = s.get(page_url, headers=headers) # get the page from link
    bs = BS(req.text, "lxml") # create a soup object
    # `text in soup` only compares against the soup's direct children, so it
    # never finds markup nested in the page; inspect the <h3> headings instead.
    if any(removed_notice in heading.get_text() for heading in bs.find_all("h3")):
        print(removed_notice)
    else:
        break # usable page found: leave the loop
film = bs.find(type="video/mp4") # find the film link
film_link = film["src"] # get the actual film link
...
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/37449772

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档