
XMLTV EPG guide data with Python

Stack Overflow user
Asked on 2018-05-07 00:04:36
1 answer · 1.5K views · 0 followers · 0 votes

I have a Python script that runs end to end and produces what looks like valid XMLTV guide data for the Kodi EPG (although this is my first attempt at creating such data):

Tv_guide class:

Code language: python

# Imports reconstructed from usage below; they were not shown in the original
# post. SelectProxy is assumed to be the poster's own proxy-selection helper.
import re
import time
import traceback
from datetime import datetime

import requests
from bs4 import BeautifulSoup

import SelectProxy


def __init__(self, start, end, args, kwargs):

    self.start = 1
    self.end = 3000


global i, channel_list, url_list, date_check, image_list, super_list, epg_list

channel_list = []
url_list = []
date_check = []
image_list = []
super_list = []
epg_list = []

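# Scrape a single channel while testing: id 66 (BBC News, per the output below).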
for i in range(66, 67):

    global div_list
    div_list = []

    time.sleep(1)


    def session_setup():


        global r, html


        SelectProxy.select_proxy()
        local_proxy = SelectProxy.global_proxy

        session = requests.Session()
        # requests expects proxies as a scheme-to-URL dict; a set literal here is
        # almost certainly a bug (assuming local_proxy is a proxy URL string).
        session.proxies = {'http': local_proxy, 'https': local_proxy}

        url = ['http://www.tvguide.co.uk/mobile/channellisting.asp?ch=', str(i)]
        url = ''.join(url)

        if url not in url_list:

            url_list.append(url)

        headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Host': 'www.tvguide.co.uk',
        'Referer': 'http://www.tvguide.co.uk/mobile/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
        }

        r = session.get(url, headers=headers)

        html = r.content

    session_setup()


    def soup_scanner():


        global soup
        global channel, image4, div_list2

        time.sleep(1)

        soup = BeautifulSoup(r.text, "html.parser")


        try:

            channel = soup.find('h1').text

            for div in soup.select('div'):

                if not div.script:

                    for image in div.find_all('img'):
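                        # Extract the src URL from the tag's string form; image.get('src') would be the more direct BeautifulSoup call.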

                        image2 = re.search(r' src="\S+?"', str(image))
                        image2 = image2.group(0)
                        image3 = image2.split('src="')[1]
                        image4a = image3.split('"')[0]

                        div_list.append(image4a)

            div_list2 = ''
            div_list2 = div_list[3]


        except Exception as exc:

            pass


    soup_scanner()


    def soup_parser():


        global joined_list, joined_list2, date, time_list, desc_list, rating_list, date_list


        rolling_date = soup.find_all('h2')

        joined_list2 = []


        for x in soup.select('table'):

            time_list = []
            desc_list = []
            rating_list = []
            date_list = []
            joined_list = []

            xdate = rolling_date[0].text
            rolling_date.pop(0)


            for tr in x.select('tr'):

                if not tr.script:


                    for td in tr.find_all('td'):

                        date_list.append(xdate)

                        a = ''.join(re.sub(r'\s+', ' ', td.text))
                        b = a.strip()


                        if b[:1] in '0123456789':


                            time_list.append(b)


                        else:

                            if ' Rating' in b:

                                c = b.split(' Rating')

                            else:

                                c = b.split(' Rating')
                                c.append(0.0)

                            desc = c[0]
                            desc_list.append(desc)


                            rating = ''.join(['Rating: ', str(c[1])])
                            rating_list.append(rating)



            try:
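                # Shift the start-time list by one so each programme's end time is the next programme's start time.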

                time_list_lag = time_list[:]
                del time_list_lag[0]
                joined_list = zip(time_list, time_list_lag, desc_list, rating_list, date_list)
                joined_list2.append(joined_list)


            except Exception as exc:

                print traceback.format_exc()


    soup_parser()


    def soup_to_text():


        global cat_list, super_list2


        for sub in joined_list2:

            for e in sub:

                super_list2 = []

                now = datetime.now()

                if channel not in channel_list:

                    channel_list.append(channel)

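                # NB: 'date' is referenced here but never assigned in the code shown; presumably it is set elsewhere.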
                if date not in date_check:

                    date_check.append(date)

                if div_list2 not in image_list:

                    image_list.append(div_list2)

                super_list2.append(channel)
                super_list2.append(div_list2)
                super_list2.append('http://www.tv.sky.com')
                super_list2.append(date)


                try:
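                    # NOTE: %p only adjusts the parsed hour when combined with %I (12-hour clock); with %H it has no effect.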

                    starttime = datetime.strptime(' '.join([str(now.year), e[4], e[0]]), '%Y %a %d %b %H:%M%p').strftime('%Y%m%d%H%M%S')
                    endtime = datetime.strptime(' '.join([str(now.year), e[4], e[1]]), '%Y %a %d %b %H:%M%p').strftime('%Y%m%d%H%M%S')

                    global epg_data

                    clean_channel = str(channel).replace('&', '&amp;')
                    clean_e2 = str(e[2]).replace('&', '&amp;')
                    clean_e3 = str(e[3]).replace('&', '&amp;')



                    epg_data = ''.join(['<program start="',starttime,' +0200" stop="',endtime,' +0200" channel="',clean_channel,'">','<desc lang="eng">',clean_e2,' ',clean_e3,'</desc>','<icon src="',div_list2,'" />', \
                           '<country>UK</country>','</program>'])

                    epg_list.append(epg_data)


                except Exception as exc:

                    pass
                    #print e
                    #print traceback.format_exc()


            super_list.append(super_list2)
            cat_list = zip(channel_list, date_check, url_list, image_list)


    soup_to_text()

print '**********************************************************************************************************************************************************************'


print '<?xml version="1.0" encoding="UTF-8"?>'
print '<tv generator-info-name="TV Guide Scraper - by Aaron Aardvark" generator-info-url="http://www.aaardvark.com">'

for superx in super_list:

    print ''.join(['<channel id="',str(superx[0]),'">'])
    print ''.join(['<display-name lang="en">',str(superx[0]),'</display-name>'])
    print ''.join(['<icon src="',str(superx[1]),'" />'])
    print ''.join(['<url>',str(superx[2]),'</url>'])
    print ''.join(['</channel>'])

for epg in epg_list:

    print epg

print '</tv>'

To test it, I'm only scraping a single channel for now. I've tried the output in both the NextPVR backend and an online parsing tool, and got the same result: my output correctly generates a channel listing, but no programme data.

I've included my stripped-down output below. Does anyone see what I'm doing wrong?

Thanks

Code language: xml
<?xml version="1.0" encoding="UTF-8"?>
<tv generator-info-name="TV Guide Scraper - by Aaron Aardvark" generator-info-url="http://www.aaardvark.com">
<channel id="BBC News">
<display-name lang="en">BBC News</display-name>
<icon src="http://my.tvguide.co.uk/channel_logos/60x35/66.png" />
<url>http://www.tv.sky.com</url>
</channel>
<program start="20180506060000 +0200" stop="20180506070000 +0200" channel="BBC News"><desc lang="eng">Breakfast A round-up of national and international news, plus sports reports, weather forecasts and arts and entertainment features (Subtitles) Rating: : 1.5</desc><icon src="http://my.tvguide.co.uk/channel_logos/60x35/66.png" /><country>UK</country></program>
</tv>
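
As a quick sanity check on output like this, one can parse it and count the element names an EPG consumer will actually look for. A minimal sketch using only the standard library, assuming the output above has been saved to a hypothetical guide.xml:

Code language: python

import xml.etree.ElementTree as ET

# Count each element type under the <tv> root; a consumer searching for
# <programme> elements will show no listings if none are present.
tree = ET.parse('guide.xml')  # hypothetical path to the saved output
counts = {}
for child in tree.getroot():
    counts[child.tag] = counts.get(child.tag, 0) + 1
print(counts)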

1 Answer

Stack Overflow user

Answered on 2018-05-07 21:09:21

In case anyone runs into the same problem: I solved it in the end. The tag should be <programme>, not <program>.
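
For reference, a minimal sketch of the corrected line from soup_to_text() above, with the question's variables assumed in scope; only the tag name changes:

Code language: python

# XMLTV uses <programme>, not <program>; everything else stays the same.
epg_data = ''.join(['<programme start="', starttime, ' +0200" stop="', endtime, ' +0200" channel="', clean_channel, '">',
                    '<desc lang="eng">', clean_e2, ' ', clean_e3, '</desc>',
                    '<icon src="', div_list2, '" />',
                    '<country>UK</country>',
                    '</programme>'])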

0 votes
Original page content provided by Stack Overflow.
Source:
https://stackoverflow.com/questions/50201769
