我知道已经有其他现成的类。我之所以自己写这个类,是因为我不需要那些花哨的功能——不需要编辑、不需要讨论页等等,我只需要能够搜索标题并获取 wiki 标记。
欢迎任何意见、建议、评论、审查,或者其他任何反馈。
# -*- coding: utf-8 -*-
import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode
try:
import json
except:
import simplejson as json
def enum(*sequential, **named):
    """Build a simple enumeration class.

    sequential - member names, numbered 0, 1, 2, ... in the order given
    named      - extra name=value members; a keyword with the same name as
                 a sequential entry replaces that entry's number

    Returns a new class named 'Enum' whose class attributes are the members.
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)
class Wiki:
    """Minimal read-only client for a MediaWiki api.php endpoint.

    Searches for page titles and fetches the raw wiki markup of a page.
    "Nothing found" is reported as an empty result ([] or u"") rather than
    as an exception.
    """

    def __init__(self, api=None):
        """
        api - URL of the wiki's api.php; English Wikipedia when None
        """
        if api is None:
            api = "http://en.wikipedia.org/w/api.php"
        self.api = api

    @staticmethod
    def _quote(text):
        """Percent-encode text for a query string, as UTF-8 bytes."""
        # Resolved lazily so the class works on Python 2 and Python 3.
        try:  # Python 2
            from urllib2 import quote
        except ImportError:  # Python 3
            from urllib.parse import quote
        # Byte strings are passed through as they are; text is encoded first.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return quote(text)

    def downloadFile(self, URL=None):
        """A HTTP Request

        URL - The URL to fetch

        Returns the response body decoded as UTF-8 (strict).
        """
        try:  # Python 2
            from urllib2 import build_opener
        except ImportError:  # Python 3
            from urllib.request import build_opener
        opener = build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(URL)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        return data.decode('utf-8')

    def search(self, searchString):
        """Search the wiki for titles

        searchString - text to search for (byte string or unicode)

        Returns a list of at most 10 titles (srlimit=10). The list is empty
        when searchString is empty/None or the reply has no 'query' section.
        """
        if not searchString:
            return []
        url = (self.api
               + "?action=query&list=search&format=json&srlimit=10&srsearch="
               + self._quote(searchString))
        reply = json.loads(self.downloadFile(url))
        if not reply or 'query' not in reply:
            return []
        results = []
        for item in reply['query'].get('search', []):
            wikiTitle = item['title']
            # Normalise byte-string titles to text.
            if isinstance(wikiTitle, bytes):
                wikiTitle = wikiTitle.decode('utf-8')
            results.append(wikiTitle)
        return results

    def searchTop(self, searchString):
        """Search for the top wiki title

        Returns the first title found by search(), or u"" when there is none.
        """
        results = self.search(searchString)
        return results[0] if results else u""

    def getPage(self, title):
        """Get the raw markup for a title

        title - text used to look the page up; it is resolved through
                searchTop() and the page of the top hit is fetched

        Returns the content of the first page entry that has a revision,
        or u"" when the search finds nothing or no entry has revisions.
        """
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)
        if not wikiTitle:
            return u""
        # Request the title the search resolved, not the caller's raw text.
        url = (self.api
               + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles="
               + self._quote(wikiTitle))
        reply = json.loads(self.downloadFile(url))
        pages = reply.get('query', {}).get('pages', {})
        for page in pages.values():
            if 'revisions' in page:
                return page['revisions'][0]['*']
        return u""

# Page metadata carried over from the source text: posted 2012-10-26 13:43:49
class Wiki:
def __init__(self, api="http://en.wikipedia.org/w/api.php"):
self.api = api
return
这个 return 语句没有任何作用。
"""A HTTP Request"""
def downloadFile(self, URL=None):
按照 Python 的约定,方法名应使用 lowercase_with_underscores 形式。
"""
URL - The URL to fetch
"""
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
你为什么要伪装成 Mozilla?
responce = opener.open(URL)
response 拼写错了(写成了 responce)。
data = responce.read()
responce.close()
return data.decode(encoding='UTF-8',errors='strict')
这整个函数应该是一个独立的(模块级)函数,而不是一个方法。
"""Search the wiki for titles"""
def search(self, searchString):
按照约定,参数名也应使用 lowercase_with_underscores 形式。
results = []
if (searchString != u""):
这里的括号 ( 和 ) 是多余的。你也可以直接写:if searchString:
encoded_searchString = searchString
为什么要这样赋值?
if isinstance(encoded_searchString, unicode):
encoded_searchString = searchString.encode('utf-8')
url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(encoded_searchString)
rawData = self.downloadFile(url)
object = json.loads(rawData)
我会把这两行合并成一行。
if object:
在什么情况下它会是假值?如果真的出现这种情况,你应该做出相应的处理,而不是假装什么都没有发生。
if 'query' in object:
for item in object['query']['search']:
wikiTitle = item['title']
if isinstance(wikiTitle, str):
wikiTitle = wikiTitle.decode(encoding='UTF-8',errors='strict')
results.append(wikiTitle)
return results
"""Search for the top wiki title"""
def searchTop(self, searchString):
results = self.search(searchString)
if len(results) > 0:
return results[0]
else:
return u""
如果没有找到结果,你真的想返回空字符串吗?这里应该抛出异常。返回空字符串只会让故障更难追踪。
"""Get the raw markup for a title"""
def getPage(self, title):
# Do the best we can to get a valid wiki title
wikiTitle = self.searchTop(title)
if (wikiTitle != u""):
encoded_title = wikiTitle
if isinstance(encoded_title, unicode):
encoded_title = title.encode('utf-8')
url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(encoded_title)
rawData = self.downloadFile(url)
object = json.loads(rawData)
for k, v in object['query']['pages'].items():
if 'revisions' in v:
return v['revisions'][0]['*']
return u""
不要退回到毫无意义的默认值。如果无法获取所请求的页面,就抛出一个带有尽可能多细节的错误,而不是只丢给我一个空字符串。
发布于 2012-10-24 10:23:57
有一处很明显的地方引起了我的注意:
class Wiki:
def __init__(self, api=None):
if api == None:
self.api = "http://en.wikipedia.org/w/api.php"
else:
self.api = api
return
可以简化为:
class Wiki:
def __init__(self, api="http://en.wikipedia.org/w/api.php"):
self.api = api
https://codereview.stackexchange.com/questions/17861
复制相似问题