这是我为搜索“罗顿番茄”电影API而制作的Python模块。它将搜索结果缓存在SQLite数据库中。
我可以对缓存系统做什么改进?是否有更好的方法来存储API搜索的结果?
是否有更好的方法与API进行通信?
import urllib2
import json
import sqlite3
import time
# sqlite database file
cache_database = "movies.db"
# how many seconds before the entry expires
cache_expiration = 60*60 # one hour
class Cache:
def get_conn(self):
"""
connect with the sqlite database
"""
conn = sqlite3.connect(cache_database)
c = conn.cursor()
c.execute(""" CREATE TABLE IF NOT EXISTS movies(
search_query TEXT PRIMARY KEY,
page_number INT,
timestamp INTEGER,
search_results BLOB); """)
return conn
def get(self, search_query, page_number):
"""
get the search results from the database
"""
with self.get_conn() as conn:
c = conn.cursor()
query = """SELECT search_results FROM movies WHERE
search_query = \"{search_query}\" AND
page_number = {page_number} AND
(strftime('%s', 'now') - timestamp) < {cache_expiration}"""
query = query.format(search_query = search_query,
page_number = page_number,
cache_expiration = cache_expiration)
results = ""
for a in c.execute(query):
results = a
return results
def put(self, search_query, page_number, search_results):
"""
put the results into the database
"""
timestamp = int( time.time() )
with self.get_conn() as conn:
c = conn.cursor()
insert = """ INSERT OR REPLACE INTO movies
(search_query, page_number, timestamp, search_results) VALUES
(\"{search_query}\", {page_number}, {timestamp}, ?); """
insert = insert.format(search_query = search_query,
page_number = page_number,
timestamp = timestamp)
c.execute(insert, (search_results,))
conn.commit()
class Movie:
api_key = ""
userAgent = "MovieInfoBot/1.0"
def search(self, query, results_per_page=25, page_number=1):
"""
searches for movies: movie name, result limit, page number
"""
cache = Cache()
result = cache.get(query, page_number)
if result != "":
print "using cache"
movie_json = result[0]
else:
print "using web"
#format the url
base_url = "http://api.rottentomatoes.com/api/public/v1.0/movies.json"
url = "{base_url}?apikey={api_key}&q={search_term}&page_limit={results_per_page}&page={page_number}"
param = {}
param["base_url"] = base_url
param["api_key"] = self.api_key
param["search_term"] = urllib2.quote(query.encode("utf8"))
param["results_per_page"] = results_per_page
param["page_number"] = page_number
url = url.format(**param)
req = urllib2.Request(url, headers={ 'User-Agent': self.userAgent })
movie_json = urllib2.urlopen(req).read()
# put the results into the movie cache
cache.put(query, page_number, movie_json)
movie_dict = json.loads(movie_json)
return movie_dict
movie = Movie()
movie.api_key = "xua5v8mbvermd2t9v5gyeaua"
data = movie.search("the wizard of oz")
print json.dumps(data, indent=4)下面是GitHub https://github.com/kylelk/Rotten-Tomatoes上的模块
发布于 2014-05-15 20:08:08
这是个不错的剧本!(你应该开放源代码,我会使用它。)
您应该在查询中使用准备好的语句。它不仅更容易写,而且更有效率。例如:
def get(self, search_query, page_number):
with self.get_conn() as conn:
c = conn.cursor()
query = """SELECT search_results FROM movies
WHERE search_query = ?
AND page_number = ?
AND strftime('%s', 'now') - timestamp < ?; """
c.execute(query, (search_query, page_number, cache_expiration))
return c.fetchone()请注意,您不需要再担心引用。
您也可以对您的Cache.put方法执行相同的操作:
def put(self, search_query, page_number, search_results):
timestamp = int(time.time())
with self.get_conn() as conn:
c = conn.cursor()
insert = """INSERT OR REPLACE INTO movies
(search_query, page_number, timestamp, search_results)
VALUES (?, ?, ?, ?); """
c.execute(insert, (search_query, page_number, timestamp, search_results,))
conn.commit()而不是这样:
results = ""
for a in c.execute(query):
results = a
return results这样会更简单、更干净:
c.execute(query)
return c.fetchone()除非,您确实希望迭代多个结果,并且只返回最后一个结果,就像在原始代码中一样。
现代类应该扩展object,例如:
class Cache(object):
...
class Movie(object):
...您可以简化这一点:
if result != "":
...就像这样:
if result:
...遵循PEP8 (官方的Python风格指南)是很好的。
https://codereview.stackexchange.com/questions/49785
复制相似问题