我有一个模块,它从给定目录中的文件列表中计算校验和。问题是我的is_changed_file又长又丑,但是我的重构尝试失败了,所以我想要一些其他的观点.
import hashlib
import logging
# if available use the faster cPickle module
try:
import cPickle as pickle
except ImportError:
import pickle
from os import path
class DirectoryChecksum(object):
"""Manage the checksums of the given files in a directory
"""
def __init__(self, directory, to_hash):
self.directory = directory
self.to_hash = to_hash
self.checksum_file = path.join(self.directory, '.checksums')
self.checks = self._compute()
self.logger = logging.getLogger("checksum(%s): " % self.directory)
def _abs_path(self, filename):
return path.join(self.directory, filename)
def _get_checksum(self, filename):
content = open(self._abs_path(filename)).read()
return hashlib.md5(content).hexdigest()
def _compute(self):
"""Compute all the checksums for the files to hash
"""
dic = {}
for f in self.to_hash:
if self._file_exists(f):
dic[f] = self._get_checksum(f)
return dic
def _file_exists(self, filename):
return path.isfile(self._abs_path(filename))
def is_changed(self):
"""True if any of the files to hash has been changed
"""
return any(self.is_file_changed(x) for x in self.to_hash)
#FIXME: refactor this mess, there is also a bug which impacts on
#the airbus. eggs, so probably something to do with the path
def is_file_changed(self, filename):
"""Return true if the given file was changed
"""
if not self._has_checksum():
self.logger.debug("no checksum is available yet, creating it")
self.write_checksums()
return True
stored_checks = self.load_checksums()
if not self._file_exists(filename):
if filename in stored_checks:
self.logger.debug("file %s has been removed" % filename)
# what if it existed before and now it doesn't??
return True
else:
return False
checksum = self._get_checksum(filename)
if filename in stored_checks:
# if the file is already there but the values are changed
# then we also need to rewrite the stored checksums
if stored_checks[filename] != checksum:
self.write_checksums()
return True
else:
return False
else:
# this means that there is a new file to hash, just do it again
self.write_checksums()
return True
def _has_checksum(self):
return path.isfile(self.checksum_file)
def load_checksums(self):
"""Load the checksum file, returning the dictionary stored
"""
return pickle.load(open(self.checksum_file))
def write_checksums(self):
"""Write to output (potentially overwriting) the computed checksum dictionary
"""
self.logger.debug("writing the checksums to %s" % self.checksum_file)
return pickle.dump(self.checks, open(self.checksum_file, 'w'))发布于 2012-01-08 18:39:06
可能不需要重构该方法,因为它表示相当清晰的决策树。将其分解为较小的方法将导致可读性问题。
只需确保您有涵盖每个结果的单元测试,并且没有代码不处理的情况。
如果您确实想重构,我建议添加这样的方法:
def _check_log_writesums(cond, logmessage, rewrite=True):
if cond:
if logmessage:
self.logger.debug(logmessage)
if rewrite:
self.write_checksums()
return True
return False然后你可以这样称呼它:
if filename in stored_checks:
return self._check_log_writesums(stored_checks[filename] != checksum,
"",
rewrite=True)也许这个名字会更好(_update_checksums或_do_write_checksums)。
发布于 2012-01-15 23:08:18
我觉得有更好的方法来做我想做的事,经过一次重构之后,我想到了这个。
class DirChecksum(object):
def __init__(self, directory, files_to_hash):
self.directory = directory
self.files_to_hash = files_to_hash
def _abs_path(self, filename):
return path.join(self.directory, filename)
def _get_checksum(self, filename):
content = open(filename).read()
return hashlib.md5(content).hexdigest()
def _compute_checksums(self):
logger.debug("computing checksums for %s" % self.directory)
ck = {}
for fname in self.files_to_hash:
abs_fname = self._abs_path(fname)
if path.isfile(abs_fname):
ck[fname] = self._get_checksum(abs_fname)
return ck
@property
def checksums(self):
return self._compute_checksums()
def is_changed(self, old_checksums):
# there was nothing actually previously stored
if not old_checksums:
logger.debug("checksums were never computed before")
return True
actual_checksums = self.checksums
if len(actual_checksums) != len(old_checksums):
logger.debug("number of files hashed changed")
return True
# if _checksums isn't there it means that probably is never been called
return old_checksums != actual_checksums这是完全不同的,因为现在它没有在磁盘上读写任何东西,但是它使用带有校验和的字典作为输入。现在干净多了,即使当然还能做得更好。
https://codereview.stackexchange.com/questions/7529
复制相似问题