作为python中我正在做的个人项目的一部分,我已经开始编写命令行实用程序,将.hdf文件转换为JSON文件。然而,在完成转换之前,我编写了一些验证代码,以检查文件是否存在,是否具有正确的扩展名和正确的基本格式。
这是目前为止的代码
import sys
import h5py
from pathlib import Path
# -------------------------------------------------
# Utility classes defined below.
# -------------------------------------------------
class Validator:
def print_help_text():
print("The processor.py utility is designed to take a single argument, the path")
print("to a NASA EarthData Global Fire Emissions Database GFED4.1s_yyyy.hdf5 file.")
print("Example - $ ./preprocess.py some/directory/GFED4.1s_2015.hdf5\n")
print("If a valid file path is passed to the utility it should output individual JSO")
print("files for each month, that contain data in the format required to train the")
print("emissions predictor.\n")
print("By default the new files will be ouput to the same directory that contains")
print("the script. Alternatively, you can provide a second argument with a path to")
print("another directory for the output files to be placed in.")
def valid_hdf_file(path_string):
valid_extensions = ("hdf","hdf4","hdf5","h4","h5", "he2", "he5")
if path_string.split(".")[-1] in valid_extensions:
if Path(path_string).is_file():
return True
else:
print("\n'" + path_string + "' is not a valid file.\n")
return False
else:
print("\nThe input file must be an HDF file with a correct extension.\n")
return False
def valid_arguments(arguements):
args = len(arguements)
if (args == 2 or args == 3) and arguements[1] != "--help":
path_to_data = arguements[1]
return Validator.valid_hdf_file(path_to_data)
else:
Validator.print_help_text()
return False
def valid_leaf_groups(group, month, hdf_file):
groups_and_leaves = {
"biosphere": ("BB", "NPP", "Rh"),
"burned_area": ("burned_fraction",),
"emissions": ("C", "DM")
}
valid = True
for leaf in groups_and_leaves[group]:
full_group = group + "/" + ("%02d" % month) + "/" + leaf
if full_group not in hdf_file:
valid = False
print("Expected group '" + full_group + "' not in HDF file.")
return valid
def valid_hdf_structure(hdf_file):
valid = True
for group in ("ancill/basis_regions", "lon", "lat"):
if group not in hdf_file:
valid = False
print("Expected group '" + group + "' not in HDF file.")
for group in ("biosphere", "burned_area", "emissions"):
for month in range(1,13):
full_group = group + "/" + ("%02d" % month)
if full_group not in hdf_file:
valid = False
print("Expected group '" + full_group + "' not in HDF file.")
else:
valid = valid and Validator.valid_leaf_groups(group, month, hdf_file)
return valid
# -------------------------------------------------
# Script starts here.
# -------------------------------------------------
if __name__ == "__main__":
if not Validator.valid_arguments(sys.argv):
sys.exit()
filename = sys.argv[1]
print("Processing - " + filename)
hdf_file = h5py.File(filename, 'r')
if not Validator.valid_hdf_structure(hdf_file):
sys.exit()
print("Basic structure of hdf file confirmed to conform to GFED4 format.")发布于 2018-01-01 01:29:06
关于你的代码的一些评论。不按特定顺序:
与其使用一组print语句,不如考虑使用一个文本块,并使用如下循环:
def print_help_text():
help_text = """
The processor.py utility is designed to take a single argument, the path
to a NASA EarthData Global Fire Emissions Database GFED4.1s_yyyy.hdf5 file.
Example - $ ./preprocess.py some/directory/GFED4.1s_2015.hdf5
If a valid file path is passed to the utility it should output individual JSO
files for each month, that contain data in the format required to train the
emissions predictor."
By default the new files will be ouput to the same directory that contains
the script. Alternatively, you can provide a second argument with a path to
another directory for the output files to be placed in.
"""
for line in help_text.split()[1:-1]:
print(line.strip())in是方便的我更希望看到这样的情况:
args = len(arguements)
if (args == 2 or args == 3) and arguements[1] != "--help":
...如下所示:
if len(arguements) in (2, 3) and arguements[1] != "--help":
...这个构造使您更明显地看到,您正在寻找长度作为一个特定的值列表之一。
您的Validator类有许多静态方法,但它们并不是这样声明的。这不是很可怕的琵琶。我建议你调查:https://docs.python.org/3/library/functions.html#staticmethod
发布于 2018-01-01 15:44:23
程序定义了如下所示的有效扩展:
valid_extensions = ("hdf“、"hdf4”、"hdf5“、"h4”、"h5“、"he2”、"he5")
然后检查扩展是否有效,如下所示:
if path_string.split(".")-1 in valid_extensions:
在检查扩展是否有效时,允许扩展的大写版本也是个好主意。或者采用更懒惰的方法来允许混合大小写值,方法是将检查写为:
if path_string.split(".")[-1].lower() in valid_extensions:所谓的早期返回可以减少代码嵌套,从而提高代码的可读性。例如,而不是这样:
if path_string.split(".")-1 in valid_extensions: if Path( path_string ).is_file():返回True path_string:print(“\n‘+path_string+ "’不是有效文件。\n”)返回False else: print(“\n输入文件必须是扩展名正确的HDF文件。\n”)返回False
你可以这样写:
if path_string.split(".")[-1] in valid_extensions:
if Path(path_string).is_file():
return True
print("\n'" + path_string + "' is not a valid file.\n")
return False
print("\nThe input file must be an HDF file with a correct extension.\n")
return False代码的其他部分也可以类似地简化。
"%02d" % month样式格式是老式的,现代推荐的方法是使用字符串的format函数。而不是这样:
full_group =组+ "/“+ ("%02d”%月份)+ "/“+叶
最好是这样:
full_group = '{}/{:02d}/{}'.format(group, month, leaf)这些示例中的括号是多余的,在代码中的其他地方也是如此:
valid_extensions = ("hdf“、"hdf4”、"hdf5“、"h4”、"h5“、"he2”、"he5")
https://codereview.stackexchange.com/questions/184001
复制相似问题