我正在尝试从os.walk中删除目录(我不需要这些目录中的文件)
我的代码:
def findit(root, exclude_files=[], exclude_dirs=[]):
exclude_files = (fnmatch.translate(i) for i in exclude_files)
exclude_files = '('+')|('.join(exclude_files)+')'
exclude_files = re.compile(exclude_files)
exclude_dirs = (os.path.normpath(i) for i in exclude_dirs)
exclude_dirs = (os.path.normcase(i) for i in exclude_dirs)
exclude_dirs = set(exclude_dirs)
return (os.path.join(r,f)
for r,_,f in os.walk(root)
if os.path.normpath(os.path.normcase(r)) not in exclude_dirs
for f in f
if not exclude_files.match(os.path.normcase(f)))它可以过滤文件,当我尝试过滤出c:/windows时,它仍然会显示我的文件是否遗漏了什么?
filelist = list(findit('c:/',exclude_files = ['*.dll', '*.dat', '*.log', '*.exe'], exclude_dirs = ['c:/windows', 'c:/program files', 'c:/else']))发布于 2014-08-01 12:23:12
当筛选出目录时,您并不阻止os.walk()进入子目录。
要做到这一点,您需要清除dirs列表:
def findit(root, exclude_files=[], exclude_dirs=[]):
exclude_files = (fnmatch.translate(i) for i in exclude_files)
exclude_files = '('+')|('.join(exclude_files)+')'
exclude_files = re.compile(exclude_files)
exclude_dirs = (os.path.normpath(i) for i in exclude_dirs)
exclude_dirs = (os.path.normcase(i) for i in exclude_dirs)
exclude_dirs = set(exclude_dirs)
for current, dirs, files in os.walk(root):
if os.path.normpath(os.path.normcase(current)) in exclude_dirs:
# exclude this dir and subdirectories
dirs[:] = []
continue
for f in files:
if not exclude_files.match(os.path.normcase(f)):
yield os.path.join(current, f)dirs[:] = []赋值清除列表;它从列表中移除所有脏名。由于这个列表是与os.walk()共享的,而后者使用这个列表随后访问子目录,这实际上阻止了os.walk()访问这些子目录。
发布于 2014-08-01 22:25:51
读到上面的答复,我感到很奇怪。在我看来,os.walk缺失了,根参数似乎没有按需要使用。另外,任何一个可选参数都是空列表的情况应该可以工作。建议对每个目录级别的目录使用较少的名称空间查找和排除通配符进行稍微的更改:
import os
import re
import fnmatch
import os.path
def findit(root, exclude_files=[], exclude_dirs=[], exclude_dirs_wc=[]):
"""Generate all files found under root excluding some.
Excluded files are given as a list of Unix shell-style wildcards
that exclude matches in each directory. Excluded directories are
assumed to be paths starting at root; no wildcards. Directory
wildcards at each level can be supplied.
"""
# Less namespace look-up.
join = os.path.join
normpath = os.path.normpath; normcase = os.path.normcase
#
def make_exclude_regex_from(lst):
if len(lst):
lst = (fnmatch.translate(i) for i in lst)
lst = "({})".format(")|(".join(lst))
lst = re.compile(lst)
return lst
#
exclude_files = make_exclude_regex_from(exclude_files)
exclude_dirs_wc = make_exclude_regex_from(exclude_dirs_wc)
if len(exclude_dirs):
exclude_dirs = (normpath(i) for i in exclude_dirs)
exclude_dirs = (normcase(i) for i in exclude_dirs)
exclude_dirs = set(exclude_dirs)
for current, dirs, files in os.walk(root):
current_dir = normpath(normcase(current))
if exclude_dirs and current_dir in exclude_dirs:
# Prune set of dirs to exclude.
exclude_dirs.discard(current_dir)
# Disregard sub-directories.
dirs[:] = [] # IN PLACE, since it is a loop var.
continue
if exclude_dirs_wc:
for dd in dirs[:]:
if exclude_dirs_wc.match(normcase(dd)):
dirs.remove(dd) # IN PLACE
if exclude_files:
for ff in files[:]:
if exclude_files.match(normcase(ff)):
files.remove(ff) # IN PLACE; also a loop var.
for f in files:
yield join(current,f)发布于 2019-12-11 11:10:54
在使用os.walk("pathName")遍历时,可以使用关键字“继续”跳过迭代。
for dirpath, dirnames, filenames in os.walk(pathName):
# Write regular expression or a string to skip the desired folder
dirpath_pat = re.search(pattern, dirpath)
if dirpath_pat:
if dirpath_pat.group(0):
continuehttps://stackoverflow.com/questions/25080096
复制相似问题