我正在为 OpenFOAM 网格文件编写一个 Python 解析器。当网格很大时,我遇到了性能问题。
以下是描述网格点(points)的文件格式:
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: 2.2.0 |
| \\ / A nd | Web: www.OpenFOAM.org |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class vectorField;
location "constant/polyMesh";
object points;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
10
(
(2.14633 0.955 -0.627026)
(2.14633 1.005 -0.627026)
(4.0935 0.955 -0.389604)
(4.0935 1.005 -0.389604)
(0.199157 0.955 -0.864447)
(0.199157 1.005 -0.864447)
(3.075 1.005 0.562347)
(3.11114 1.005 0.558563)
(3.075 0.955 0.562347)
(3.11114 0.955 0.558563)
)
// ************************************************************************* //
描述四面体各面顶点(faces)的文件:
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: 2.2.0 |
| \\ / A nd | Web: www.OpenFOAM.org |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class faceList;
location "constant/polyMesh";
object faces;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
10
(
3(566037 390932 236201)
3(566037 146948 390932)
3(146948 236201 390932)
3(566037 236201 146948)
3(833456 434809 832768)
3(833456 832768 833463)
3(832768 434809 833463)
3(833456 833463 434809)
3(151487 504429 264888)
3(151487 264888 391870)
)
// ************************************************************************* //
下面是边界(boundary)文件的一个示例:
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: 2.2.0 |
| \\ / A nd | Web: www.OpenFOAM.org |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class polyBoundaryMesh;
location "constant/polyMesh";
object boundary;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
2
(
object_surf
{
type wall;
physicalType wall;
nFaces 48738;
startFace 9010058;
}
vacuum_surf
{
type patch;
physicalType patch;
nFaces 167218;
startFace 9112924;
}
)
以下是我编写的用于解析这些文件的类:
class ParsedMesh:
    """Parse the ASCII ``boundary``, ``points`` and ``faces`` files of a mesh.

    ``rep`` is the path to the directory containing the mesh files.

    After construction, ``self.parsedMesh`` has this layout::

        {
            "boundary": {"sections": {name: {key: value}}, "header": {...}},
            "points": {"header": {...}, "valuesList": [numpy.ndarray, ...]},
            "faces": {"header": {...}, "valuesList": [[int, ...], ...]},
        }

    All header and boundary values are kept as strings.
    ``self.fileContent`` holds the cleaned lines of the file read last.
    """

    def __init__(self, rep):
        """Read and parse the three mesh files found in directory ``rep``."""
        self._rep = rep
        self.parsedMesh = {}
        self.fileContent = []
        self._parse_boundary_file()
        self._parse_points_file()
        self._parse_faces_file()

    def _read_file(self, fic_name):
        """Return the non-empty lines of ``fic_name`` without ';' or padding."""
        # ``with`` closes the handle even when reading raises.
        with open(os.path.join(self._rep, fic_name), "r") as fic:
            cleaned = (line.replace(';', '').strip() for line in fic)
            return [line for line in cleaned if line]

    def _parse_header(self):
        """Return the ``FoamFile`` block of ``self.fileContent`` as a dict.

        Double quotes are removed from the entries. An empty dict is returned
        when the file has no ``FoamFile`` block.
        """
        header = {}
        lines = iter(self.fileContent)
        # Skip the banner: nothing before the "FoamFile" keyword is header.
        for line in lines:
            if line == "FoamFile":
                break
        for line in lines:
            if line == "{":
                continue
            if line == "}":
                break
            tokens = line.replace('"', '').split()
            header[tokens[0]] = tokens[1]
        return header

    def _body_lines(self):
        """Yield the lines between the opening "(" and the closing ")"."""
        lines = iter(self.fileContent)
        for line in lines:
            if line == "(":
                break
        for line in lines:
            if line == ")":
                return
            yield line

    def _parse_boundary_file(self):
        """Fill ``self.parsedMesh["boundary"]`` from the ``boundary`` file."""
        self.fileContent = self._read_file("boundary")
        sections = {}
        self.parsedMesh["boundary"] = {
            "sections": sections,
            "header": self._parse_header(),
        }
        lines = iter(self.fileContent)
        # Everything before the first "(" (banner, header, count) is skipped.
        for line in lines:
            if line.split()[0] == "(":
                break
        inside_patch = False
        current = None
        for line in lines:
            first_token = line.split()[0]
            if first_token == ")":
                break
            if first_token == "(":
                continue
            if line == "{":
                inside_patch = True
            elif line == "}":
                inside_patch = False
            elif inside_patch:
                tokens = line.split()
                current[tokens[0]] = tokens[1]
            else:
                # A bare line outside braces is the name of the next patch.
                current = sections[line] = {}

    def _parse_points_file(self):
        """Fill ``self.parsedMesh["points"]`` from the ``points`` file."""
        self.fileContent = self._read_file("points")
        self.parsedMesh["points"] = {
            "header": self._parse_header(),
            # Each line looks like "(x y z)"; drop the parentheses.
            "valuesList": [
                numpy.array([float(tok) for tok in line[1:-1].split()])
                for line in self._body_lines()
            ],
        }

    def _parse_faces_file(self):
        """Fill ``self.parsedMesh["faces"]`` from the ``faces`` file."""
        self.fileContent = self._read_file("faces")
        self.parsedMesh["faces"] = {
            "header": self._parse_header(),
            # Each line looks like "<n>(i0 i1 ...)". Locate the parenthesis
            # instead of assuming a one-digit count, so that faces with ten
            # or more vertices are read correctly.
            "valuesList": [
                [int(tok) for tok in line[line.index("(") + 1:-1].split()]
                for line in self._body_lines()
            ],
        }


# Note from the original author: any idea that improves performance is
# appreciated, and any other comment is welcome too (I am a physicist who
# uses Python, so I probably make many obvious mistakes).
发布于 2015-08-18 15:38:02
几点建议:
- 减少 if 测试的数量。
- numpy 提供了 fromstring 函数,可以直接把字符串解析成 numpy 数组。它比你的方法快得多,这也可能是性能问题的一个主要来源。
- 使用 with 打开文件。即使出现错误,这也能安全地关闭它们。在默认的 Python 实现(CPython)中,函数退出时文件会自动关闭,但在其他 Python 实现(如 PyPy)中不一定如此。当你确实打算关闭文件时,使用 with 要安全得多。
- 可以使用序列解包,例如 key, value = el.split()。
- 你把所有内容都存进了一个 dict,这违背了使用类的初衷。你应该把各个组成部分解析为类属性,或者只使用函数并返回一个 dict。
- rep 是一个目录,而不是“重复(repetition)”。目录名里也许会有重复,但没有理由必须如此。所以这只是风格问题,但我会把它命名为 dirname 之类的名字。没有理由在思路上这样限制你组织文件的方式。
- 你在 __init__ 中定义了所有解析器子函数。这同样违背了使用类的初衷。它们应该是方法。
- 类应该从 object 派生。

因此,总的来说,我会这样组织代码:
def clean_lines(lines):
    """Yield the non-empty lines of ``lines`` without padding or ';'.

    Surrounding whitespace is removed first, then any leading or trailing
    ';', then the whitespace that removing the ';' may have exposed (for
    example in ``"version 2.0 ;"``). Lines that end up empty are skipped.
    """
    for line in lines:
        # The final strip keeps tokens such as "}" comparable with ``==``
        # even when the file puts a space before the semicolon.
        line = line.strip().strip(';').strip()
        if line:
            yield line
def consume_lines(lines, targ):
    """Advance ``lines`` until just past the first line equal to ``targ``.

    ``lines`` should be an iterator, so that the caller resumes reading
    right after the matched line.

    Returns ``True`` when ``targ`` was found and ``False`` when ``lines``
    was exhausted first, so callers can detect a truncated file.
    """
    for line in lines:
        if line == targ:
            return True
    return False
def header_parser(lines):
    """Read the first brace-delimited block of ``lines`` into a dict.

    Lines are skipped up to the first ``'{'``. Every following line is split
    once on whitespace into a key and a value until ``'}'`` is met. Double
    quotes around a value are removed.
    """
    consume_lines(lines, '{')
    header = {}
    for entry in lines:
        if entry == '}':
            return header
        name, raw = entry.split(maxsplit=1)
        header[name] = raw.strip('"')
    return header
def boundary_parser(lines):
    """Read the parenthesised list of named blocks into a dict of dicts.

    Lines are skipped up to the first ``'('``. A bare line names a block and
    the ``{ ... }`` that follows supplies its ``key value`` entries. Values
    are kept as strings. Parsing stops at ``')'``.
    """
    consume_lines(lines, '(')
    patches = {}
    for token in lines:
        if token == ')':
            break
        if token == '{':
            # Drain the entries of the block opened by this brace.
            for entry in lines:
                if entry == '}':
                    break
                field, val = entry.split(maxsplit=1)
                patches[label][field] = val
        else:
            label = token
            patches[label] = {}
    return patches
def points_parser(lines):
    """Read the parenthesised list of ``(x y z)`` lines into numpy arrays.

    Lines are skipped up to the first ``'('``. Each following line has its
    first and last character dropped and the rest parsed as
    whitespace-separated numbers, until ``')'`` is met.
    """
    consume_lines(lines, '(')
    coords = []
    for row in lines:
        if row == ')':
            return coords
        inner = row[1:-1]
        coords.append(np.fromstring(inner, sep=' '))
    return coords
def faces_parser(lines):
    """Read the parenthesised list of ``n(i0 i1 ...)`` lines into int arrays.

    Lines are skipped up to the first ``'('``. Each following line gives the
    vertex indices of one face and is returned as a ``numpy.int32`` array.
    Parsing stops at ``')'``.

    Raises ``ValueError`` when a face line contains no ``'('``.
    """
    consume_lines(lines, '(')
    faces = []
    for line in lines:
        if line == ')':
            break
        # Locate the parenthesis instead of assuming a one-digit vertex
        # count, so that faces with ten or more vertices are read correctly.
        start = line.index('(') + 1
        faces.append(np.fromstring(line[start:-1], dtype=np.int32, sep=' '))
    return faces
class ParsedMesh(object):
    """Mesh parsed from the ``boundary``, ``points`` and ``faces`` files.

    Each file's body is stored in ``boundary``, ``points`` or ``faces`` and
    its header in the matching ``*_hdr`` attribute.
    """

    def __init__(self, dirname):
        """Parse the three mesh files found in ``dirname``."""
        self.dirname = dirname
        self.parse_boundary_file()
        self.parse_points_file()
        self.parse_faces_file()

    def _parser(self, parser, fname, dirname):
        """Open ``fname`` and return ``(parser(lines), header)``.

        ``dirname=None`` means the instance's own directory. A falsy
        directory leaves ``fname`` untouched.
        """
        folder = self.dirname if dirname is None else dirname
        path = os.path.join(folder, fname) if folder else fname
        with open(path) as fobj:
            cleaned = clean_lines(fobj)
            # The header must be consumed first: both parsers share the
            # same line iterator.
            header = header_parser(cleaned)
            return parser(cleaned), header

    def parse_boundary_file(self, fname='boundary', dirname=None):
        """Parse the boundary file into ``boundary`` and ``boundary_hdr``."""
        result = self._parser(boundary_parser, fname=fname, dirname=dirname)
        self.boundary, self.boundary_hdr = result

    def parse_points_file(self, fname='points', dirname=None):
        """Parse the points file into ``points`` and ``points_hdr``."""
        result = self._parser(points_parser, fname=fname, dirname=dirname)
        self.points, self.points_hdr = result

    def parse_faces_file(self, fname='faces', dirname=None):
        """Parse the faces file into ``faces`` and ``faces_hdr``."""
        result = self._parser(faces_parser, fname=fname, dirname=dirname)
        self.faces, self.faces_hdr = result


# Source: https://codereview.stackexchange.com/questions/101191
复制相似问题