文章/答案/技术大牛

发布

社区首页 >问答首页 >用Python解析OpenFoam文件

问用Python解析OpenFoam文件
EN

Code Review用户

提问于 2015-08-17 14:01:12

回答 1查看 1.8K关注 0票数 6

我正在为OpenFoam网格文件编写一个Python解析器。当网格很大时，我会面临性能问题。

以下是描述要点的文件的格式：

/*--------------------------------*- C++ -*----------------------------------*\
| =========                 |                                                 |
| \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox           |
|  \\    /   O peration     | Version:  2.2.0                                 |
|   \\  /    A nd           | Web:      www.OpenFOAM.org                      |
|    \\/     M anipulation  |                                                 |
\*---------------------------------------------------------------------------*/
FoamFile
{
    version     2.0;
    format      ascii;
    class       vectorField;
    location    "constant/polyMesh";
    object      points;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //


10
(
(2.14633 0.955 -0.627026)
(2.14633 1.005 -0.627026)
(4.0935 0.955 -0.389604)
(4.0935 1.005 -0.389604)
(0.199157 0.955 -0.864447)
(0.199157 1.005 -0.864447)
(3.075 1.005 0.562347)
(3.11114 1.005 0.558563)
(3.075 0.955 0.562347)
(3.11114 0.955 0.558563)
)


// ************************************************************************* //

描述四面体点的文件：

/*--------------------------------*- C++ -*----------------------------------*\
| =========                 |                                                 |
| \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox           |
|  \\    /   O peration     | Version:  2.2.0                                 |
|   \\  /    A nd           | Web:      www.OpenFOAM.org                      |
|    \\/     M anipulation  |                                                 |
\*---------------------------------------------------------------------------*/
FoamFile
{
    version     2.0;
    format      ascii;
    class       faceList;
    location    "constant/polyMesh";
    object      faces;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //


10
(
3(566037 390932 236201)
3(566037 146948 390932)
3(146948 236201 390932)
3(566037 236201 146948)
3(833456 434809 832768)
3(833456 832768 833463)
3(832768 434809 833463)
3(833456 833463 434809)
3(151487 504429 264888)
3(151487 264888 391870)
)


// ************************************************************************* //

下面是边界文件的一个示例：

/*--------------------------------*- C++ -*----------------------------------*\
| =========                 |                                                 |
| \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox           |
|  \\    /   O peration     | Version:  2.2.0                                 |
|   \\  /    A nd           | Web:      www.OpenFOAM.org                      |
|    \\/     M anipulation  |                                                 |
\*---------------------------------------------------------------------------*/
FoamFile
{
    version     2.0;
    format      ascii;
    class       polyBoundaryMesh;
    location    "constant/polyMesh";
    object      boundary;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

2
(
    object_surf
    {
        type            wall;
        physicalType    wall;
        nFaces          48738;
        startFace       9010058;
    }
    vacuum_surf
    {
        type            patch;
        physicalType    patch;
        nFaces          167218;
        startFace       9112924;
    }
)

以及我编写的解析这些文件的类：

class ParsedMesh:
    """Parse the ``boundary``, ``points`` and ``faces`` files of an OpenFOAM mesh.

    rep is the path to the directory containing the mesh files.

    After construction ``self.parsedMesh`` maps each file name to a dict that
    holds its ``"header"`` plus either ``"sections"`` (boundary file) or
    ``"valuesList"`` (points and faces files).  ``self.fileContent`` keeps the
    cleaned lines of the last file that was read.
    """
    def __init__(self,rep):

        def readFile(ficName):
            """Return the non-empty lines of ``ficName``, stripped and with every ";" removed."""
            # The context manager closes the handle even when reading fails.
            with open(os.path.join(rep,ficName),"r") as fic:
                cleaned = (line.replace(';','').strip() for line in fic)
                return [ line for line in cleaned if line ]

        def parseHeader(self):
            """Return the entries of the ``FoamFile { ... }`` block of self.fileContent.

            Only the first token after the key is kept as the value, with
            double quotes removed.  Returns None when no closed header block
            is found.
            """
            res = {}
            headerSection = False
            for el in self.fileContent:
                if el == "FoamFile":
                    headerSection = True
                elif headerSection:
                    if el == "}":
                        return res
                    if el != "{":
                        tmpEl = el.replace('"','').split()
                        res[tmpEl[0]] = tmpEl[1]
            return None

        def bodyLines(self):
            """Yield the lines of self.fileContent found between "(" and ")"."""
            inside = False
            for el in self.fileContent:
                if el == "(":
                    inside = True
                elif el == ")":
                    return
                elif inside:
                    yield el

        def parseBoundaryFile(self):
            """Fill self.parsedMesh["boundary"] with the header and the named sections."""
            self.fileContent = readFile("boundary")
            sections = {}
            self.parsedMesh["boundary"] = {"sections": sections}
            # header
            self.parsedMesh["boundary"]["header"] = parseHeader(self)
            ## body
            boundarySection = False
            boundaryInnerSection = False
            for el in self.fileContent:
                firstToken = el.split()[0]
                if firstToken == "(": # beginning of the values section
                    boundarySection = True
                    continue
                if firstToken == ")": # end of the values section
                    break
                if el == "{":
                    boundaryInnerSection = True
                    continue
                if el == "}":
                    boundaryInnerSection = False
                    continue
                # read values
                if not boundarySection:
                    continue
                if boundaryInnerSection:
                    tmpEl = el.split()
                    sections[boundName][tmpEl[0]] = tmpEl[1]
                else:
                    # a line outside any "{ ... }" names the next section
                    boundName = el
                    sections[boundName] = {}

        def parsePointsFile(self):
            """Fill self.parsedMesh["points"] with the header and one numpy array per point."""
            self.fileContent = readFile("points")
            self.parsedMesh["points"] = {}
            # header
            self.parsedMesh["points"]["header"] = parseHeader(self)
            ## body: each line looks like "(x y z)"
            self.parsedMesh["points"]["valuesList"] = [
                numpy.array([float(el2) for el2 in el[1:-1].split()])
                for el in bodyLines(self)
            ]

        def parseFacesFile(self):
            """Fill self.parsedMesh["faces"] with the header and one list of point indices per face.

            Raises ValueError when a body line contains no "(".
            """
            self.fileContent = readFile("faces")
            self.parsedMesh["faces"] = {}
            # header
            self.parsedMesh["faces"]["header"] = parseHeader(self)
            ## body: each line looks like "N(i0 i1 ...)".  The count prefix N
            ## may have several digits, so the opening parenthesis is located
            ## instead of assuming it sits at index 1.
            self.parsedMesh["faces"]["valuesList"] = [
                [int(el2) for el2 in el[el.index("(") + 1:-1].split()]
                for el in bodyLines(self)
            ]

        self.parsedMesh = {}
        self.fileContent = []
        parseBoundaryFile(self)
        parsePointsFile(self)
        parseFacesFile(self)

任何允许性能改进的想法都是值得赞赏的。任何其他评论也是受欢迎的(我是一个使用Python的物理学家，所以可能会犯很多明显的错误)。

python

performance

parsing

回答 1

Code Review用户

回答已采纳

发布于 2015-08-18 15:38:02

几点建议：

请遵循PEP 8。您的代码已经相当不错了，但命名方式不符合PEP 8规范。
虽然我可以看到将所有内容分组到一个类中的好处，但是单独的文件解析器并不直接依赖于该类。我会把它们分成它们自己的功能。这将使测试特别容易。然后，您可以在调用解析器函数的类中使用包装器方法。
您正在将每个文件完全加载到列表中。对于大型文件，这需要大量的内存。更糟糕的是，它需要解析整个列表两次，一次用于标题，一次用于主体。这可能是您的性能问题的一个重要来源。对文件中的行进行迭代将大大提高内存效率。我建议将读取器转换为一个需要迭代器(通常是文件，但可能是用于测试的任意字符串列表)的生成器，执行剥离操作，生成已剥离的行，并跳过空行。这有一个额外的优点，即它将跟踪您的进度，因此当您想要解析主体时，不需要再次返回并读取标题。
如果您使用一个生成器，您可以创建一个for循环，直到它到达您想要的部分，然后中断，然后您可以有第二个for循环，它可以拾取第一个中断的位置。这将大大减少您必须做的if测试的数量。
您在自己手动解析数字列表。不必这样做：numpy有一个fromstring函数，可以直接把字符串解析成numpy数组，它比您的方法快得多。这也可能是性能问题的一个主要来源。
您应该始终使用with打开文件。即使出现错误，这也可以安全地关闭它们。在默认的Python版本中，当函数退出时，文件会自动关闭，但在其他Python实现(如Pypy )中不一定会出现这种情况。当您打算关闭文件时，使用with关闭这些文件要安全得多。
您可以使用元组解压来拆分您的数据行。所以key, value = el.split()。
您创建了一个类，却把所有内容都解析进一个包罗一切的dict里。这违背了使用类的初衷。您应该将各个组成部分解析为类属性，或者只使用函数并返回一个dict。
您把文件名硬编码了。我会把文件名作为参数，并以默认名称作为其默认值。
rep是一个目录，而不是重复。重复可能在目录名中，但没有理由必须重复。所以这是风格，但我会叫它dirname或什么的。没有理由在精神上限制你如何像那样组织你的文件。
您把所有解析器都写成了__init__的子函数。这同样违背了使用类的初衷。它们应该是方法。
您的类应该从object派生。

因此，总的来说，我将如何构造代码：

def clean_lines(lines):
    """Yield each line stripped of surrounding whitespace and of ';' at its ends.

    Lines that end up empty after stripping are dropped.
    """
    for raw in lines:
        stripped = raw.strip().strip(';')
        if stripped:
            yield stripped


def consume_lines(lines, targ):
    """Advance ``lines`` up to and including the first item equal to ``targ``.

    When ``targ`` never occurs the iterable is simply exhausted.
    """
    for current in lines:
        if current == targ:
            break


def header_parser(lines):
    """Read the header entries between the next '{' and its closing '}'.

    Each entry line is split on its first run of whitespace into a key and a
    value; double quotes at the ends of the value are stripped.  Returns the
    entries as a dict.
    """
    consume_lines(lines, '{')

    header = {}
    for entry in lines:
        if entry == '}':
            return header
        name, raw_value = entry.split(None, 1)
        header[name] = raw_value.strip('"')

    return header


def boundary_parser(lines):
    """Read the named sections between the next '(' and its closing ')'.

    A line other than '{' names a new section; the lines between the
    following '{' and '}' are split on their first run of whitespace into
    key/value pairs stored under that name.  Returns a dict of dicts.
    """
    consume_lines(lines, '(')

    patches = {}
    for entry in lines:
        if entry == ')':
            break
        if entry == '{':
            # Drain the same iterator until this section's closing brace.
            for item in lines:
                if item == '}':
                    break
                key, value = item.split(None, 1)
                patches[current][key] = value
        else:
            current = entry
            patches[current] = {}

    return patches


def points_parser(lines):
    """Read the point lines between the next '(' and its closing ')'.

    The first and last character of each line are dropped and the rest is
    parsed as space-separated numbers.  Returns a list with one numpy array
    per line.
    """
    consume_lines(lines, '(')

    coords = []
    for row in lines:
        if row == ')':
            return coords
        inner = row[1:-1]
        coords.append(np.fromstring(inner, sep=' '))
    return coords


def faces_parser(lines):
    """Read the face lines between the next '(' and its closing ')'.

    Each face line has the form ``N(i0 i1 ...)``.  The text between the
    opening parenthesis and the last character is parsed as space-separated
    integers.  Returns a list with one int32 numpy array per face.

    Raises ValueError when a face line contains no '('.
    """
    consume_lines(lines, '(')

    faces = []
    for line in lines:
        if line == ')':
            break
        # The count prefix may have several digits ("12(...)"), so locate the
        # opening parenthesis instead of assuming it sits at index 1.
        start = line.index('(') + 1
        faces.append(np.fromstring(line[start:-1], dtype=np.int32, sep=' '))

    return faces


class ParsedMesh(object):
    """Container for the parsed boundary, points and faces mesh files.

    Constructing an instance parses the three files with their default names
    from ``dirname``; each ``parse_*_file`` method can be called again with
    another file name or directory to replace the stored result.
    """

    def __init__(self, dirname):
        self.dirname = dirname
        self.parse_boundary_file()
        self.parse_points_file()
        self.parse_faces_file()

    def _parser(self, parser, fname, dirname):
        """Open the file, parse its header, then hand the remaining lines to ``parser``.

        ``dirname`` falls back to ``self.dirname`` when it is None; a falsy
        directory means ``fname`` is used as given.  Returns the tuple
        ``(parsed, header)``.
        """
        base = self.dirname if dirname is None else dirname
        path = os.path.join(base, fname) if base else fname
        with open(path) as stream:
            cleaned = clean_lines(stream)
            header = header_parser(cleaned)
            body = parser(cleaned)
        return body, header

    def parse_boundary_file(self, fname='boundary', dirname=None):
        """Parse the boundary file into ``self.boundary`` and ``self.boundary_hdr``."""
        result = self._parser(boundary_parser, fname=fname, dirname=dirname)
        self.boundary, self.boundary_hdr = result

    def parse_points_file(self, fname='points', dirname=None):
        """Parse the points file into ``self.points`` and ``self.points_hdr``."""
        result = self._parser(points_parser, fname=fname, dirname=dirname)
        self.points, self.points_hdr = result

    def parse_faces_file(self, fname='faces', dirname=None):
        """Parse the faces file into ``self.faces`` and ``self.faces_hdr``."""
        result = self._parser(faces_parser, fname=fname, dirname=dirname)
        self.faces, self.faces_hdr = result

票数 4

页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://codereview.stackexchange.com/questions/101191

复制

相似问题

问用Python解析OpenFoam文件
EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用Python解析OpenFoam文件EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问用Python解析OpenFoam文件
EN