我正在使用 Boost Spirit X3 解析 Newick 树格式,但无法解析完整的树。
最小可复现示例
这是我尝试的解决方案:
namespace quetzal::newick::parser
{
namespace x3 = boost::spirit::x3;
using x3::alpha;
using x3::alnum;
using x3::double_;
using x3::rule;
using x3::lit;
// `branch` is declared as an x3::rule up front so that `internal` can refer to
// it before `branch_def` is written; the grammar is recursive through it.
rule<struct branch> branch{"branch"};
// name: one alphabetic character followed by any number of alphanumerics.
auto name = alpha >> *alnum; // to be improved later
// length: a ':' followed by a floating-point number.
auto length = ':' >> double_;
// leaf: an optional name.
// NOTE(review): an optional parser (-p) succeeds even when p does not match,
// so `leaf` also matches empty input.
auto leaf = -name;
// internal: a parenthesised, comma-separated list of branches, then an optional name.
auto internal= '(' >> (branch % ',') >> ')' >> -name;
// subtree: `leaf` is tried first, `internal` second.
// NOTE(review): because `leaf` cannot fail, `internal` looks unreachable with
// this ordering — confirm the intended order of the alternatives.
auto subtree = leaf | internal;
// tree: one subtree terminated by ';'.
auto tree = subtree >> ';';
// Definition of the recursive rule: a subtree followed by an optional length.
auto const branch_def = subtree >> -length;
// Binds the `branch` rule to `branch_def`.
BOOST_SPIRIT_DEFINE(branch);
}解析内部语法的测试似乎有效。
// Checks that the `internal` rule, used on its own, matches each sample input.
BOOST_AUTO_TEST_CASE(internal_grammar)
{
// Sample inputs: unnamed children, named children, and named children with lengths.
std::vector<std::string> inputs =
{
"(,)",
"(A,B)F",
"(A:10,B:10)F"
};
for(const auto& input : inputs)
{
auto iter = input.begin();
auto iter_end = input.end();
// x3::space is supplied as the skipper for phrase_parse.
bool r = phrase_parse(iter, iter_end, quetzal::newick::parser::internal, x3::space );
// Passes only if the parse succeeded and `iter` reached the end of the input.
BOOST_CHECK(r && iter == iter_end);
}
}但是完整的解析器tree无法解析除第一棵树之外的所有数据,我不明白为什么:
// Checks that the complete `tree` parser matches each sample input in full.
BOOST_AUTO_TEST_CASE(full_grammar)
{
// Sample inputs: every one ends with the ';' terminator that `tree` requires.
std::vector<std::string> inputs =
{
";",
"(,);",
"(,,(,));",
"(A,B,(C,D));",
"(A,B,(C,D)E)F;",
"(:0.1,:0.2,(:0.3,:0.4):0.5);",
"(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;",
"(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);",
"(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;",
"((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;"
};
for(const auto& input : inputs)
{
auto iter = input.begin();
auto iter_end = input.end();
// x3::space is supplied as the skipper for phrase_parse.
bool r = phrase_parse(iter, iter_end, quetzal::newick::parser::tree, x3::space );
// Passes only if the parse succeeded and `iter` reached the end of the input.
BOOST_CHECK(r && iter == iter_end);
}
}可能的缺点
- 我怀疑过自己对 x3::lit 的用法有问题,不过这个问题(另一则问答)似乎已经澄清了这一点。
- 我使用了 auto(参照 Michael 在 CppCon 上的介绍),但我希望这里对递归规则恰当地使用了 x3::rule。
发布于 2022-11-15 22:10:02
我自己做了一个可复现的示例:在 Coliru 上在线运行(Live On Coliru)
现在,当你想弄清楚 X3 语法的行为时——除了在脑子里推演之外——还可以启用调试输出:
#define BOOST_SPIRIT_X3_DEBUG
它只对规则(x3::rule)起作用。可以考虑添加一些仅用于调试的规则,以获得更详细的信息:
auto dbg(auto name, auto p) { return x3::rule<struct _>{name} = p; };
// Each grammar piece is wrapped in dbg(...) under its own label.
// name: one alphabetic character followed by any number of alphanumerics.
auto name = dbg("name", x3::alpha >> *x3::alnum); // to be improved later
// length: a ':' followed by a floating-point number.
auto length = dbg("length", ':' >> x3::double_);
// leaf: an optional name.
auto leaf = dbg("leaf", -name);
// internal: a parenthesised, comma-separated list of branches, then an optional name.
auto internal = dbg("internal", '(' >> (branch % ',') >> ')' >> -name);
// subtree: `leaf` is tried first, `internal` second.
auto subtree = dbg("subtree", leaf | internal);
auto tree = dbg("tree", subtree >> ';');
现在输出将是(在线演示):
<tree>
<try>;</try>
<subtree>
<try>;</try>
<leaf>
<try>;</try>
<name>
<try>;</try>
<fail/>
</name>
<success>;</success>
</leaf>
<success>;</success>
</subtree>
<success></success>
</tree>
";" -> true true您可以“跟踪”规则调用和结果。现在,让我们来看看第一个失败:
<tree>
<try>(,);</try>
<subtree>
<try>(,);</try>
<leaf>
<try>(,);</try>
<name>
<try>(,);</try>
<fail/>
</name>
<success>(,);</success>
</leaf>
<success>(,);</success>
</subtree>
<fail/>
</tree>
"(,);" -> false false
可以看到:它尝试 subtree,subtree 先尝试 leaf,而 leaf 成功了——因为按照定义,leaf 是可选的:
auto leaf = -name;
形如 -p 的解析器总是成功(p 匹配不上时,它就匹配空输入)。因此在 a | b 中,如果 a = -p,备选项 b 永远不会被尝试。要么让 name 不再那么“可选”,要么调整备选分支的顺序,让 internal 先有机会匹配,然后再判断空的 leaf 是否匹配:
auto subtree = internal | leaf;
现在我们得到:
void quetzal::newick::test::tree()
";" -> true true
"(,);" -> true true
"(,,(,));" -> true true
"(A,B,(C,D));" -> true true
"(A,B,(C,D)E)F;" -> true true
"(:0.1,:0.2,(:0.3,:0.4):0.5);" -> true true
"(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;" -> false false
"(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);" -> true true
"(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;" -> true true
"((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;" -> true true看看剩下的一个失败的解析:
<tree>
<try>(:0.1,:0.2,(:0.3,:0.</try>
<subtree>
<try>(:0.1,:0.2,(:0.3,:0.</try>
<internal>
<try>(:0.1,:0.2,(:0.3,:0.</try>
<branch>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<subtree>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<internal>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<fail/>
</internal>
<leaf>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<name>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<fail/>
</name>
<success>:0.1,:0.2,(:0.3,:0.4</success>
</leaf>
<success>:0.1,:0.2,(:0.3,:0.4</success>
</subtree>
<length>
<try>:0.1,:0.2,(:0.3,:0.4</try>
<success>,:0.2,(:0.3,:0.4):0.</success>
</length>
<success>,:0.2,(:0.3,:0.4):0.</success>
</branch>
<branch>
<try>:0.2,(:0.3,:0.4):0.5</try>
<subtree>
<try>:0.2,(:0.3,:0.4):0.5</try>
<internal>
<try>:0.2,(:0.3,:0.4):0.5</try>
<fail/>
</internal>
<leaf>
<try>:0.2,(:0.3,:0.4):0.5</try>
<name>
<try>:0.2,(:0.3,:0.4):0.5</try>
<fail/>
</name>
<success>:0.2,(:0.3,:0.4):0.5</success>
</leaf>
<success>:0.2,(:0.3,:0.4):0.5</success>
</subtree>
<length>
<try>:0.2,(:0.3,:0.4):0.5</try>
<success>,(:0.3,:0.4):0.5):0.</success>
</length>
<success>,(:0.3,:0.4):0.5):0.</success>
</branch>
<branch>
<try>(:0.3,:0.4):0.5):0.0</try>
<subtree>
<try>(:0.3,:0.4):0.5):0.0</try>
<internal>
<try>(:0.3,:0.4):0.5):0.0</try>
<branch>
<try>:0.3,:0.4):0.5):0.0;</try>
<subtree>
<try>:0.3,:0.4):0.5):0.0;</try>
<internal>
<try>:0.3,:0.4):0.5):0.0;</try>
<fail/>
</internal>
<leaf>
<try>:0.3,:0.4):0.5):0.0;</try>
<name>
<try>:0.3,:0.4):0.5):0.0;</try>
<fail/>
</name>
<success>:0.3,:0.4):0.5):0.0;</success>
</leaf>
<success>:0.3,:0.4):0.5):0.0;</success>
</subtree>
<length>
<try>:0.3,:0.4):0.5):0.0;</try>
<success>,:0.4):0.5):0.0;</success>
</length>
<success>,:0.4):0.5):0.0;</success>
</branch>
<branch>
<try>:0.4):0.5):0.0;</try>
<subtree>
<try>:0.4):0.5):0.0;</try>
<internal>
<try>:0.4):0.5):0.0;</try>
<fail/>
</internal>
<leaf>
<try>:0.4):0.5):0.0;</try>
<name>
<try>:0.4):0.5):0.0;</try>
<fail/>
</name>
<success>:0.4):0.5):0.0;</success>
</leaf>
<success>:0.4):0.5):0.0;</success>
</subtree>
<length>
<try>:0.4):0.5):0.0;</try>
<success>):0.5):0.0;</success>
</length>
<success>):0.5):0.0;</success>
</branch>
<name>
<try>:0.5):0.0;</try>
<fail/>
</name>
<success>:0.5):0.0;</success>
</internal>
<success>:0.5):0.0;</success>
</subtree>
<length>
<try>:0.5):0.0;</try>
<success>):0.0;</success>
</length>
<success>):0.0;</success>
</branch>
<name>
<try>:0.0;</try>
<fail/>
</name>
<success>:0.0;</success>
</internal>
<success>:0.0;</success>
</subtree>
<fail/>
</tree>
"(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;" -> false false
从结尾处可以看清楚:问题在于长度(":0.0")出现在最后一个右括号之外,而那里并不期望出现长度(tree 的定义是 subtree >> ';',可选的 length 只在 branch 规则里才会被解析)。也许你忘了把 tree 而不是 branch 用作规则?不管怎样,剩下的你应该可以自己解决了。
旁注
你使用了跳过器(skipper),因此除非把某些规则(比如 name)声明为 lexeme,否则解析结果可能和你预期的不一样。我还建议把跳过器直接写进语法里:
auto tree = x3::skip(x3::space) [ subtree >> ';' ];
请注意,space 也会匹配换行符,所以你真正需要的可能是 blank。最后,可以在语法末尾追加 >> eoi,把 f == l 这一迭代器检查也写进语法:
auto tree = x3::skip(x3::space) [ subtree >> ';' >> x3::eoi ];
完整代码
这个版本同时处理了上面的旁注,并删除了调试/注释内容:
在 Coliru 上在线运行(Live On Coliru)
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace quetzal::newick::parser {
    // Recursive rule. `internal` refers to `branch`, and `branch_def` (below)
    // refers back to `subtree`, so the recursion goes through an x3::rule.
    x3::rule<struct branch> branch{"branch"};

    // Node label: one alphabetic character followed by any number of
    // alphanumerics, wrapped in lexeme[].
    auto name = x3::lexeme[x3::alpha >> *x3::alnum]; // to be improved later

    // Branch length: ':' followed by a floating-point number.
    auto length = x3::lit(':') >> x3::double_;

    // Leaf node: an optional label, so it also matches empty input.
    auto leaf = -name;

    // Internal node: parenthesised, comma-separated branches, then an optional label.
    auto internal = x3::lit('(') >> (branch % x3::lit(',')) >> x3::lit(')') >> -name;

    // `internal` is tried first: `leaf` never fails, so an alternative placed
    // after it would never be attempted.
    auto subtree = internal | leaf;

    // Whole input: one subtree, the terminating ';', then end of input.
    // x3::blank is the skipper for everything inside the brackets.
    auto tree = x3::skip(x3::blank)[subtree >> x3::lit(';') >> x3::eoi];

    // Definition of the recursive rule: a subtree with an optional length.
    auto branch_def = subtree >> -length;

    BOOST_SPIRIT_DEFINE(branch)
} // namespace quetzal::newick::parser
namespace quetzal::newick::test {
// Runs parser `p` over every string in `cases`.
// A banner containing `name` is written to std::cerr; for each input, one line
// with the quoted input and the boolean parse result is written to std::cout.
void run_tests(auto name, auto p, std::initializer_list<char const*> cases) {
    std::cerr << "============ running " << name << " tests:\n";
    for (char const* text : cases) {
        std::string const input(text);
        // `parse` is an unqualified call, resolved through argument-dependent lookup.
        std::cout << std::quoted(input) << " \t-> " << std::boolalpha
                  << parse(input.begin(), input.end(), p) << std::endl;
    }
}
void internal() {
run_tests("internal", quetzal::newick::parser::internal,
{
"(,)",
"(A,B)F",
"(A:10,B:10)F",
});
}
// Runs the complete `tree` parser against the ten sample inputs.
void tree() {
    std::initializer_list<char const*> const samples = {
        ";",
        "(,);",
        "(,,(,));",
        "(A,B,(C,D));",
        "(A,B,(C,D)E)F;",
        "(:0.1,:0.2,(:0.3,:0.4):0.5);",
        "(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;",
        "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);",
        "(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;",
        "((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;",
    };
    run_tests("tree", quetzal::newick::parser::tree, samples);
}
} // namespace quetzal::newick::test
int main() {
using namespace quetzal::newick::test;
internal();
tree();
}打印
============ running internal tests:
"(,)" -> true
"(A,B)F" -> true
"(A:10,B:10)F" -> true
============ running tree tests:
";" -> true
"(,);" -> true
"(,,(,));" -> true
"(A,B,(C,D));" -> true
"(A,B,(C,D)E)F;" -> true
"(:0.1,:0.2,(:0.3,:0.4):0.5);" -> true
"(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;" -> false
"(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);" -> true
"(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;" -> true
"((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;" -> true
https://stackoverflow.com/questions/74452319
复制相似问题