我使用boost_1.59中的spirit来解析一种类似C的语言(名为stone,最初用Java编写)。但是我在使用boost::spirit::qi解析stone中的C风格字符串时遇到了麻烦。完整代码在coliru上。
下面列出了我编写的解析器规则。
// Qi grammar for the Stone language. The grammar and every rule below use
// qi::in_state_skipper<Lexer> as their skipper type.
// NOTE(review): Iterator is presumably the lexer's token iterator, since the
// constructor consumes tok.identifier / tok.number - confirm against the caller.
template <typename Iterator, typename Lexer>
struct StoneGrammar
: qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
// Defines all rules; `tok` supplies the identifier and number tokens that
// `primary` matches. `program` is the start rule.
template <typename TokenDef>
StoneGrammar(TokenDef const& tok)
: StoneGrammar::base_type(program)
{
using boost::spirit::_val;
using boost::spirit::no_skip;
/* Grammar for Stone:
primary : "(" expr ")" | NUMBER | IDENTIFIER | STRING
factor : "-" primary | primary
expr : factor { OP factor }
block : "{" [ statement ] { (";" | EOL) [ statement ] } "}"
//block : "{" [ statement ] { (";" | EOL) [ statement ] } "}"
simple : expr
statement : "if" expr block [ "else" block ]
| "while" expr block
| simple
program : { statement [";"] }
*/
// An optional statement followed by a mandatory ';' or newline.
// NOTE(review): the EBNF above describes a repetition `{ statement [";"] }`,
// but this rule has no repetition operator - confirm that is intended.
program
= -statement >> (qi::lit(';') | '\n')
;
// A statement is an if, a while, or a bare expression (tried in that order).
statement
= if_stmt
| while_stmt
| simple_stmt
;
// "if" condition block, with an optional "else" block.
if_stmt
= "if" >> expression >> block >> -("else" >> block)
;
while_stmt
= "while" >> expression >> block;
simple_stmt
= expression
;
// Braced list of optional statements separated by ';' or newline.
block
= '{' >> -statement >> *((qi::lit(';') | '\n') >> -statement) >> '}'
;
// A flat chain of factors joined by operators; no precedence levels are encoded here.
expression
= factor >> *(op >> factor)
;
// Optional unary minus in front of a primary.
factor
= '-' >> primary
| primary
;
// Binary operators. The '=' and "||" alternatives carry semantic actions that
// stream to std::cout.
// NOTE(review): no using-declaration for `_1` is visible in this excerpt, so
// which placeholder these actions bind to cannot be determined from here.
op
= qi::lit('+')
| '-'
| '*'
| '/'
| qi::lit('=')[std::cout << _1 << "= parserd" << std::endl]
| qi::lit("||")[std::cout << _1 << " || parsed" << std::endl]
;
// Parenthesised expression, identifier token, number token, or unesc_str.
// The two token alternatives stream their value to std::cout.
primary
= '(' >> expression >> ')'
| tok.identifier[std::cout << _1 << std::endl]
| tok.number[std::cout << _1 << std::endl]
| unesc_str
;
// Escape table: maps each two-character escape sequence to the character it denotes.
// It is filled here but not referenced by any rule defined in this constructor.
unesc_char.add("\\a", '\a')("\\b", '\b')("\\f", '\f')("\\n", '\n')
("\\r", '\r')("\\t", '\t')("\\v", '\v')
("\\\\", '\\')("\\\'", '\'')("\\\"", '\"')
;
// Opening quote, exactly one element (an alphanumeric character, or "\x" followed
// by a hex number), closing quote - there is no repetition operator.
// The accompanying question text reports that this rule only compiles when
// qi::alnum is removed.
unesc_str = qi::lit('"') >> (qi::alnum | ("\\x" >> qi::hex)) >> '"';
}
// Attribute type exposed by `expression`.
typedef boost::variant<unsigned int, std::string> expression_type;
// Rules without an attribute.
// NOTE: `assignment` is declared here but never assigned in the constructor above.
qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > simple_stmt;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > op;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > factor;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > primary;
// Rules declared with a std::string attribute.
// NOTE: `string_literal` is declared here but never assigned in the constructor above.
qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > string_literal;
qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > unesc_str;
qi::symbols<char const, char const> unesc_char;
// expression exposes an expression_type attribute (string_literal and unesc_str
// above are declared with std::string attributes as well)
qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
};下面列出了使用某些令牌定义解析器。
// Spirit.Lex token definitions for the Stone language: punctuation/operator
// tokens plus `number` and `identifier` in the default state, and a separate
// "WS" state holding whitespace and comment patterns.
template <typename Lexer>
struct StoneToken : lex::lexer<Lexer>
{
// Registers all token patterns with the lexer.
StoneToken()
{
// define the tokens to match
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
number = "[0-9]+";
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
// associate the tokens and the token set with the lexer
// Note that '"' and '\n' are registered as tokens of their own here, and that
// the "||" token is commented out.
this->self
= lex::token_def<>('(') | ')' | '{' | '}' | '[' | ']'
| '+' | '-' | '*' | '/'
| '='
| "==" | "<=" | ">="
| '>' | '<' | "&&"
//| "\||"
| '!' | '^' | '|' | '~' | '&'
| '%'
| '\'' | ',' | '"' | '\n' | ';' | '.' | '_'
| "kkk"
;
//this->self += number | if_ | else_ | while_ | identifier;
// number is added before identifier; both come after the literal tokens above.
this->self += number | identifier;
// define the whitespace to ignore (spaces, tabs, and C-style
// comments)
// The three patterns are: runs of spaces/tabs, /* ... */ block comments, and
// // line comments including their terminating newline.
this->self("WS")
= lex::token_def<>("[ \\t]+")
| R"(\/\*[^*]*\*+([^/*][^*]*\*+)*\/)"
| R"(\/\/[^\n]*\n)"
;
}
// Tokens carrying a std::string value.
// NOTE: `op` is declared but never given a pattern or added to the lexer in this excerpt.
lex::token_def<std::string> identifier, op;
// Token carrying an unsigned int value.
lex::token_def<unsigned int> number;
}; 但是,编译器无法编译unesc_str规则。下面列出了clang++-3.6中错误报告的开始部分。
/usr/include/boost/type_traits/make_unsigned.hpp:38:4: error: static_assert failed
"(::boost::type_traits::ice_or< ::boost::is_integral<T>::value,
::boost::is_enum<T>::value>::value)"
BOOST_STATIC_ASSERT(
^
/usr/include/boost/static_assert.hpp:78:41: note: expanded from macro 'BOOST_STATIC_ASSERT'
# define BOOST_STATIC_ASSERT( ... ) static_assert(__VA_ARGS__, #__VA_ARGS__)
^
/usr/include/boost/type_traits/make_unsigned.hpp:146:70: note: in instantiation of template class
'boost::detail::make_unsigned_imp<boost::spirit::lex::lexertl::token<__gnu_cxx::__normal_iterator<char
*, std::basic_string<char> >, boost::mpl::vector<unsigned int, std::basic_string<char>, mpl_::na,
mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na,
mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>,
mpl_::bool_<true>, unsigned long> >' requested here
BOOST_TT_AUX_TYPE_TRAIT_DEF1(make_unsigned,T,typename boost::detail::make_unsigned_imp<T>::type)
^
/usr/include/boost/type_traits/detail/type_trait_def.hpp:21:13: note: expanded from macro
'BOOST_TT_AUX_TYPE_TRAIT_DEF1'
typedef result type; \
^
/usr/include/boost/spirit/home/support/char_class.hpp:51:34: note: in instantiation of template class
'boost::make_unsigned<boost::spirit::lex::lexertl::token<__gnu_cxx::__normal_iterator<char *,
std::basic_string<char> >, boost::mpl::vector<unsigned int, std::basic_string<char>, mpl_::na,
mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na,
mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>,
mpl_::bool_<true>, unsigned long> >' requested here
typedef typename make_unsigned<SourceChar>::type USourceChar;然而,我发现如果删除unesc_str规则中的qi::alnum,编译就能成功。
等待答案!
发布于 2015-10-10 00:05:42
您的问题(主要)是由使用lexer引起的。为什么会有它?
首先使用词法分析标记,然后使用一系列字符解析器解析字符串,这两者之间存在逻辑矛盾。
最明显的解决办法是为alnum之类的东西创建“令牌”,但坦率地说,我得到的印象是您对spirit还有点陌生,需要从更简单的开始。
如果你愿意,我可以稍后编写一个更简单的语法。
附注:最好不要使用using namespace,因为你会得到虚假的(无声的)冲突。例如,我真的很难知道那里使用的是::_1还是boost::spirit::qi::_1。
这里有一份清理了一半的版本,它让我得以接近你所问的问题:http://paste.ubuntu.com/12724893/
更新
下面就是我之前提到的那个更简单的语法。我写得很尽兴,不过在这些不太像样的语法上确实花了太多时间。我把直播编码的录像整理在这里:part #1、part #2、part #3和part #4
下面是一个可以正常工作、能够解析这些输入的语法:
// Entry point: parse `program` using `blank` as the skipper.
start = skip(blank) [ program ];
// A list of optional statements separated by end-of-line or ';'.
program = -statement % (eol|';');
statement = conditionalstatement | whileloop | simple;
// A simple statement is just an expression; `!eoi` refuses to start one at end of input.
simple = !eoi >> expression.alias();
conditionalstatement = "if" >> expression >> block >> -("else" >> block);
whileloop = "while" >> expression >> block;
// A block is a braced program.
block = '{' >> program >> '}';
expression = e_top.alias();
// Atoms: parenthesised expression, number, string or identifier.
e_simple = '(' >> expression >> ')' | number | string | identifier;
// Precedence is encoded by rule nesting. e_top is the entry rule and therefore the
// loosest-binding level, so it owns '+' and '-'; e_factor, one level further down,
// owns '*' and '/'. With the two character sets the other way round, `1+2*3` would
// be parsed as `(1+2)*3`.
// Both levels recurse on their right-hand side, so a chain of operators of the
// same level nests to the right.
e_top = attr_cast<ast::BinaryExpr, ast::BinaryExpr>(copy(e_factor >> char_("-+") >> e_top)) | e_factor;
e_factor = attr_cast<ast::BinaryExpr, ast::BinaryExpr>(copy(e_term >> char_("*/") >> e_factor)) | e_term;
e_term = unaryexpr | e_simple;
// Unary minus applied to an atom.
unaryexpr = char_('-') >> e_simple;
// Unsigned real number; a leading '-' is handled by unaryexpr instead.
number = real_parser<double, ureal_policies<double> >();
// Everything between two double quotes; no escape sequences are recognised.
string = '"' >> *~char_('"') >> '"';
// A letter followed by letters, digits or underscores, captured as the matched text.
identifier = raw [ alpha >> *(alnum | '_') ];表达式解析器有一些“人工”节点,以确保AST反映通常的运算符优先级。
ast命名空间包含公开的类型。实际上,我首先编写了这些(参见实时流录制):
// Tagged literal wrapper: one template, specialised per tag so that number and
// string literals are distinct types.
// NOTE(review): the namespace is unnamed as shown, while other snippets in this
// document refer to these types through `ast::` / `stone::ast::` - the namespace
// names may have been lost; confirm against the original listing.
namespace {
// Primary template is only declared; each tag gets its own specialisation below.
template <typename Tag> struct Literal;
// Empty tag types used solely to select a Literal specialisation.
namespace Tags {
struct NumberLiteral;
struct StringLiteral;
}
// Numeric literal: holds a double.
template <> struct Literal<Tags::NumberLiteral> { double value; };
// String literal: holds a std::string.
template <> struct Literal<Tags::StringLiteral> { std::string value; };
}
// Expression node types of the AST.
// NOTE(review): the namespace is unnamed as shown, while other snippets in this
// document refer to these types through `ast::` / `stone::ast::` - confirm against
// the original listing.
namespace {
// Identifiers are plain strings; Number and String are the tagged literal types.
using Identifier = std::string;
using Number = Literal<Tags::NumberLiteral>;
using String = Literal<Tags::StringLiteral>;
// Forward declarations: both structs contain an Expression, so the variant below
// holds them through boost::recursive_wrapper.
struct UnaryExpr;
struct BinaryExpr;
// An expression is one of: number, identifier, string, a nested expression,
// a unary expression or a binary expression.
using Expression = boost::make_recursive_variant<
Number,
Identifier,
String,
boost::recursive_variant_, // refers to Expression itself
boost::recursive_wrapper<UnaryExpr>,
boost::recursive_wrapper<BinaryExpr>
>::type;
// Unary operator applied to a single operand.
struct UnaryExpr {
char op; // -
Expression expr;
};
// Binary operator with left and right operands.
struct BinaryExpr {
char op; // +-/*
Expression lhs, rhs;
};
// A simple statement is represented directly by its expression.
using Simple = Expression;
}
// Statement node types of the AST.
// NOTE(review): the namespace is unnamed as shown, while other snippets in this
// document refer to these types through `ast::` / `stone::ast::` - confirm against
// the original listing.
namespace {
// Forward declarations: both structs contain Blocks of Statements, so the variant
// below holds them through boost::recursive_wrapper.
struct ConditionalStatement;
struct WhileLoop;
// A statement is a conditional, a while loop, or a simple (expression) statement.
using Statement = boost::make_recursive_variant<
boost::recursive_wrapper<ConditionalStatement>,
boost::recursive_wrapper<WhileLoop>,
Simple
>::type;
// A block is an ordered sequence of statements.
using Block = std::vector<Statement>;
// if/else: the else branch is optional.
struct ConditionalStatement {
Expression condition;
Block true_branch;
boost::optional<Block> false_branch;
};
// while loop: condition plus body.
struct WhileLoop {
Expression condition;
Block body;
};
}
using Program = Block;执行解析的代码非常简单:
// Driver: parse the file named by `fname` into an AST and report the outcome.
// Open the input in binary mode.
std::ifstream ifs(fname, std::ios::binary);
// Build the [first, last) iterator pair over the stream, with whitespace skipping
// disabled on the stream so the parser sees every character.
// NOTE(review): the type `It` is not shown in this excerpt - presumably a
// stream-based iterator type; confirm against the full listing.
It first(ifs >> std::noskipws), last;
stone::ast::Program program;
// qi::parse advances `first` past the consumed input and fills `program`.
bool ok = qi::parse(first, last, parser, program);
if (ok)
{
std::cout << "File " << fname << " was parsed succesfully: ";
// `dumper` (defined elsewhere) prints the parsed AST.
dumper(program);
std::cout << "\n";
}
else
std::cout << "File " << fname << " failed to parse\n";
// The unbraced `else` above covers only the single statement before this point,
// so this check runs whether or not the parse succeeded.
if (first!=last)
std::cout << "Warning: trailing unparsed input '" << std::string(first, last) << "'\n";对于输入test1.stone
1; if IsValid {
make
noise;
while 2 * (8+1)/"something else that is unimportant" {
shoot; cannon
}
} else { cry_in_7_corners_; }它转储AST节点:
File test1.stone was parsed succesfully: { 1; if [IsValid]{ [make]; [noise]; while (2* ((8+ 1)/ "something else that is unimportant")){ [shoot]; [cannon]; } ; } else { [cry_in_7_corners_]; } ; }
https://stackoverflow.com/questions/33041768
复制相似问题