/* * PUBLIC DOMAIN PCCTS-BASED C++ GRAMMAR (cplusplus.g, stat.g, expr.g) * * Authors: Sumana Srinivasan, NeXT Inc.; sumana_srinivasan@next.com * Terence Parr, Parr Research Corporation; parrt@parr-research.com * Russell Quong, Purdue University; quong@ecn.purdue.edu * * VERSION 1.1 * * SOFTWARE RIGHTS * * This file is a part of the ANTLR-based C++ grammar and is free * software. We do not reserve any LEGAL rights to its use or * distribution, but you may NOT claim ownership or authorship of this * grammar or support code. An individual or company may otherwise do * whatever they wish with the grammar distributed herewith including the * incorporation of the grammar or the output generated by ANTLR into * commerical software. You may redistribute in source or binary form * without payment of royalties to us as long as this header remains * in all source distributions. * * We encourage users to develop parsers/tools using this grammar. * In return, we ask that credit is given to us for developing this * grammar. By "credit", we mean that if you incorporate our grammar or * the generated code into one of your programs (commercial product, * research project, or otherwise) that you acknowledge this fact in the * documentation, research report, etc.... In addition, you should say nice * things about us at every opportunity. * * As long as these guidelines are kept, we expect to continue enhancing * this grammar. Feel free to send us enhancements, fixes, bug reports, * suggestions, or general words of encouragement at parrt@parr-research.com. * * NeXT Computer Inc. * 900 Chesapeake Dr. * Redwood City, CA 94555 * 12/02/1994 * * Restructured for public consumption by Terence Parr late February, 1995. * * Requires PCCTS 1.32b4 or higher to get past ANTLR. * * DISCLAIMER: we make no guarantees that this grammar works, makes sense, * or can be used to do anything useful. 
*/

/* Expression rules of the C++ grammar: the comma expression, assignment,
 * the conditional operator, and the binary-operator levels down to the
 * additive operators.  Each binary level is written as
 *     operand (OPERATOR operand)*
 * where "operand" is the rule for the next level down.
 */
class CPPParser {

/* One or more assignment expressions separated by COMMA. */
expression
    :   assignment_expression (COMMA assignment_expression)*
    ;

/* right-to-left for assignment op */
assignment_expression
    :   conditional_expression
        {   (   ASSIGNEQUAL
            |   TIMESEQUAL
            |   DIVIDEEQUAL
            |   MODEQUAL
            |   PLUSEQUAL
            |   MINUSEQUAL
            |   SHIFTLEFTEQUAL
            |   SHIFTRIGHTEQUAL
            |   BITWISEANDEQUAL
            |   BITWISEXOREQUAL
            |   BITWISEOREQUAL
            )
            assignment_expression
        }
    ;

/* The conditional (?:) operator.  The middle operand is a full expression
 * and the right operand is again a conditional_expression, so that chained
 * and nested forms such as "a ? b : c ? d : e", "a ? b ? c : d : e" and
 * "a ? b = 1 : c" are accepted.  The right operand is a
 * conditional_expression rather than an assignment_expression so that an
 * assignment operator following the whole construct is still matched only
 * by the optional tail of assignment_expression; the set of accepted token
 * sequences is intended to be the same either way.
 */
conditional_expression
    :   logical_or_expression
        { QUESTIONMARK expression COLON conditional_expression }
    ;

/* A conditional_expression under another name: no assignment and no comma
 * at the top level.
 */
constant_expression
    :   conditional_expression
    ;

logical_or_expression
    :   logical_and_expression (OR logical_and_expression)*
    ;

logical_and_expression
    :   inclusive_or_expression (AND inclusive_or_expression)*
    ;

inclusive_or_expression
    :   exclusive_or_expression (BITWISEOR exclusive_or_expression)*
    ;

exclusive_or_expression
    :   and_expression (BITWISEXOR and_expression)*
    ;

and_expression
    :   equality_expression (AMPERSAND equality_expression)*
    ;

equality_expression
    :   relational_expression ((NOTEQUAL | EQUAL) relational_expression)*
    ;

/* The loop decision uses approximate lookahead (#pragma approx). */
relational_expression
    :   shift_expression
#pragma approx
        (   (   LESSTHAN
            |   GREATERTHAN
            |   LESSTHANOREQUALTO
            |   GREATERTHANOREQUALTO
            )
            shift_expression
        )*
    ;

shift_expression
    :   additive_expression ((SHIFTLEFT | SHIFTRIGHT) additive_expression)*
    ;

/* See comment for multiplicative_expression regarding #pragma */
additive_expression
    :   multiplicative_expression
#pragma approx
        ((PLUS | MINUS) multiplicative_expression)*
    ;

/* ANTLR has trouble dealing with the analysis of the confusing unary/binary
 * operators such as STAR, AMPERSAND, PLUS, etc...  With the #pragma
 * we simply tell ANTLR to use the "quick-to-analyze" approximate lookahead
 * as full LL(k) lookahead will not resolve the ambiguity anyway.  Might
 * as well not bother.  This has the side-benefit that ANTLR doesn't go
 * off to lunch here (take infinite time to read grammar).
*/
multiplicative_expression
    :   pm_expression
#pragma approx
        ((STAR | DIVIDE | MOD) pm_expression)*
    ;

/* Pointer-to-member operators applied to cast expressions. */
pm_expression
    :   cast_expression (( ".\*" | "\->\*" ) cast_expression)*
    ;

/* The string "( ID" can be either the start of a cast or
 * the start of a unary_expression.  However, the ID must
 * be a type name for it to be a cast.  Since ANTLR can only hoist
 * semantic predicates that are visible without consuming a token,
 * the semantic predicate in rule type_name is not hoisted--hence, the
 * rule is reported to be ambiguous.  I am manually putting in the
 * correctly hoisted predicate.
 *
 * Ack! Actually "( ID" might be the start of "(T(expr))" which makes
 * the first parens just an ordinary expression grouping.  The solution
 * is to look at what follows the type, T.  Note, this could be a
 * qualified type.  Yucko.  I believe that "(T(" can only imply
 * function-style type cast in an expression (...) grouping.
 *
 * We DO NOT handle the following situation correctly at the moment:
 * Suppose you have
 *    struct rusage rusage;
 *    return (rusage.fp);
 *    return (rusage*)p;
 * Now essentially there is an ambiguity here. If rusage is followed by any
 * postfix operators then it is an identifier else it is a type name.  This
 * problem does not occur in C because, unless the tag struct is attached,
 * rusage is not a type name.  However in C++ that restriction is removed.
 * No *real* programmer would do this, but it's in the C++ standard just for
 * fun..
 *
 * Another fun one (from an LL standpoint):
 *
 *   (A::B::T *)v;      // that's a cast of v to type A::B::T
 *   (A::B::foo);       // that's a simple member access
 *
 * The qualifiedItemIs(1) function scans ahead to what follows the
 * final "::" and returns true if the item is a type.  The offset of
 * '1' makes it ignore the initial "\("; normally, the offset is 0.
 *
 * The last conjunct of the predicate, LT(3)->getType()!=LPARENTHESIS,
 * is the "look at what follows the type" test described above: with
 * "( ID" as tokens 1 and 2, token 3 being a left parenthesis means the
 * input is "(T(" and the cast alternative must not be taken.
 */
cast_expression
    :   ( "\(" "::" | "\(" ID )? =>
        <<(qualifiedItemIs(1)==qiType||qualifiedItemIs(1)==qiCtor)&&LT(3)->getType()!=LPARENTHESIS>>?
        "\(" type_name "\)" cast_expression
    |   unary_expression    // handles outer (...) of "(T(expr))"
    ;

/* Prefix operators, sizeof, new and delete.
 *
 * NOTE(review): the semantic predicate guarding "sizeof ( type_name )" was
 * damaged in the copy of the grammar this was edited from: only
 * "getText())>>?" survived.  It has been restored as a type-name test on
 * the ID that follows the parenthesis, i.e. on LT(2).  The helper name
 * isTypeName is a reconstruction -- confirm it against the parser support
 * code before relying on it.
 */
unary_expression
    :   postfix_expression
    |   PLUSPLUS unary_expression
    |   MINUSMINUS unary_expression
    |   unary_operator cast_expression
    |   SIZEOF
        (   /* see comment for rule cast_expression for info on predicate */
            ( LPARENTHESIS ID )? =>
            <<isTypeName(LT(2)->getText())>>?
            LPARENTHESIS type_name RPARENTHESIS
        |   unary_expression
        )
    |   new_expression
    |   delete_expression
    ;

/* The first ()? is used to resolve "new (expr) (type)" because both
 * (expr) and (type) look identical until you've seen the whole thing.
 *
 * new_initializer appears to be conflicting with function arguments as
 * function arguments can follow a primary_expression.  [This is a full
 * LL(k) versus LALL(k) problem.  Enhancing context by duplication of
 * some rules might handle this.]
 */
new_expression
    :   // counteract pred for postfix expr that says :: must start a
        // qualified item; here, it can start a "new".
        ( "::" )? => <<1>>?
        { "::" } "new"
        (   ("\(" expression_list "\)")?
        |
        )
        (   new_type_id
        |   "\(" type_name "\)"     /* new (T(3)); */
        )
        (   (new_initializer)?
        |
        )
    ;

/* The type named after "new": declaration specifiers with an optional
 * new_declarator.
 */
new_type_id
    :   declaration_specifiers
#pragma approx
        { new_declarator }
    ;

/* Either a pointer-to-member prefix (optionally followed by a further
 * new_declarator) or one or more array bounds.
 */
new_declarator
    :   ptr_to_member cv_qualifier_seq
#pragma approx
        { new_declarator }
    |   direct_new_declarator
    ;

/* The "[expression]" construct conflicts with the "new []" construct
 * (and possibly others).  We used approximate lookahead for the "new []"
 * construct so that it would not try to compute full LL(2) lookahead.
 * Here, we use #pragma approx again because anytime we see a [ followed
 * by token that can begin an expression, we always want to loop.
 * Approximate lookahead handles this correctly.  In fact, approximate
 * lookahead is the same as full lookahead when all but the last lookahead
 * depth are singleton sets; e.g., {"["} followed by FIRST(expression).
*/
direct_new_declarator
    :
#pragma approx
        ( "\[" expression "\]" )+
    ;

/* Parenthesised, possibly empty, argument list following the new'd type. */
new_initializer
    :   "\(" { expression_list } "\)"
    ;

/* "delete" and "delete []", each optionally prefixed by "::". */
delete_expression
    :   { "::" } "delete" { "\[" "\]" } cast_expression
    ;

/* The prefix operators that unary_expression applies to a cast_expression. */
unary_operator
    :   AMPERSAND
    |   STAR
    |   PLUS
    |   MINUS
    |   TILDE
    |   NOT
    ;

/* Either a function-style cast, or a primary expression followed by any
 * number of subscripts, calls, member selections and postfix ++ / --.
 */
postfix_expression
    :
#pragma approx
        (   // Function-style type cast; ugh, what a language
            simple_type_specifier "\(" { expression_list } "\)"
        |   //(ID)? => <<1>>? // any other kind of ID is cool here
            primary_expression
#pragma approx
            (   "\[" expression "\]"
            |   "\(" { expression_list } "\)"
            |   DOT id_expression
            |   POINTERTO id_expression
            |   PLUSPLUS
            |   MINUSMINUS
            )*
        )
    ;

/* A possibly scope-qualified identifier, operator name, or "~ID".
 *
 * NOTE(review): the semantic predicate that opens this rule reads "<>?",
 * which is not a well-formed predicate; its body appears to have been lost
 * before the grammar reached its current form.  It is left exactly as found
 * because the intended condition cannot be determined from this file --
 * restore it from a pristine copy of the grammar.
 */
id_expression
    :   <>?
        scope_override
        (   <<1>>? ID
        |   "operator" optor
        |   "\~" ID
        )
    ;

primary_expression
    :   id_expression
    |   constant
    |   "this"
    |   STRING
    |   LPARENTHESIS expression RPARENTHESIS
    ;

/* One or more assignment expressions separated by COMMA (call arguments,
 * new placement / initializer arguments).
 */
expression_list
    :   assignment_expression (COMMA assignment_expression)*
    ;

/* Integer, character and floating-point literal tokens. */
constant
    :   OCTALINT
    |   DECIMALINT
    |   HEXADECIMALINT
    |   CHARACTER
    |   FLOATONE
    |   FLOATTWO
    ;

/* The operator named after the "operator" keyword.  Returns, in $op, the
 * spelling of the operator that was matched.  For a conversion operator
 * (declaration_specifiers optionally followed by * or &) a fixed
 * placeholder string is returned instead.
 *
 * "->*" reports "->*"; it previously reported "->", which made it
 * indistinguishable from the "->" alternative that follows it.
 */
optor > [char *op]
    :   "new"
#pragma approx
        (   "\[" "\]"   <<$op = "new[]";>>
        |               <<$op = "new";>>
        )
    |   "delete"
#pragma approx
        (   "\[" "\]"   <<$op = "delete[]";>>
        |               <<$op = "delete";>>
        )
    |   "\+"        <<$op = "+";>>
    |   "\-"        <<$op = "-";>>
    |   "\*"        <<$op = "*";>>
    |   "/"         <<$op = "/";>>
    |   "%"         <<$op = "%";>>
    |   "^"         <<$op = "^";>>
    |   "&"         <<$op = "&";>>
    |   "\|"        <<$op = "|";>>
    |   "\~"        <<$op = "~";>>
    |   "!"         <<$op = "!";>>
    |   "="         <<$op = "=";>>
    |   "<"         <<$op = "<";>>
    |   ">"         <<$op = ">";>>
    |   "\+="       <<$op = "+=";>>
    |   "\-="       <<$op = "-=";>>
    |   "\*="       <<$op = "*=";>>
    |   "/="        <<$op = "/=";>>
    |   "%="        <<$op = "%=";>>
    |   "^="        <<$op = "^=";>>
    |   "&="        <<$op = "&=";>>
    |   "\|="       <<$op = "|=";>>
    |   "\<\<"      <<$op = "<<";>>
    |   "\>\>"      <<$op = ">>";>>
    |   "\>\>="     <<$op = ">>=";>>
    |   "\<\<="     <<$op = "<<=";>>
    |   "=="        <<$op = "==";>>
    |   "!="        <<$op = "!=";>>
    |   "<="        <<$op = "<=";>>
    |   ">="        <<$op = ">=";>>
    |   "&&"        <<$op = "&&";>>
    |   "\|\|"      <<$op = "||";>>
    |   "\+\+"      <<$op = "++";>>
    |   "\-\-"      <<$op = "--";>>
    |   ","         <<$op = ",";>>
    |   "\->\*"     <<$op = "->*";>>
    |   "\->"       <<$op = "->";>>
    |   "\(" "\)"   <<$op = "()";>>
    |   "\[" "\]"   <<$op = "[]";>>
    |   declaration_specifiers  // user-defined type casts (ugh)
#pragma approx
        {"\*"|"&"}
        <<$op = "user-defined-op [not yet available]";>>
    ;

}

/* Lexical definitions for identifiers and numeric literals.  A single or
 * double quote switches the scanner to the CHARACTERS or STRINGS mode and
 * keeps accumulating text (more()); those modes are not defined in this
 * part of the file.
 */
#token ID               "[a-zA-Z_][a-zA-Z0-9_]*"

#token OCTALINT         "0[0-7]*{[uUlL]}"
#token DECIMALINT       "[1-9][0-9]*{[uUlL]}"
#token HEXADECIMALINT   "(0x|0X)[0-9a-fA-F]+{[uUlL]}"

#token "'"              << mode (CHARACTERS); more (); >>
#token "\""             << mode (STRINGS); more (); >>

#token FLOATONE         "([0-9]+.[0-9]* | [0-9]*.[0-9]+) {[eE]{[\-\+]}[0-9]+} {[fFlL]}"
#token FLOATTWO         "[0-9]+ [eE]{[\-\+]}[0-9]+ {[fFlL]}"