3 C grammar definition for use with JavaCC
4 Contributed by Doug South (dsouth@squirrel.com.au) 21/3/97
6 This parser assumes that the C source file has been preprocessed : all
7 #includes have been included and all macros have been expanded. I accomplish
8 this with "gcc -P -E <source file> > <output file>".
10 There is a problem with compiler specific types, such as __signed, __const,
11 __inline__, etc. These types can be added as typedef types before the parser
12 is run on a file. See main() for an example. I have also found a strange little
13 compiler specific "type" if you can call it that. It is __attribute__, but it
14 does not seem to be used as a type. I found that just deleting the __attribute__
15 and the following "offensive" code works.
17 This grammar also prints out all the types defined while parsing the file. This
18 is done via a call to printTypes() when the parser is complete. If you do not want
19 this, just comment out the printTypes() method call in the production rule
20 TranslationUnit(), which BTW is the root node for parsing a C source file.
22 I have not in any way extensively tested this grammar; in fact it is barely tested,
23 but I imagine it is better to have a starting point for a C grammar other than from
24 scratch. It has not been optimized in any way; my main aim was to get a parser that
25 works. Lookahead may not be optimal at choice points and may even be insufficient at
26 times. I chose to err on the side of non-optimal lookahead if I made a choice at all.
28 If you use this grammar, I would appreciate hearing from you. I will try to maintain
29 this grammar to the best of my ability, but at this point in time, this is only a side
30 hobby (unless someone wants to pay me for doing JavaCC work!). In that regard, I am
31 interested in hearing bug reports and comments.
35 Insert the appropriate code to enable C source trees from this grammar.
37 =============================================
38 3/2/06: Modified by Tom Copeland
39 - STRING_LITERAL now handles embedded escaped newlines, thanks to J.Chris Findlay for the patch
40 - Works with JavaCC 4.0
41 - Preprocessor directives are now simply SKIP'd, so no need to run C files through GCC first
43 31/8/10: Modified heavily by Christian Grothoff
44 - No more tracking of type names (so we can run without preprocessing)
45 - Support certain gcc-isms (unsigned long long, 33LL, etc.)
46 - No support for certain older C constructs
47 - Support for magic "GNUNET_PACKED" construct (extra "IDENTIFIER" in struct)
49 8/11/10: Modified some more by Christian Grothoff
50 - support for arguments without variable names (in particular, just 'void')
51 - support for string concatenations
/*
 * Command-line entry point. The visible branches read the C source either
 * from standard input or from the single file named in args[0], print a
 * usage message otherwise, and then run the root production
 * TranslationUnit(), reporting success or a parse failure on stdout.
 */
61 public static void main ( String args [ ] ) {
65 System.out.println("C Parser Version 0.1Alpha: Reading from standard input . . .");
66 parser = new CParser(System.in);
68 else if(args.length == 1){
69 System.out.println("C Parser Version 0.1Alpha: Reading from file " + args[0] + " . . ." );
71 parser = new CParser(new java.io.FileInputStream(args[0]));
73 catch(java.io.FileNotFoundException e){
74 System.out.println("C Parser Version 0.1: File " + args[0] + " not found.");
79 System.out.println("C Parser Version 0.1Alpha: Usage is one of:");
80 System.out.println(" java CParser < inputfile");
81 System.out.println("OR");
82 System.out.println(" java CParser inputfile");
86 parser.TranslationUnit();
// This parser consumes C, not Java: the old message was a leftover from the Java grammar example.
87 System.out.println("C Parser Version 0.1Alpha: C program parsed successfully.");
89 catch(ParseException e){
90 System.out.println("C Parser Version 0.1Alpha: Encountered errors during parse.");
103 | <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
104 | <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/">
105 | "#" : PREPROCESSOR_OUTPUT
108 <PREPROCESSOR_OUTPUT> SKIP:
113 <PREPROCESSOR_OUTPUT> MORE:
/*
 * Numeric literals.
 *
 * INTEGER_LITERAL: decimal, hexadecimal or octal digits followed by an
 * optional suffix. The suffix accepts an optional 'u'/'U' before and/or after
 * up to two 'l'/'L' characters, so 10U, 10UL, 10LLU and 0xFFu lex as a single
 * token (previously the 'u' was split off as an IDENTIFIER and the parse
 * failed). The pattern is deliberately lenient, as the original was (it also
 * accepts mixed-case "lL"); every literal accepted before is still accepted.
 *
 * FLOATING_POINT_LITERAL: the first three alternatives additionally accept
 * the C long-double suffix 'l'/'L' (e.g. 1.0L, .5l, 1e10L). The last
 * alternative (digits with a mandatory suffix and no '.' or exponent) is left
 * unchanged so that "1L" keeps lexing as an INTEGER_LITERAL.
 */
124 <INTEGER_LITERAL: <DECIMAL_LITERAL> (["u","U"])? (["l","L"])? (["l","L"])? (["u","U"])? | <HEX_LITERAL> (["u","U"])? (["l","L"])? (["l","L"])? (["u","U"])? | <OCTAL_LITERAL> (["u","U"])? (["l","L"])? (["l","L"])? (["u","U"])?>
125 | <#DECIMAL_LITERAL: ["1"-"9"] (["0"-"9"])* >
126 | <#HEX_LITERAL: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+>
127 | <#OCTAL_LITERAL: "0" (["0"-"7"])*>
128 | <FLOATING_POINT_LITERAL: (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","d","D","l","L"])? | "." (["0"-"9"])+ (<EXPONENT>)? (["f","F","d","D","l","L"])? | (["0"-"9"])+ <EXPONENT> (["f","F","d","D","l","L"])? | (["0"-"9"])+ (<EXPONENT>)? ["f","F","d","D"]>
129 | <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
133 ( (~["'","\\","\n","\r"])
135 ( ["n","t","b","r","f","\\","'","\""]
136 | ["0"-"7"] ( ["0"-"7"] )?
137 | ["0"-"3"] ["0"-"7"] ["0"-"7"]
145 ( (~["\"","\\","\n","\r"])
147 ( ["n","t","b","r","f","\\","'","\""]
148 | ["0"-"7"] ( ["0"-"7"] )?
149 | ["0"-"3"] ["0"-"7"] ["0"-"7"]
158 <CONTINUE: "continue"> |
159 <VOLATILE: "volatile"> |
160 <REGISTER: "register"> |
161 <UNSIGNED: "unsigned"> |
162 <TYPEDEF: "typedef"> |
193 <IDENTIFIER: <LETTER> (<LETTER> | <DIGIT>)*>
194 | <#LETTER: ["$","A"-"Z","_","a"-"z"]>
195 | <#DIGIT: ["0"-"9"]>
/* Root production for a C source file: one or more external declarations. */
198 void TranslationUnit() : {}
200 (ExternalDeclaration())+
204 void ExternalDeclaration() : {}
206 (StorageClassSpecifier())*
208 LOOKAHEAD (FunctionDeclaration()) FunctionDeclaration() |
209 LOOKAHEAD (StructOrUnionSpecifier() ";") StructOrUnionSpecifier() ";" |
210 LOOKAHEAD (EnumSpecifier() ";") EnumSpecifier() ";" |
211 LOOKAHEAD (VariableDeclaration()) VariableDeclaration() |
212 LOOKAHEAD (TypeDeclaration()) TypeDeclaration ()
216 void FunctionDeclaration() : {}
220 "(" [ ParameterList () ] ")"
221 ( ";" | CompoundStatement() )
224 void StorageClassSpecifier() : {}
226 ( <STATIC> | <EXTERN> )
229 void TypeDeclaration() : {}
232 ( LOOKAHEAD (DataType() ";") DataType () | FunctionType() ) ";"
237 StructOrUnionSpecifier () <IDENTIFIER>
240 void FunctionType() : {}
242 TypeSpecifier () "(" "*" <IDENTIFIER> ")" "(" [ ParameterList() ] ")"
/*
 * A comma-separated list of one or more parameter declarations, optionally
 * terminated by ", ..." for variadic functions. The LOOKAHEAD(2) inside the
 * loop checks the token after the comma, so the comma that introduces the
 * ellipsis is left for the trailing optional part instead of being consumed
 * as the start of another ParameterDeclaration.
 */
245 void ParameterList() : {}
247 ParameterDeclaration() ( LOOKAHEAD (2) "," ParameterDeclaration() )* [ "," "..." ]
250 void ParameterDeclaration() : {}
252 TypeSpecifier() [<IDENTIFIER> [ Array () ]]
255 void VariableDeclaration() : {}
257 VariableClassSpecifier ()
259 InitDeclaratorList() ";"
262 void LocalVariableDeclaration() : {}
264 [ <STATIC> ] VariableDeclaration ()
267 void VariableClassSpecifier() : {}
269 ( <AUTO> | <REGISTER> )*
272 void TypeSpecifier() : {}
281 | (<SIGNED> | <UNSIGNED>) [ <CHAR>
284 | <LONG> [ <LONG> ] ]
285 | StructOrUnionSpecifier()
293 /* this is needed for 'va_arg' where a type is an argument
294 -- and we cannot disambiguate the use of 'FOO'
295 after a 'typedef int FOO' from the variable 'FOO';
297 void NoIdentifierTypeSpecifier() : {}
306 | (<SIGNED> | <UNSIGNED>) [ <CHAR>
309 | <LONG> [ <LONG> ] ]
310 | StructOrUnionSpecifier()
317 void StructOrUnionSpecifier() : {}
320 StructOrUnion() [ <IDENTIFIER> ] "{" StructDeclarationList() "}" |
321 StructOrUnion() <IDENTIFIER>
324 void StructOrUnion() : {}
326 ( <STRUCT> | <UNION> )
329 void StructDeclarationList() : {}
331 (StructDeclaration())*
334 void InitDeclaratorList() : {}
336 InitDeclarator() ("," InitDeclarator())*
339 void InitDeclarator() : {}
341 <IDENTIFIER> [ Array () ] [ "=" Initializer() ]
344 void StructDeclaration() : {}
346 TypeSpecifier() <IDENTIFIER> [ Array() | ":" ConstantExpression() ] [ <IDENTIFIER> ] ";"
349 void EnumSpecifier() : {}
351 <ENUM> ( LOOKAHEAD(3) [ <IDENTIFIER> ] "{" EnumeratorList() "}" | <IDENTIFIER> )
/*
 * A comma-separated list of one or more enumerators, with an optional
 * trailing comma as permitted since C99 (e.g. "enum { A, B, }").
 * The LOOKAHEAD(2) makes the loop continue only when the comma is followed
 * by another enumerator; a comma directly before the closing brace is left
 * for the trailing optional "," instead.
 */
354 void EnumeratorList() : {}
356 Enumerator() ( LOOKAHEAD(2) "," Enumerator() )* [ "," ]
359 void Enumerator() : {}
361 <IDENTIFIER> [ "=" ConstantExpression() ]
366 "*" [ <CONST> ] [ Pointer() ]
369 void Initializer() : {}
371 ( AssignmentExpression() |
372 "{" InitializerList() [","] "}" )
375 void InitializerList() : {}
377 Initializer() (LOOKAHEAD(2) "," Initializer())*
383 ("[" [ConstantExpression()] "]" )+
386 void Statement() : {}
388 ( LOOKAHEAD(2) LabeledStatement() |
389 ExpressionStatement() |
390 CompoundStatement() |
391 SelectionStatement() |
392 IterationStatement() |
396 void LabeledStatement() : {}
398 ( <IDENTIFIER> ":" Statement() |
399 <CASE> ConstantExpression() ":" Statement() |
400 <DFLT> ":" Statement() )
403 void ExpressionStatement() : {}
408 void CompoundStatement() : {}
410 "{" ( LOOKAHEAD (LocalVariableDeclaration()) LocalVariableDeclaration () |
415 void SelectionStatement() : {}
417 ( IfStatement() | SwitchStatement() )
/*
 * if ( Expression ) Statement [ else Statement ]
 * The explicit LOOKAHEAD on the optional else-part resolves the dangling-else
 * choice: whenever an "else" follows, it is taken here, so it attaches to the
 * nearest preceding "if".
 */
420 void IfStatement() : {}
422 <IF> "(" Expression() ")" Statement() [ LOOKAHEAD(2) <ELSE> Statement() ]
425 void SwitchStatement() : {}
427 <SWITCH> "(" Expression() ")" Statement()
430 void IterationStatement() : {}
432 ( WhileStatement() | DoWhileStatement() | ForStatement() )
434 void WhileStatement() : {}
436 <WHILE> "(" Expression() ")" Statement()
438 void DoWhileStatement() : {}
440 <DO> Statement() <WHILE> "(" Expression() ")" ";"
442 void ForStatement() : {}
444 <FOR> "(" [ Expression() ] ";" [ Expression() ] ";" [ Expression() ] ")" Statement()
447 void JumpStatement() : {}
449 ( <GOTO> <IDENTIFIER> ";" |
452 <RETURN> [ Expression() ] ";" )
455 void Expression() : {}
457 AssignmentExpression() ( "," AssignmentExpression() )*
/*
 * Either an assignment or a plain conditional expression.
 * A syntactic lookahead first checks whether the input starts with
 * "UnaryExpression AssignmentOperator"; only then is it parsed as an
 * assignment, whose right-hand side recurses into AssignmentExpression
 * (so chained assignments nest to the right). Otherwise the input is
 * parsed as a ConditionalExpression.
 */
460 void AssignmentExpression() : {}
462 LOOKAHEAD(UnaryExpression() AssignmentOperator()) UnaryExpression() AssignmentOperator() AssignmentExpression() |
463 LOOKAHEAD(3) ConditionalExpression()
466 void AssignmentOperator() : {}
468 ( "=" | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" )
471 void ConditionalExpression() : {}
473 LogicalORExpression() [ "?" Expression() ":" ConditionalExpression() ]
476 void ConstantExpression() : {}
478 ConditionalExpression()
481 void LogicalORExpression() : {}
483 LogicalANDExpression() [ "||" LogicalORExpression() ]
486 void LogicalANDExpression() : {}
488 InclusiveORExpression() [ "&&" LogicalANDExpression() ]
491 void InclusiveORExpression() : {}
493 ExclusiveORExpression() [ "|" InclusiveORExpression() ]
496 void ExclusiveORExpression() : {}
498 ANDExpression() [ "^" ExclusiveORExpression() ]
501 void ANDExpression() : {}
503 EqualityExpression() [ "&" ANDExpression() ]
506 void EqualityExpression() : {}
508 RelationalExpression() [ ( "==" | "!=" ) EqualityExpression() ]
511 void RelationalExpression() : {}
513 ShiftExpression() [ ( "<" | ">" | "<=" | ">=" ) RelationalExpression() ]
516 void ShiftExpression() : {}
518 AdditiveExpression() [ ( "<<" | ">>" ) ShiftExpression() ]
521 void AdditiveExpression() : {}
523 MultiplicativeExpression() [ ( "+" | "-" ) AdditiveExpression() ]
526 void MultiplicativeExpression() : {}
528 CastExpression() [ ( "*" | "/" | "%" ) MultiplicativeExpression() ]
531 void CastExpression() : {}
533 ( LOOKAHEAD("(" TypeSpecifier() ")" CastExpression() ) "(" TypeSpecifier() ")" CastExpression() |
537 void UnaryExpression() : {}
539 ( LOOKAHEAD(3) PostfixExpression() |
540 "++" UnaryExpression() |
541 "--" UnaryExpression() |
542 UnaryOperator() CastExpression() |
543 <SIZEOF> ( LOOKAHEAD(UnaryExpression() ) UnaryExpression() | "(" TypeSpecifier() ")" ) )
546 void UnaryOperator() : {}
548 ( "&" | "*" | "+" | "-" | "~" | "!" )
551 void PostfixExpression() : {}
553 PrimaryExpression() ( "[" Expression() "]" |
554 "(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" |
561 void PrimaryExpression() : {}
568 void ArgumentExpressionList() : {}
570 AssignmentOrTypeExpression() ( "," AssignmentOrTypeExpression() )*
574 void AssignmentOrTypeExpression() : {}
576 NoIdentifierTypeSpecifier() |
577 AssignmentExpression()
583 <FLOATING_POINT_LITERAL> |
584 <CHARACTER_LITERAL> |