3 C grammar definition for use with JavaCC
4 Contributed by Doug South (dsouth@squirrel.com.au) 21/3/97
6 This parser assumes that the C source file has been preprocessed : all
7 #includes have been included and all macros have been expanded. I accomplish
8 this with "gcc -P -E <source file> > <output file>".
10 There is a problem with compiler-specific types, such as __signed, __const,
11 __inline__, etc. These types can be added as typedef types before the parser
12 is run on a file. See main() for an example. I have also found a strange little
13 compiler-specific "type", if you can call it that. It is __attribute__, but it
14 does not seem to be used as a type. I found that just deleting the __attribute__
15 and the "offending" code that follows it works.
17 This grammar also prints out all the types defined while parsing the file. This
18 is done via a call to printTypes() when the parser is complete. If you do not want
19 this, just comment out the printTypes() method call in the production rule
20 TranslationUnit(), which BTW is the root node for parsing a C source file.
22 I have not in any way extensively tested this grammar; in fact it is barely tested,
23 but I imagine it is better to have a starting point for a C grammar than to start from
24 scratch. It has not been optimized in any way; my main aim was to get a parser that
25 works. Lookahead may not be optimal at choice points and may even be insufficient at
26 times. I chose to err on the side of non-optimal lookahead if I made a choice at all.
28 If you use this grammar, I would appreciate hearing from you. I will try to maintain
29 this grammar to the best of my ability, but at this point in time, this is only a side
30 hobby (unless someone wants to pay me for doing JavaCC work!). In that regard, I am
31 interested in hearing about bugs and comments.
35 Insert the appropriate code to enable C source trees from this grammar.
37 =============================================
38 3/2/06: Modified by Tom Copeland
39 - STRING_LITERAL now handles embedded escaped newlines, thanks to J.Chris Findlay for the patch
40 - Works with JavaCC 4.0
41 - Preprocessor directives are now simply SKIP'd, so no need to run C files through GCC first
43 31/8/10: Modified heavily by Christian Grothoff
44 - No more tracking of type names (so we can run without preprocessing)
45 - Support certain gcc-isms (unsigned long long, 33LL, etc.)
46 - No support for certain older C constructs
47 - Support for magic "GNUNET_PACKED" construct (extra "IDENTIFIER" in struct)
57 public static void main ( String args [ ] ) {
61 System.out.println("C Parser Version 0.1Alpha: Reading from standard input . . .");
62 parser = new CParser(System.in);
64 else if(args.length == 1){
65 System.out.println("C Parser Version 0.1Alpha: Reading from file " + args[0] + " . . ." );
67 parser = new CParser(new java.io.FileInputStream(args[0]));
69 catch(java.io.FileNotFoundException e){
70 System.out.println("C Parser Version 0.1: File " + args[0] + " not found.");
75 System.out.println("C Parser Version 0.1Alpha: Usage is one of:");
76 System.out.println(" java CParser < inputfile");
77 System.out.println("OR");
78 System.out.println(" java CParser inputfile");
82 parser.TranslationUnit();
83 System.out.println("C Parser Version 0.1Alpha: Java program parsed successfully.");
85 catch(ParseException e){
86 System.out.println("C Parser Version 0.1Alpha: Encountered errors during parse.");
99 | <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
100 | <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/">
101 | "#" : PREPROCESSOR_OUTPUT
104 <PREPROCESSOR_OUTPUT> SKIP:
109 <PREPROCESSOR_OUTPUT> MORE:
/* Integer constant: decimal, hexadecimal or octal digits followed by an optional
   C integer suffix. The suffix is either u/U optionally followed by one or two
   l/L, or one or two l/L optionally followed by u/U. This keeps every spelling
   that was accepted before (33, 33L, 33LL) and additionally accepts the unsigned
   forms (33U, 33UL, 33ULL, 33LU, 33LLU) as a single token instead of an integer
   followed by an identifier. */
120 <INTEGER_LITERAL: (<DECIMAL_LITERAL> | <HEX_LITERAL> | <OCTAL_LITERAL>) ( ["u","U"] (["l","L"])? (["l","L"])? | ["l","L"] (["l","L"])? (["u","U"])? )?>
/* Helper patterns referenced by INTEGER_LITERAL. The leading '#' marks them as
   private regular expressions in JavaCC: they can be used inside other token
   definitions but are not produced as tokens on their own. */
/* Decimal digits: a non-zero first digit followed by any number of digits. */
121 | <#DECIMAL_LITERAL: ["1"-"9"] (["0"-"9"])* >
/* Hexadecimal digits: "0x" or "0X" followed by at least one hex digit. */
122 | <#HEX_LITERAL: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+>
/* Octal digits: a leading "0" followed by zero or more digits 0-7, so a lone
   "0" is matched by this pattern. */
123 | <#OCTAL_LITERAL: "0" (["0"-"7"])*>
/* Floating-point constant. Four alternatives:
     1. digits "." [digits] [exponent] [suffix]
     2. "." digits [exponent] [suffix]
     3. digits exponent [suffix]
     4. digits [exponent] suffix
   Alternatives 1-3 accept the C long-double suffix l/L in addition to the
   f/F/d/D suffixes that were already accepted, so "1.0L" or "1e10l" is a single
   token. Alternative 4 (digits with no "." and possibly no exponent) keeps its
   original f/F/d/D suffix set on purpose: adding l/L there would let digit
   strings such as "08L" lex as floating-point constants. */
124 | <FLOATING_POINT_LITERAL: (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","l","L","d","D"])? | "." (["0"-"9"])+ (<EXPONENT>)? (["f","F","l","L","d","D"])? | (["0"-"9"])+ <EXPONENT> (["f","F","l","L","d","D"])? | (["0"-"9"])+ (<EXPONENT>)? ["f","F","d","D"]>
/* Private helper: "e" or "E", an optional sign, then at least one digit. */
125 | <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
/* Character constant: a single quote, then exactly one character or escape
   sequence, then a closing single quote. An unescaped quote, backslash, LF or
   CR is not allowed as the character. Accepted escapes are the simple escapes
   (\n \t \b \r \f \a \v \? \\ \' \"), octal escapes of up to three digits, and
   hexadecimal escapes (\x followed by one or more hex digits). \a, \v, \? and
   the \x form are standard C escapes that were previously rejected. */
126 | <CHARACTER_LITERAL: "\'" (~["\'","\\","\n","\r"] | "\\" (["n","t","b","r","f","a","v","?","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] | "x" (["0"-"9","a"-"f","A"-"F"])+)) "\'">
/* String constant. The first quoted segment may contain any character except
   an unescaped quote, backslash, LF or CR, plus escape sequences: the simple
   escapes (\n \t \b \r \f \a \v \? \\ \' \"), octal escapes of up to three
   digits, hexadecimal escapes (\x followed by one or more hex digits), and a
   backslash followed by a line break (an escaped newline inside the literal).
   After the closing quote, further quoted segments separated only by spaces,
   tabs, CR or LF are folded into the same token, so adjacent string literals
   lex as one STRING_LITERAL. \a, \v, \?, the \x form, and tab as a separator
   between adjacent segments were previously not accepted. */
127 | <STRING_LITERAL: "\"" ( ~["\"","\\","\n","\r"] | "\\" ( ["n","t","b","r","f","a","v","?","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] | "x" (["0"-"9","a"-"f","A"-"F"])+ | ( ["\n","\r"] | "\r\n")))* "\"" ( ( ["\r","\n"," ","\t"] )* "\"" ( ~["\"","\\","\n","\r"] | "\\" ( ["n","t","b","r","f","a","v","?","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] | "x" (["0"-"9","a"-"f","A"-"F"])+ | ( ["\n","\r"] | "\r\n")))* "\"" )* >
131 <CONTINUE: "continue"> |
132 <VOLATILE: "volatile"> |
133 <REGISTER: "register"> |
134 <UNSIGNED: "unsigned"> |
135 <TYPEDEF: "typedef"> |
/* Identifier: one LETTER followed by any mix of LETTERs and DIGITs. */
166 <IDENTIFIER: <LETTER> (<LETTER> | <DIGIT>)*>
/* Private helper: ASCII letters plus "_" and "$". */
167 | <#LETTER: ["$","A"-"Z","_","a"-"z"]>
/* Private helper: a single decimal digit. */
168 | <#DIGIT: ["0"-"9"]>
171 void TranslationUnit() : {}
173 (ExternalDeclaration())+
176 void ExternalDeclaration() : {}
178 (StorageClassSpecifier())*
180 LOOKAHEAD (FunctionDeclaration()) FunctionDeclaration() |
181 LOOKAHEAD (StructOrUnionSpecifier()) StructOrUnionSpecifier() |
182 LOOKAHEAD (VariableDeclaration()) VariableDeclaration() |
183 LOOKAHEAD (TypeDeclaration()) TypeDeclaration ()
187 void FunctionDeclaration() : {}
191 "(" [ ParameterList () ] ")"
192 ( ";" | CompoundStatement() )
195 void StorageClassSpecifier() : {}
197 ( <STATIC> | <EXTERN> )
200 void TypeDeclaration() : {}
203 ( LOOKAHEAD (DataType() ";") DataType () | FunctionType() ) ";"
208 StructOrUnionSpecifier () <IDENTIFIER>
211 void FunctionType() : {}
213 TypeSpecifier () "(" "*" <IDENTIFIER> ")" "(" [ ParameterList() ] ")"
216 void ParameterList() : {}
218 ParameterDeclaration() ( LOOKAHEAD (2) "," ParameterDeclaration() )* [ "," "..." ]
221 void ParameterDeclaration() : {}
223 TypeSpecifier() <IDENTIFIER> [ Array () ]
226 void VariableDeclaration() : {}
228 VariableClassSpecifier ()
230 InitDeclaratorList() ";"
233 void LocalVariableDeclaration() : {}
235 [ <STATIC> ] VariableDeclaration ()
238 void VariableClassSpecifier() : {}
240 ( <AUTO> | <REGISTER> )*
243 void TypeSpecifier() : {}
252 | (<SIGNED> | <UNSIGNED>) [ <CHAR>
255 | <LONG> [ <LONG> ] ]
256 | StructOrUnionSpecifier()
264 /* this is needed for 'va_arg' where a type is an argument
265 -- and we cannot disambiguate the use of 'FOO'
266 after a 'typedef int FOO' from the variable 'FOO';
268 void NoIdentifierTypeSpecifier() : {}
277 | (<SIGNED> | <UNSIGNED>) [ <CHAR>
280 | <LONG> [ <LONG> ] ]
281 | StructOrUnionSpecifier()
288 void StructOrUnionSpecifier() : {}
291 StructOrUnion() [ <IDENTIFIER> ] "{" StructDeclarationList() "}" |
292 StructOrUnion() <IDENTIFIER>
295 void StructOrUnion() : {}
297 ( <STRUCT> | <UNION> )
300 void StructDeclarationList() : {}
302 (StructDeclaration())+
305 void InitDeclaratorList() : {}
307 InitDeclarator() ("," InitDeclarator())*
310 void InitDeclarator() : {}
312 <IDENTIFIER> [ Array () ] [ "=" Initializer() ]
315 void StructDeclaration() : {}
317 TypeSpecifier() <IDENTIFIER> [ Array() | ":" ConstantExpression() ] [ <IDENTIFIER> ] ";"
320 void EnumSpecifier() : {}
322 <ENUM> ( LOOKAHEAD(3) [ <IDENTIFIER> ] "{" EnumeratorList() "}" | <IDENTIFIER> )
325 void EnumeratorList() : {}
327 Enumerator() ("," Enumerator())*
330 void Enumerator() : {}
332 <IDENTIFIER> [ "=" ConstantExpression() ]
337 "*" [ <CONST> ] [ Pointer() ]
340 void IdentifierList() : {}
342 <IDENTIFIER> ("," <IDENTIFIER>)*
345 void Initializer() : {}
347 ( AssignmentExpression() |
348 "{" InitializerList() [","] "}" )
351 void InitializerList() : {}
353 Initializer() (LOOKAHEAD(2) "," Initializer())*
359 "[" [ConstantExpression()] "]"
362 void Statement() : {}
364 ( LOOKAHEAD(2) LabeledStatement() |
365 ExpressionStatement() |
366 CompoundStatement() |
367 SelectionStatement() |
368 IterationStatement() |
372 void LabeledStatement() : {}
374 ( <IDENTIFIER> ":" Statement() |
375 <CASE> ConstantExpression() ":" Statement() |
376 <DFLT> ":" Statement() )
379 void ExpressionStatement() : {}
384 void CompoundStatement() : {}
386 "{" ( LOOKAHEAD (LocalVariableDeclaration()) LocalVariableDeclaration () |
391 void SelectionStatement() : {}
393 ( <IF> "(" Expression() ")" Statement() [ LOOKAHEAD(2) <ELSE> Statement() ] |
394 <SWITCH> "(" Expression() ")" Statement() )
397 void IterationStatement() : {}
399 ( <WHILE> "(" Expression() ")" Statement() |
400 <DO> Statement() <WHILE> "(" Expression() ")" ";" |
401 <FOR> "(" [ Expression() ] ";" [ Expression() ] ";" [ Expression() ] ")" Statement() )
404 void JumpStatement() : {}
406 ( <GOTO> <IDENTIFIER> ";" |
409 <RETURN> [ Expression() ] ";" )
412 void Expression() : {}
414 AssignmentExpression() ( "," AssignmentExpression() )*
417 void AssignmentExpression() : {}
419 LOOKAHEAD(UnaryExpression() AssignmentOperator()) UnaryExpression() AssignmentOperator() AssignmentExpression() |
420 LOOKAHEAD(3) ConditionalExpression()
423 void AssignmentOperator() : {}
425 ( "=" | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" )
428 void ConditionalExpression() : {}
430 LogicalORExpression() [ "?" Expression() ":" ConditionalExpression() ]
433 void ConstantExpression() : {}
435 ConditionalExpression()
438 void LogicalORExpression() : {}
440 LogicalANDExpression() [ "||" LogicalORExpression() ]
443 void LogicalANDExpression() : {}
445 InclusiveORExpression() [ "&&" LogicalANDExpression() ]
448 void InclusiveORExpression() : {}
450 ExclusiveORExpression() [ "|" InclusiveORExpression() ]
453 void ExclusiveORExpression() : {}
455 ANDExpression() [ "^" ExclusiveORExpression() ]
458 void ANDExpression() : {}
460 EqualityExpression() [ "&" ANDExpression() ]
463 void EqualityExpression() : {}
465 RelationalExpression() [ ( "==" | "!=" ) EqualityExpression() ]
468 void RelationalExpression() : {}
470 ShiftExpression() [ ( "<" | ">" | "<=" | ">=" ) RelationalExpression() ]
473 void ShiftExpression() : {}
475 AdditiveExpression() [ ( "<<" | ">>" ) ShiftExpression() ]
478 void AdditiveExpression() : {}
480 MultiplicativeExpression() [ ( "+" | "-" ) AdditiveExpression() ]
483 void MultiplicativeExpression() : {}
485 CastExpression() [ ( "*" | "/" | "%" ) MultiplicativeExpression() ]
488 void CastExpression() : {}
490 ( LOOKAHEAD("(" TypeSpecifier() ")" CastExpression() ) "(" TypeSpecifier() ")" CastExpression() |
494 void UnaryExpression() : {}
496 ( LOOKAHEAD(3) PostfixExpression() |
497 "++" UnaryExpression() |
498 "--" UnaryExpression() |
499 UnaryOperator() CastExpression() |
500 <SIZEOF> ( LOOKAHEAD(UnaryExpression() ) UnaryExpression() | "(" TypeSpecifier() ")" ) )
503 void UnaryOperator() : {}
505 ( "&" | "*" | "+" | "-" | "~" | "!" )
508 void PostfixExpression() : {}
510 PrimaryExpression() ( "[" Expression() "]" |
511 "(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" |
518 void PrimaryExpression() : {}
525 void ArgumentExpressionList() : {}
527 AssignmentOrTypeExpression() ( "," AssignmentOrTypeExpression() )*
531 void AssignmentOrTypeExpression() : {}
533 NoIdentifierTypeSpecifier() |
534 AssignmentExpression()
539 <INTEGER_LITERAL> | <FLOATING_POINT_LITERAL> | <CHARACTER_LITERAL> | <STRING_LITERAL>