2 # Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
9 package OpenSSL::ParseC;
15 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
21 my @preprocessor_conds; # A list of simple preprocessor conditions,
22 # each item being a list of macros defined
27 return map { ( @$_ ) } @preprocessor_conds;
30 # A list of handlers that will look at a "complete" string and try to
31 # figure out what to make of it.
32 # Each handler is a hash with the following keys:
34 # regexp a regexp to compare the "complete" string with.
35 # checker a function that does a more complex comparison.
36 # Use this instead of regexp if that isn't enough.
37 # massager massages the "complete" string into an array with
38 # the following elements:
40 # [0] String that needs further processing (this
41 # applies to typedefs of structs), or empty.
42 # [1] The name of what was found.
43 # [2] A character that denotes what type of thing
44 # this is: 'F' for function, 'S' for struct,
45 # 'T' for typedef, 'M' for macro, 'V' for
47 # [3] Return type (only for type 'F' and 'V')
48 # [4] Value (for type 'M') or signature (for type 'F',
50 # [5...] The list of preprocessor conditions this is
51 # found in, as in checks for macro definitions
52 # (stored as the macro's name) or the absence
53 # of definition (stored as the macro's name
56 # If the massager returns an empty list, it means the
57 # "complete" string has side effects but should otherwise
59 # If the massager is undefined, the "complete" string
61 my @opensslcpphandlers = (
62 ##################################################################
63 # OpenSSL CPP specials
65 # These are used to convert certain pre-precessor expressions into
66 # others that @cpphandlers have a better chance to understand.
68 { regexp => qr/#if (!?)OPENSSL_API_([0-9_]+)$/,
70 my $cnd = $1 eq '!' ? 'ndef' : 'def';
72 #if$cnd DEPRECATEDIN_$2
78 ##################################################################
81 { regexp => qr/#ifdef ?(.*)/,
84 if (ref($_[$#_]) eq "HASH") {
88 push @preprocessor_conds, [ $1 ];
89 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
94 { regexp => qr/#ifndef ?(.*)/,
97 if (ref($_[$#_]) eq "HASH") {
101 push @preprocessor_conds, [ '!'.$1 ];
102 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
107 { regexp => qr/#if (0|1)/,
110 if (ref($_[$#_]) eq "HASH") {
115 push @preprocessor_conds, [ "TRUE" ];
117 push @preprocessor_conds, [ "!TRUE" ];
119 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
124 { regexp => qr/#if ?(.*)/,
127 if (ref($_[$#_]) eq "HASH") {
133 if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
134 push @results, $1; # Handle the simple case
136 my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
137 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
139 if ($rest =~ m/$re/) {
140 my @rest = split /\|\|/, $rest;
143 m|^defined<<<\(([^\)]*)\)>>>$|;
144 die "Something wrong...$opts{PLACE}" if $1 eq "";
148 $conds =~ s/<<<|>>>//g;
149 warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
152 } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
153 push @results, '!'.$1; # Handle the simple case
155 my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
156 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
158 if ($rest =~ m/$re/) {
159 my @rest = split /\&\&/, $rest;
162 m|^!defined<<<\(([^\)]*)\)>>>$|;
163 die "Something wrong...$opts{PLACE}" if $1 eq "";
164 push @results, '!'.$1;
167 $conds =~ s/<<<|>>>//g;
168 warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
172 $conds =~ s/<<<|>>>//g;
173 warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
176 print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
178 push @preprocessor_conds, [ @results ];
179 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
184 { regexp => qr/#elif (.*)/,
187 if (ref($_[$#_]) eq "HASH") {
191 die "An #elif without corresponding condition$opts{PLACE}"
192 if !@preprocessor_conds;
193 pop @preprocessor_conds;
194 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
201 { regexp => qr/#else/,
204 if (ref($_[$#_]) eq "HASH") {
208 die "An #else without corresponding condition$opts{PLACE}"
209 if !@preprocessor_conds;
210 # Invert all conditions on the last level
211 my $stuff = pop @preprocessor_conds;
212 push @preprocessor_conds, [
213 map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
215 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
220 { regexp => qr/#endif ?/,
223 if (ref($_[$#_]) eq "HASH") {
227 die "An #endif without corresponding condition$opts{PLACE}"
228 if !@preprocessor_conds;
229 pop @preprocessor_conds;
230 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
235 { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
239 my $spaceval = $3||"";
242 $1, 'M', "", $params ? "$name$params$spaceval" : $val,
246 massager => sub { return (); }
250 my @opensslchandlers = (
251 ##################################################################
254 # They are really preprocessor stuff, but they look like C stuff
255 # to this parser. All of these do replacements, anything else is
259 # Global variable stuff
260 { regexp => qr/OPENSSL_DECLARE_GLOBAL<<<\((.*),\s*(.*)\)>>>;/,
261 massager => sub { return (<<"EOF");
262 #ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
263 OPENSSL_EXPORT $1 _shadow_$2;
265 $1 *_shadow_$2(void);
272 # Deprecated stuff, by OpenSSL release.
274 # We trick the parser by pretending that the declaration is wrapped in a
275 # check if the DEPRECATEDIN macro is defined or not. Callers of parse()
276 # will have to decide what to do with it.
277 { regexp => qr/(DEPRECATEDIN_\d+(?:_\d+_\d+)?)<<<\((.*)\)>>>/,
278 massager => sub { return (<<"EOF");
289 # LHASH_OF(foo) is used as a type, but the chandlers won't take it
290 # gracefully, so we expand it here.
291 { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
292 massager => sub { return ("$1struct lhash_st_$2$3"); }
294 { regexp => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/,
297 static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
298 int (*cfn)(const $1 *, const $1 *));
299 static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
300 static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
301 static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
302 static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
303 static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
304 static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
305 static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
306 static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
308 static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
309 static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
310 static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
311 static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
320 # STACK_OF(foo) is used as a type, but the chandlers won't take it
321 # gracefully, so we expand it here.
322 { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
323 massager => sub { return ("$1struct stack_st_$2$3"); }
325 # { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
328 # my $stack_of = "struct stack_st_$2";
330 # if ($after =~ m|^\w|) { $after = " ".$after; }
331 # return ("$before$stack_of$after");
334 { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
338 typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
339 typedef void (*sk_$1_freefunc)($3 *a);
340 typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
341 static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
342 static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
343 static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
344 static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
345 static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
347 static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
348 static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
349 static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
350 static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
351 static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
352 static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
353 static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
354 static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
355 static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
356 static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
357 sk_$1_freefunc freefunc);
358 static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
359 static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
360 static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
361 static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
362 static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
363 static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
364 static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
365 static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
366 sk_$1_copyfunc copyfunc,
367 sk_$1_freefunc freefunc);
368 static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
369 sk_$1_compfunc compare);
373 { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/,
374 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
376 { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
377 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
379 { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/,
380 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
382 { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
383 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
385 { regexp => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/,
386 massager => sub { return ("STACK_OF($1);"); }
388 { regexp => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/,
389 massager => sub { return ("STACK_OF($1);"); }
391 { regexp => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),\s*(.*?)\)>>>/,
392 massager => sub { return ("STACK_OF($1);"); }
398 { regexp => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/,
400 return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)");
403 { regexp => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/,
405 return ("typedef $1 *i2d_of_$1($1 *,unsigned char **)");
408 { regexp => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/,
410 return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)");
413 { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
416 #ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
417 OPENSSL_EXTERN const ASN1_ITEM *$1_it;
419 const ASN1_ITEM *$1_it(void);
424 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/,
432 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
437 DECLARE_ASN1_ITEM($2)
441 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
446 DECLARE_ASN1_ITEM($2)
450 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
458 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
466 { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
473 DECLARE_ASN1_ITEM($2)
477 { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/,
478 massager => sub { return (<<"EOF");
483 DECLARE_ASN1_ITEM($1)
487 { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
490 int i2d_$1_NDEF(void);
494 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
497 int $1_print_ctx(void);
501 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
504 int $2_print_ctx(void);
508 { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
509 massager => sub { return (); }
511 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/,
518 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
525 { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
526 massager => sub { return (); }
528 { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
529 massager => sub { return (<<"EOF");
530 #ifndef OPENSSL_NO_STDIO
531 int PEM_read_$1(void);
532 int PEM_write_$1(void);
534 int PEM_read_bio_$1(void);
535 int PEM_write_bio_$1(void);
542 { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
543 massager => sub { return (<<"EOF");
544 #ifndef OPENSSL_NO_STDIO
545 int PEM_write_$1(void);
547 int PEM_write_bio_$1(void);
551 { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
552 massager => sub { return (<<"EOF");
553 #ifndef OPENSSL_NO_STDIO
554 int PEM_read_$1(void);
556 int PEM_read_bio_$1(void);
561 # Spurious stuff found in the OpenSSL headers
562 # Usually, these are just macros that expand to, well, something
563 { regexp => qr/__NDK_FPABI__/,
564 massager => sub { return (); }
571 ##################################################################
574 # extern "C" of individual items
575 # Note that the main parse function has a special hack for 'extern "C" {'
576 # which can't be done in handlers
577 # We simply ignore it.
578 { regexp => qr/extern "C" (.*;)/,
579 massager => sub { return ($1); },
581 # any other extern is just ignored
582 { regexp => qr/^\s* # Any spaces before
583 extern # The keyword we look for
584 \b # word to non-word boundary
588 massager => sub { return (); },
590 # union, struct and enum definitions
591 # Because this one might appear a little everywhere within type
592 # definitions, we take it out and replace it with just
593 # 'union|struct|enum name' while registering it.
594 # This makes use of the parser trick to surround the outer braces
596 { regexp => qr/(.*) # Anything before ($1)
597 \b # word to non-word boundary
598 (union|struct|enum) # The word used ($2)
599 (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3)
600 <<<(\{.*?\})>>> # Struct or enum definition ($4)
601 (.*) # Anything after ($5)
608 || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
611 my $type = $word eq "struct" ? 'S' : 'E';
612 if ($before ne "" || $after ne ";") {
613 if ($after =~ m|^\w|) { $after = " ".$after; }
614 return ("$before$word $name$after;",
615 "$word $name", $type, "", "$word$definition", all_conds());
617 # If there was no before nor after, make the return much simple
618 return ("", "$word $name", $type, "", "$word$definition", all_conds());
621 # Named struct and enum forward declarations
622 # We really just ignore them, but we need to parse them or the variable
623 # declaration handler further down will think it's a variable declaration.
624 { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
625 massager => sub { return (); }
627 # Function returning function pointer declaration
628 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
629 ((?:\w|\*|\s)*?) # Return type ($2)
632 ([[:alpha:]_]\w*) # Function name ($3)
633 (\(.*\)) # Parameters ($4)
635 <<<(\(.*\))>>> # F.p. parameters ($5)
639 return ("", $3, 'F', "", "$2(*$4)$5", all_conds())
641 return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
643 # Function pointer declaration, or typedef thereof
644 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
645 ((?:\w|\*|\s)*?) # Return type ($2)
646 <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3)
647 <<<(\(.*\))>>> # F.p. parameters ($4)
651 return ("", $3, 'T', "", "$2(*)$4", all_conds())
653 return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
656 # Function declaration, or typedef thereof
657 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
658 ((?:\w|\*|\s)*?) # Return type ($2)
660 ([[:alpha:]_]\w*) # Function name ($3)
661 <<<(\(.*\))>>> # Parameters ($4)
665 return ("", $3, 'T', "", "$2$4", all_conds())
667 return ("", $3, 'F', $2, "$2$4", all_conds());
670 # Variable declaration, including arrays, or typedef thereof
671 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
672 ((?:\w|\*|\s)*?) # Type ($2)
674 ([[:alpha:]_]\w*) # Variable name ($3)
675 ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4)
679 return ("", $3, 'T', "", $2.($4||""), all_conds())
681 return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
686 # End handlers are almost the same as handlers, except they are run through
687 # ONCE when the input has been parsed through. These are used to check for
688 # remaining stuff, such as an unfinished #ifdef and stuff like that that the
689 # main parser can't check on its own.
694 die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE}
695 if @preprocessor_conds;
700 # takes a list of strings that can each contain one or several lines of code
701 # also takes a hash of options as last argument.
703 # returns a list of hashes with information:
705 # name name of the thing
706 # type type, see the massage handler function
707 # returntype return type of functions and variables
708 # value value for macros, signature for functions, variables
710 # conds preprocessor conditions (array ref)
714 if (ref($_[$#_]) eq "HASH") {
719 in_extern_C => 0, # An exception to parenthesis processing.
720 cpp_parens => [], # A list of ending parens and braces found in
721 # preprocessor directives
722 c_parens => [], # A list of ending parens and braces found in
724 in_string => "", # empty string when outside a string, otherwise
725 # "'" or '"' depending on the starting quote.
726 in_comment => "", # empty string when outside a comment, otherwise
727 # "/*" or "//" depending on the type of comment
728 # found. The latter will never be multiline
729 # NOTE: in_string and in_comment will never be
730 # true (in perl semantics) at the same time.
734 my $normalized_line = ""; # $input_line, but normalized. In essence, this
735 # means that ALL whitespace is removed unless
736 # it absolutely has to be present, and in that
737 # case, there's only one space.
738 # The cases where a space needs to stay present
741 # 2. between words and number
742 # 3. after the first word of a preprocessor
744 # 4. for the #define directive, between the macro
745 # name/args and its value, so we end up with:
747 # #define BAR(x) something(x)
748 my $collected_stmt = ""; # Where we're building up a C line until it's a
749 # complete definition/declaration, as determined
750 # by any handler being capable of matching it.
752 # We use $_ shamelessly when looking through @lines.
753 # In case we find a \ at the end, we keep filling it up with more lines.
756 foreach my $line (@_) {
757 # split tries to be smart when a string ends with the thing we split on
758 $line .= "\n" unless $line =~ m|\R$|;
761 # We use ¦undef¦ as a marker for a new line from the file.
762 # Since we convert one line to several and unshift that into @lines,
763 # that's the only safe way we have to track the original lines
764 my @lines = map { ( undef, $_ ) } split $/, $line;
766 # Remember that extra # we added above? Now we remove it
768 pop @lines; # Don't forget the undef
771 if (!defined($lines[0])) {
773 $state{current_line}++;
775 $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n";
776 $opts{PLACE2} = $opts{filename}.":".$state{current_line};
781 $_ = "" unless defined $_;
790 print STDERR "DEBUG:----------------------------\n";
791 print STDERR "DEBUG: \$_ = '$_'\n";
794 ##########################################################
795 # Now that we have a full line, let's process through it
797 unless ($state{in_comment}) {
798 # Begin with checking if the current $normalized_line
799 # contains a preprocessor directive
800 # This is only done if we're not inside a comment and
801 # if it's a preprocessor directive and it's finished.
802 if ($normalized_line =~ m|^#| && $_ eq "") {
803 print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
805 $opts{debug_type} = "OPENSSL CPP";
806 my @r = ( _run_handlers($normalized_line,
810 # Checking if there are lines to inject.
812 @r = split $/, (pop @r).$_;
813 print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
814 if $opts{debug} && @r;
815 @lines = ( @r, @lines );
820 print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
822 $opts{debug_type} = "CPP";
823 my @r = ( _run_handlers($normalized_line,
827 if (ref($r[0]) eq "HASH") {
828 push @result, shift @r;
831 # Now, check if there are lines to inject.
832 # Really, this should never happen, it IS a
833 # preprocessor directive after all...
835 @r = split $/, pop @r;
836 print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
837 if $opts{debug} && @r;
838 @lines = ( @r, @lines );
844 # Note: we simply ignore all directives that no
846 $normalized_line = "";
849 # If the two strings end and start with a character that
850 # shouldn't get concatenated, add a space
852 ($collected_stmt =~ m/(?:"|')$/
853 || ($collected_stmt =~ m/(?:\w|\d)$/
854 && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
856 # Now, unless we're building up a preprocessor directive or
857 # are in the middle of a string, or the parens et al aren't
858 # balanced up yet, let's try and see if there's a OpenSSL
859 # or C handler that can make sense of what we have so far.
860 if ( $normalized_line !~ m|^#|
861 && ($collected_stmt ne "" || $normalized_line ne "")
862 && ! @{$state{c_parens}}
863 && ! $state{in_string} ) {
865 print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n";
866 print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
868 $opts{debug_type} = "OPENSSL C";
869 my @r = ( _run_handlers($collected_stmt
875 # Checking if there are lines to inject.
877 @r = split $/, (pop @r).$_;
878 print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
879 if $opts{debug} && @r;
880 @lines = ( @r, @lines );
884 $normalized_line = "";
885 $collected_stmt = "";
888 print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n";
889 print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
891 $opts{debug_type} = "C";
892 my @r = ( _run_handlers($collected_stmt
898 if (ref($r[0]) eq "HASH") {
899 push @result, shift @r;
902 # Checking if there are lines to inject.
904 @r = split $/, (pop @r).$_;
905 print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
906 if $opts{debug} && @r;
907 @lines = ( @r, @lines );
911 $normalized_line = "";
912 $collected_stmt = "";
917 $collected_stmt .= $space.$normalized_line;
918 $normalized_line = "";
927 # Take care of inside string first.
928 if ($state{in_string}) {
929 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
930 $state{in_string} # Look for matching quote
932 $normalized_line .= $`.$&;
933 $state{in_string} = "";
937 die "Unfinished string without continuation found$opts{PLACE}\n";
940 # ... or inside comments, whichever happens to apply
941 elsif ($state{in_comment}) {
943 # This should never happen
944 die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
945 if ($state{in_comment} eq "//");
947 # A note: comments are simply discarded.
949 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
950 \*\/ # Look for C comment end
952 $state{in_comment} = "";
954 print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
963 # At this point, it's safe to remove leading whites, but
964 # we need to be careful with some preprocessor lines
969 if ($normalized_line =~ m/^
970 \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
973 print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
979 $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
980 (my $paren_singular = $parens) =~ s|s$||;
982 # Now check for specific tokens, and if they are parens,
983 # check them against $state{$parens}. Note that we surround
984 # the outermost parens with extra "<<<" and ">>>". Those
985 # are for the benefit of handlers who to need to detect
986 # them, and they will be removed from the final output.
990 if (!@{$state{$parens}}) {
991 if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
992 $state{in_extern_C} = 1;
993 print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
995 $normalized_line = "";
997 $normalized_line .= "<<<".$body;
1000 $normalized_line .= $body;
1003 if ($normalized_line ne "") {
1004 print STDERR "DEBUG: found $paren_singular start '$body'\n"
1006 $body =~ tr|\{\[\(|\}\]\)|;
1007 print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
1009 push @{$state{$parens}}, $body;
1011 } elsif (m|^[\}\]\)]|) {
1014 if (!@{$state{$parens}}
1015 && $& eq '}' && $state{in_extern_C}) {
1016 print STDERR "DEBUG: found end of 'extern \"C\"'\n"
1018 $state{in_extern_C} = 0;
1020 print STDERR "DEBUG: Trying to match '$&' against '"
1021 ,join("', '", @{$state{$parens}})
1024 die "Unmatched parentheses$opts{PLACE}\n"
1025 unless (@{$state{$parens}}
1026 && pop @{$state{$parens}} eq $&);
1027 if (!@{$state{$parens}}) {
1028 $normalized_line .= $&.">>>";
1030 $normalized_line .= $&;
1033 } elsif (m|^["']|) { # string start
1037 # We want to separate strings from \w and \d with one space.
1038 $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
1039 $normalized_line .= $body;
1040 $state{in_string} = $body;
1041 } elsif (m|^\/\*|) { # C style comment
1042 print STDERR "DEBUG: found start of C style comment\n"
1044 $state{in_comment} = $&;
1046 } elsif (m|^\/\/|) { # C++ style comment
1047 print STDERR "DEBUG: found C++ style comment\n"
1049 $_ = ""; # (just discard it entirely)
1050 } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
1051 (?i: U | L | UL | LL | ULL )?
1052 | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
1054 print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
1056 $normalized_line .= $&;
1058 } elsif (m/^[[:alpha:]_]\w*/) {
1063 # Now, only add a space if it's needed to separate
1064 # two \w characters, and we also surround strings with
1065 # a space. In this case, that's if $normalized_line ends
1066 # with a \w, \d, " or '.
1068 if ($normalized_line =~ m/("|')$/
1069 || ($normalized_line =~ m/(\w|\d)$/
1070 && $body =~ m/^(\w|\d)/));
1072 print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
1074 $normalized_line .= $space.$body;
1076 } elsif (m|^(?:\\)?.|) { # Catch-all
1077 $normalized_line .= $&;
1083 foreach my $handler (@endhandlers) {
1084 if ($handler->{massager}) {
1085 $handler->{massager}->(\%opts);
1091 # arg1: line to check
1092 # arg2...: handlers to check
1093 # return undef when no handler matched
1096 if (ref($_[$#_]) eq "HASH") {
1103 foreach my $handler (@handlers) {
1104 if ($handler->{regexp}
1105 && $line =~ m|^$handler->{regexp}$|) {
1106 if ($handler->{massager}) {
1108 print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
1109 print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
1111 my $saved_line = $line;
1113 map { s/(<<<|>>>)//g; $_ }
1114 $handler->{massager}->($saved_line, \%opts);
1115 print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
1116 , join("', '", @massaged), "'\n"
1119 # Because we may get back new lines to be
1120 # injected before whatever else that follows,
1121 # and the injected stuff might include
1122 # preprocessor lines, we need to inject them
1123 # in @lines and set $_ to the empty string to
1124 # break out from the inner loops
1125 my $injected_lines = shift @massaged || "";
1130 name => shift @massaged,
1131 type => shift @massaged,
1132 returntype => shift @massaged,
1133 value => shift @massaged,
1134 conds => [ @massaged ]
1139 print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n"
1140 if $opts{debug} && $injected_lines eq "";
1141 return (1, $injected_lines);