--- /dev/null
+#! /usr/bin/env perl
+# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+package OpenSSL::ParseC;
+
+use strict;
+use warnings;
+
+use Exporter;
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+$VERSION = "0.9";
+@ISA = qw(Exporter);
+@EXPORT = qw(parse);
+
+# Global handler data
+#
+# Stack of simple preprocessor conditions, one entry per conditional level
+# that is currently open.  Each entry is an array reference holding the
+# names of macros that are defined, or not defined (name prefixed with '!').
+my @preprocessor_conds;
+
+# Handler helpers
+
+# Returns the conditions of all open preprocessor levels as one flat list,
+# in the order the levels were opened.
+sub all_conds {
+    my @flattened;
+    push @flattened, @{$_} foreach @preprocessor_conds;
+    return @flattened;
+}
+
+# A list of handlers that will look at a "complete" string and try to
+# figure out what to make of it.
+# Each handler is a hash with the following keys:
+#
+# regexp a regexp to compare the "complete" string with.
+# checker a function that does a more complex comparison.
+# Use this instead of regexp if that isn't enough.
+# NOTE: _run_handlers in this file only looks at regexp;
+# a checker is not called by it.
+# massager massages the "complete" string into an array with
+# the following elements:
+#
+# [0] String that needs further processing (this
+# applies to typedefs of structs), or empty.
+# [1] The name of what was found.
+# [2] A character that denotes what type of thing
+# this is: 'F' for function, 'S' for struct,
+# 'E' for enum (also used for union), 'T' for
+# typedef, 'M' for macro, 'V' for variable.
+# [3] Return type (only for type 'F' and 'V')
+# [4] Value (for type 'M') or signature (for type 'F',
+# 'V', 'T' or 'S')
+# [5...] The list of preprocessor conditions this is
+# found in, as in checks for macro definitions
+# (stored as the macro's name) or the absence
+# of definition (stored as the macro's name
+# prefixed with a '!').
+#
+# If the massager returns an empty list, it means the
+# "complete" string has side effects but should otherwise
+# be ignored.
+# If the massager is undefined, the "complete" string
+# should be ignored.
+my @opensslcpphandlers = (
+    ##################################################################
+    # OpenSSL CPP specials
+    #
+    # These are used to convert certain preprocessor expressions into
+    # others that @cpphandlers have a better chance to understand.
+
+    # A comparison of OPENSSL_API_COMPAT against a version number is turned
+    # into an #ifndef ('<') or #ifdef ('>=') of the DEPRECATEDIN_x_y_z macro
+    # built from three fields of that number.  Any other operator is fatal.
+    { regexp   => qr/#if OPENSSL_API_COMPAT(\S+)(0x[0-9a-fA-F]{8})L$/,
+      massager => sub {
+          # $1 and $2 are still the captures of the regexp above; read them
+          # before anything else gets a chance to run a pattern match.
+          my ($operator, $version) = ($1, hex($2));
+
+          die "Error: unacceptable operator $operator: $_[0]\n"
+              unless $operator eq '<' || $operator eq '>=';
+
+          # Each pair is (shift count, mask) for one field of the number.
+          my @fields = map { ($version >> $_->[0]) & $_->[1] }
+              ( [ 28, 0xf ], [ 20, 0xff ], [ 12, 0xff ] );
+          my $macro = join('_', 'DEPRECATEDIN', @fields);
+
+          my $directive;
+          if ($operator eq '<') {
+              $directive = 'ifndef';
+          } else {
+              $directive = 'ifdef';
+          }
+          return ("#$directive $macro\n");
+      }
+    }
+);
+my @cpphandlers = (
+    ##################################################################
+    # CPP stuff
+    #
+    # Each massager gets the normalized directive as its first argument and
+    # may get a hash reference of options as its last argument (keys read
+    # here: debug, debug_type, warnings, PLACE).  The captures $1, $2, ...
+    # seen inside a massager are those of the handler's own regexp.
+    #
+    # The conditional handlers keep @preprocessor_conds up to date, one
+    # entry per open conditional level, and return an empty list.
+
+    # #ifdef MACRO opens a level that requires MACRO.
+    { regexp   => qr/#ifdef ?(.*)/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          push @preprocessor_conds, [ $1 ];
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # #ifndef MACRO opens a level that requires the absence of MACRO.
+    { regexp   => qr/#ifndef ?(.*)/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          push @preprocessor_conds, [ '!'.$1 ];
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # #if 0 and #if 1 are recorded with the pseudo macro TRUE.
+    { regexp   => qr/#if (0|1)/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          if ($1 eq "1") {
+              push @preprocessor_conds, [ "TRUE" ];
+          } else {
+              push @preprocessor_conds, [ "!TRUE" ];
+          }
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # General #if.  Only two shapes are understood:
+    #
+    #   defined(A)||defined(B)||...      recorded as A, B, ...
+    #   !defined(A)&&!defined(B)&&...    recorded as !A, !B, ...
+    #
+    # (the outermost parentheses arrive wrapped in <<< and >>>).  Anything
+    # else still opens a level, but with fewer or no conditions recorded,
+    # and produces a warning when the 'warnings' option is set.
+    { regexp   => qr/#if ?(.*)/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          my @results = ();
+          my $conds = $1;
+          if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
+              push @results, $1;   # Handle the simple case
+              my $rest = $2;
+              my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
+              print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
+                  if $opts{debug};
+              if ($rest =~ m/$re/) {
+                  my @rest = split /\|\|/, $rest;
+                  # $rest starts with the separator, so the first field
+                  # is empty
+                  shift @rest;
+                  foreach (@rest) {
+                      m|^defined<<<\(([^\)]*)\)>>>$|;
+                      die "Something wrong...$opts{PLACE}" if $1 eq "";
+                      push @results, $1;
+                  }
+              } else {
+                  $conds =~ s/<<<|>>>//g;
+                  warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
+                      if $opts{warnings};
+              }
+          } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
+              push @results, '!'.$1;   # Handle the simple case
+              my $rest = $2;
+              my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
+              print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
+                  if $opts{debug};
+              if ($rest =~ m/$re/) {
+                  my @rest = split /\&\&/, $rest;
+                  # $rest starts with the separator, so the first field
+                  # is empty
+                  shift @rest;
+                  foreach (@rest) {
+                      m|^!defined<<<\(([^\)]*)\)>>>$|;
+                      die "Something wrong...$opts{PLACE}" if $1 eq "";
+                      push @results, '!'.$1;
+                  }
+              } else {
+                  $conds =~ s/<<<|>>>//g;
+                  warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
+                      if $opts{warnings};
+              }
+          } else {
+              $conds =~ s/<<<|>>>//g;
+              warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
+                  if $opts{warnings};
+          }
+          print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
+              if $opts{debug};
+          push @preprocessor_conds, [ @results ];
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # #elif EXPR closes the current level and hands back "#if EXPR" as a
+    # line to be processed again, which opens the replacement level.
+    { regexp   => qr/#elif (.*)/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          die "An #elif without corresponding condition$opts{PLACE}"
+              if !@preprocessor_conds;
+          pop @preprocessor_conds;
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return (<<"EOF");
+#if $1
+EOF
+      },
+    },
+    # #else replaces the current level with its inversion: every condition
+    # gains or loses its leading '!'.
+    { regexp   => qr/#else/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          die "An #else without corresponding condition$opts{PLACE}"
+              if !@preprocessor_conds;
+          # Invert all conditions on the last level
+          my $stuff = pop @preprocessor_conds;
+          push @preprocessor_conds, [
+              map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
+          ];
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # #endif closes the current level.
+    { regexp   => qr/#endif ?/,
+      massager => sub {
+          my %opts;
+          if (ref($_[$#_]) eq "HASH") {
+              %opts = %{$_[$#_]};
+              pop @_;
+          }
+          die "An #endif without corresponding condition$opts{PLACE}"
+              if !@preprocessor_conds;
+          pop @preprocessor_conds;
+          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
+              if $opts{debug};
+          return ();
+      },
+    },
+    # #define NAME value         yields a macro whose value is "value"
+    # #define NAME(args) value   yields a macro whose value is the whole
+    #                            "NAME(args) value" text
+    { regexp   => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
+      massager => sub {
+          my $name = $1;
+          my $params = $2;
+          # Use defined-or rather than ||, so that a value consisting of
+          # just "0" (as in "#define FOO 0") is kept instead of being
+          # replaced by the empty string.
+          my $spaceval = $3 // "";
+          my $val = $4 // "";
+          return ("",
+                  $name, 'M', "", $params ? "$name$params$spaceval" : $val,
+                  all_conds()); }
+    },
+    # Any other directive is recognised and ignored.
+    { regexp   => qr/#.*/,
+      massager => sub { return (); }
+    },
+    );
+
+my @opensslchandlers = (
+    ##################################################################
+    # OpenSSL C specials
+    #
+    # They are really preprocessor stuff, but they look like C stuff
+    # to this parser.  All of these do replacements, anything else is
+    # an error.
+    #
+    # Every massager here returns either a single string, which is text to
+    # be parsed in place of the macro invocation, or an empty list, which
+    # makes the invocation disappear.  $1, $2, ... are the captures of the
+    # handler's own regexp, i.e. the macro arguments.
+
+    #####
+    # Global variable stuff
+    { regexp   => qr/OPENSSL_DECLARE_GLOBAL<<<\((.*),(.*)\)>>>;/,
+      massager => sub { return (<<"EOF");
+#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
+OPENSSL_EXPORT $1 _shadow_$2;
+#else
+$1 *_shadow_$2(void);
+#endif
+EOF
+      },
+    },
+
+    #####
+    # Deprecated stuff, by OpenSSL release.
+
+    # We trick the parser by pretending that the declaration is wrapped in a
+    # check if the DEPRECATEDIN macro is defined or not.  Callers of parse()
+    # will have to decide what to do with it.
+    { regexp   => qr/(DEPRECATEDIN_\d+_\d+_\d+)<<<\((.*)\)>>>/,
+      massager => sub { return (<<"EOF");
+#ifndef $1
+$2;
+#endif
+EOF
+      },
+    },
+
+    #####
+    # LHASH stuff
+
+    # LHASH_OF(foo) is used as a type, but the chandlers won't take it
+    # gracefully, so we expand it here.
+    { regexp   => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
+      massager => sub { return ("$1struct lhash_st_$2$3"); }
+    },
+    { regexp   => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
+ int (*cfn)(const $1 *, const $1 *));
+static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
+static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
+static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
+static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
+static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
+static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
+static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
+static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
+ BIO *out);
+static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
+static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
+static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
+static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
+LHASH_OF($1)
+EOF
+      }
+    },
+
+    #####
+    # STACK stuff
+
+    # STACK_OF(foo) is used as a type, but the chandlers won't take it
+    # gracefully, so we expand it here.
+    { regexp   => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
+      massager => sub { return ("$1struct stack_st_$2$3"); }
+    },
+#    { regexp   => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
+#      massager => sub {
+#          my $before = $1;
+#          my $stack_of = "struct stack_st_$2";
+#          my $after = $3;
+#          if ($after =~ m|^\w|) { $after = " ".$after; }
+#          return ("$before$stack_of$after");
+#      }
+#    },
+    { regexp   => qr/SKM_DEFINE_STACK_OF<<<\((.*),(.*),(.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+STACK_OF($1);
+typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
+typedef void (*sk_$1_freefunc)($3 *a);
+typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
+static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
+static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
+static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
+static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
+static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
+ int n);
+static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
+static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
+static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
+static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
+static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
+static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
+static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
+static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
+static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
+static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
+ sk_$1_freefunc freefunc);
+static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
+static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
+static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
+static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
+static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
+static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
+static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
+static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
+ sk_$1_copyfunc copyfunc,
+ sk_$1_freefunc freefunc);
+static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
+ sk_$1_compfunc compare);
+EOF
+      }
+    },
+    # The DEFINE_*STACK_OF* variants are all rewritten as an invocation of
+    # SKM_DEFINE_STACK_OF, which the handler above then expands.
+    { regexp   => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),(.*)\)>>>/,
+      massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
+    },
+    { regexp   => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
+      massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
+    },
+    { regexp   => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),(.*)\)>>>/,
+      massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
+    },
+    { regexp   => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
+      massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
+    },
+    { regexp   => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/,
+      massager => sub { return ("STACK_OF($1);"); }
+    },
+    { regexp   => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/,
+      massager => sub { return ("STACK_OF($1);"); }
+    },
+    { regexp   => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),(.*?)\)>>>/,
+      massager => sub { return ("STACK_OF($1);"); }
+    },
+
+    #####
+    # ASN1 stuff
+
+    { regexp   => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/,
+      massager => sub {
+          return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)");
+      },
+    },
+    # The i2d function type returns int, not a pointer to the encoded
+    # type; that is how <openssl/asn1.h> defines TYPEDEF_I2D_OF.
+    { regexp   => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/,
+      massager => sub {
+          return ("typedef int i2d_of_$1($1 *,unsigned char **)");
+      },
+    },
+    { regexp   => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/,
+      massager => sub {
+          return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)");
+      },
+    },
+    { regexp   => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
+OPENSSL_EXTERN const ASN1_ITEM *$1_it;
+#else
+const ASN1_ITEM *$1_it(void);
+#endif
+EOF
+      },
+    },
+    # The function declarations produced below all use the placeholder
+    # signature "int name(void)"; only the names carry information.
+    { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),(.*),(.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int d2i_$3(void);
+int i2d_$3(void);
+DECLARE_ASN1_ITEM($2)
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_const<<<\((.*),(.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int d2i_$2(void);
+int i2d_$2(void);
+DECLARE_ASN1_ITEM($2)
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int $1_free(void);
+int $1_new(void);
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),(.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int d2i_$2(void);
+int i2d_$2(void);
+int $2_free(void);
+int $2_new(void);
+DECLARE_ASN1_ITEM($2)
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_ASN1_FUNCTIONS_fname<<<\((.*),(.*),(.*)\)>>>/,
+      massager => sub { return (<<"EOF");
+int d2i_$3(void);
+int i2d_$3(void);
+int $3_free(void);
+int $3_new(void);
+DECLARE_ASN1_ITEM($2)
+EOF
+      }
+    },
+    { regexp   => qr/DECLARE_ASN1_FUNCTIONS(?:_const)?<<<\((.*)\)>>>/,
+      massager => sub { return (<<"EOF");
+int d2i_$1(void);
+int i2d_$1(void);
+int $1_free(void);
+int $1_new(void);
+DECLARE_ASN1_ITEM($1)
+EOF
+      }
+    },
+    { regexp   => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int i2d_$1_NDEF(void);
+EOF
+      }
+    },
+    { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int $1_print_ctx(void);
+EOF
+      }
+    },
+    { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),(.*)\)>>>/,
+      massager => sub {
+          return (<<"EOF");
+int $2_print_ctx(void);
+EOF
+      }
+    },
+    { regexp   => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
+      massager => sub { return (); }
+    },
+    { regexp   => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
+      massager => sub { return (); }
+    },
+
+    #####
+    # PEM stuff
+    #
+    # Only the first macro argument (the name) is used.
+    { regexp   => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
+      massager => sub { return (<<"EOF");
+#ifndef OPENSSL_NO_STDIO
+int PEM_read_$1(void);
+int PEM_write_$1(void);
+#endif
+int PEM_read_bio_$1(void);
+int PEM_write_bio_$1(void);
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
+      massager => sub { return (<<"EOF");
+#ifndef OPENSSL_NO_STDIO
+int PEM_write_$1(void);
+#endif
+int PEM_write_bio_$1(void);
+EOF
+      },
+    },
+    { regexp   => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
+      massager => sub { return (<<"EOF");
+#ifndef OPENSSL_NO_STDIO
+int PEM_read_$1(void);
+#endif
+int PEM_read_bio_$1(void);
+EOF
+      },
+    },
+
+    # Spurious stuff found in the OpenSSL headers
+    # Usually, these are just macros that expand to, well, something
+    { regexp   => qr/__NDK_FPABI__/,
+      massager => sub { return (); }
+    },
+    );
+
+# Counter used to make up names for anonymous structs, unions and enums.
+my $anoncnt = 0;
+
+my @chandlers = (
+    ##################################################################
+    # C stuff
+    #
+    # These handlers see a complete, normalized C statement in which the
+    # outermost parentheses, brackets and braces are wrapped in <<< and >>>.
+    # $1, $2, ... in a massager are the captures of the handler's regexp.
+
+    # extern "C" of individual items
+    # Note that the main parse function has a special hack for 'extern "C" {'
+    # which can't be done in handlers
+    # We simply ignore it.
+    { regexp   => qr/extern "C" (.*;)/,
+      massager => sub { return ($1); },
+    },
+    # union, struct and enum definitions
+    # Because this one might appear a little everywhere within type
+    # definitions, we take it out and replace it with just
+    # 'union|struct|enum name' while registering it.
+    # This makes use of the parser trick to surround the outer braces
+    # with <<< and >>>
+    { regexp   => qr/(.*)                       # Anything before       ($1)
+                     \b                         # word to non-word boundary
+                     (union|struct|enum)        # The word used         ($2)
+                     (?:\s([[:alpha:]_]\w*))?   # Struct or enum name   ($3)
+                     <<<(\{.*?\})>>>            # Struct or enum definition ($4)
+                     (.*)                       # Anything after        ($5)
+                     ;
+                    /x,
+      massager => sub {
+          my $before = $1;
+          my $word = $2;
+          my $name = $3
+              || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
+          my $definition = $4;
+          my $after = $5;
+          # Only struct is reported as 'S'; union and enum both get 'E'.
+          my $type = $word eq "struct" ? 'S' : 'E';
+          # The final semicolon is matched outside of the fifth group, so
+          # "nothing after the definition" means that $after is empty.
+          if ($before ne "" || $after ne "") {
+              if ($after =~ m|^\w|) { $after = " ".$after; }
+              # Hand the surrounding declaration back for another round,
+              # with the definition reduced to 'word name'.
+              return ("$before$word $name$after;",
+                      "$word $name", $type, "", "$word$definition", all_conds());
+          }
+          # If there was no before nor after, make the return much simpler
+          return ("", "$word $name", $type, "", "$word$definition", all_conds());
+      }
+    },
+    # Named struct and enum forward declarations
+    # We really just ignore them, but we need to parse them or the variable
+    # declaration handler further down will think it's a variable declaration.
+    { regexp   => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
+      massager => sub { return (); }
+    },
+    # Function returning function pointer declaration, or typedef thereof
+    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
+                     ((?:\w|\*|\s)*?)           # Return type           ($2)
+                     \s?                        # Possible space
+                     <<<\(\*
+                     ([[:alpha:]_]\w*)          # Function name         ($3)
+                     (\(.*\))                   # Parameters            ($4)
+                     \)>>>
+                     <<<(\(.*\))>>>             # F.p. parameters       ($5)
+                     ;
+                    /x,
+      massager => sub {
+          # A typedef is reported as type 'T' with no return type, just
+          # like the typedef case of the other handlers in this list.
+          return ("", $3, 'T', "", "$2(*$4)$5", all_conds())
+              if defined $1;
+          return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
+    },
+    # Function pointer declaration, or typedef thereof
+    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
+                     ((?:\w|\*|\s)*?)           # Return type           ($2)
+                     <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name   ($3)
+                     <<<(\(.*\))>>>             # F.p. parameters       ($4)
+                     ;
+                    /x,
+      massager => sub {
+          return ("", $3, 'T', "", "$2(*)$4", all_conds())
+              if defined $1;
+          return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
+      },
+    },
+    # Function declaration, or typedef thereof
+    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
+                     ((?:\w|\*|\s)*?)           # Return type           ($2)
+                     \s?                        # Possible space
+                     ([[:alpha:]_]\w*)          # Function name         ($3)
+                     <<<(\(.*\))>>>             # Parameters            ($4)
+                     ;
+                    /x,
+      massager => sub {
+          return ("", $3, 'T', "", "$2$4", all_conds())
+              if defined $1;
+          return ("", $3, 'F', $2, "$2$4", all_conds());
+      },
+    },
+    # Variable declaration, including arrays, or typedef thereof
+    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
+                     ((?:\w|\*|\s)*?)           # Type                  ($2)
+                     \s?                        # Possible space
+                     ([[:alpha:]_]\w*)          # Variable name         ($3)
+                     ((?:<<<\[[^\]]*\]>>>)*)    # Possible array declaration ($4)
+                     ;
+                    /x,
+      massager => sub {
+          return ("", $3, 'T', "", $2.($4||""), all_conds())
+              if defined $1;
+          return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
+      },
+    },
+);
+
+# End handlers look like ordinary handlers, but have no regexp: parse() runs
+# each of them exactly once, after all input has been consumed, passing a
+# reference to the options hash.  They catch what the main loop cannot see
+# on its own, such as a conditional that was opened but never closed.
+my @endhandlers = (
+    # Die if any preprocessor conditional level is still open.
+    { massager => sub {
+          my ($optref) = @_;
+          my %opts = %{$optref};
+
+          if (@preprocessor_conds) {
+              my $where =
+                  $opts{filename} ? " in file ".$opts{filename} : "";
+              die "Unfinished preprocessor conditions levels: ",
+                  scalar(@preprocessor_conds), $where, $opts{PLACE};
+          }
+      }
+    }
+    );
+
+# parse(LIST [, OPTS])
+#
+# Takes a list of strings that can each contain one or several lines of
+# code, and optionally a hash reference of options as last argument.  The
+# options used by this function itself are:
+#
+#   filename    name used when reporting where something was found
+#   debug       when true, trace the processing on STDERR
+#
+# The whole options hash is also handed on to the handlers.  The strings
+# passed in are never modified.
+#
+# Returns a list of hashes with information:
+#
+#   name        name of the thing
+#   type        type, see the massage handler function
+#   returntype  return type of functions and variables
+#   value       value for macros, signature for functions, variables
+#               and structs
+#   conds       preprocessor conditions (array ref)
+#
+# Dies on unbalanced parentheses, on a string that isn't finished on its
+# line, and on whatever the handlers and end handlers find fatal.
+
+sub parse {
+    my %opts;
+    if (ref($_[$#_]) eq "HASH") {
+        %opts = %{$_[$#_]};
+        pop @_;
+    }
+    my %state = (
+        in_extern_C => 0,       # An exception to parenthesis processing.
+        cpp_parens => [],       # A list of ending parens and braces found in
+                                # preprocessor directives
+        c_parens => [],         # A list of ending parens and braces found in
+                                # C statements
+        in_string => "",        # empty string when outside a string, otherwise
+                                # "'" or '"' depending on the starting quote.
+        in_comment => "",       # empty string when outside a comment, otherwise
+                                # "/*" or "//" depending on the type of comment
+                                # found.  The latter will never be multiline
+                                # NOTE: in_string and in_comment will never be
+                                # true (in perl semantics) at the same time.
+        current_line => 0,
+    );
+    my @result = ();
+    my $normalized_line = "";   # $input_line, but normalized.  In essence, this
+                                # means that ALL whitespace is removed unless
+                                # it absolutely has to be present, and in that
+                                # case, there's only one space.
+                                # The cases where a space needs to stay present
+                                # are:
+                                # 1. between words
+                                # 2. between words and number
+                                # 3. after the first word of a preprocessor
+                                #    directive.
+                                # 4. for the #define directive, between the macro
+                                #    name/args and its value, so we end up with:
+                                #       #define FOO val
+                                #       #define BAR(x) something(x)
+    my $collected_stmt = "";    # Where we're building up a C line until it's a
+                                # complete definition/declaration, as determined
+                                # by any handler being capable of matching it.
+
+    # The file name is optional; use an empty string in the location texts
+    # rather than concatenating an undefined value.
+    my $filename = $opts{filename} // "";
+
+    # We use $_ shamelessly when looking through @lines.
+    # In case we find a \ at the end, we keep filling it up with more lines.
+    # It is localized so the caller's $_ is left alone; it starts out
+    # undefined.
+    local $_;
+
+    foreach my $input (@_) {
+        # Work on a copy.  The elements of @_ are aliases of the caller's
+        # values, and those must neither be changed nor be required to be
+        # writable.
+        my $line = $input;
+
+        # split tries to be smart when a string ends with the thing we split on
+        $line .= "\n" unless $line =~ m|\R$|;
+        $line .= "#";
+
+        # We use undef as a marker for a new line from the file.
+        # Since we convert one line to several and unshift that into @lines,
+        # that's the only safe way we have to track the original lines
+        my @lines = map { ( undef, $_ ) } split $/, $line;
+
+        # Remember that extra # we added above?  Now we remove it
+        pop @lines;
+        pop @lines;             # Don't forget the undef
+
+        while (@lines) {
+            if (!defined($lines[0])) {
+                shift @lines;
+                $state{current_line}++;
+                # Only move the reported location when we're not in the
+                # middle of a backslash continued line.
+                if (!defined($_)) {
+                    $opts{PLACE} = " at ".$filename." line ".$state{current_line}."\n";
+                    $opts{PLACE2} = $filename.":".$state{current_line};
+                }
+                next;
+            }
+
+            $_ = "" unless defined $_;
+            $_ .= shift @lines;
+
+            # Line continuation: drop the backslash and fetch more
+            if (m|\\$|) {
+                $_ = $`;
+                next;
+            }
+
+            if ($opts{debug}) {
+                print STDERR "DEBUG:----------------------------\n";
+                print STDERR "DEBUG: \$_      = '$_'\n";
+            }
+
+            ##########################################################
+            # Now that we have a full line, let's process through it
+            while(1) {
+                unless ($state{in_comment}) {
+                    # Begin with checking if the current $normalized_line
+                    # contains a preprocessor directive
+                    # This is only done if we're not inside a comment and
+                    # if it's a preprocessor directive and it's finished.
+                    if ($normalized_line =~ m|^#| && $_ eq "") {
+                        print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
+                            if $opts{debug};
+                        $opts{debug_type} = "OPENSSL CPP";
+                        my @r = ( _run_handlers($normalized_line,
+                                                @opensslcpphandlers,
+                                                \%opts) );
+                        if (shift @r) {
+                            # Checking if there are lines to inject.
+                            if (@r) {
+                                @r = split $/, (pop @r).$_;
+                                print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
+                                    if $opts{debug} && @r;
+                                @lines = ( @r, @lines );
+
+                                $_ = "";
+                            }
+                        } else {
+                            print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
+                                if $opts{debug};
+                            $opts{debug_type} = "CPP";
+                            my @r = ( _run_handlers($normalized_line,
+                                                    @cpphandlers,
+                                                    \%opts) );
+                            if (shift @r) {
+                                if (ref($r[0]) eq "HASH") {
+                                    push @result, shift @r;
+                                }
+
+                                # Now, check if there are lines to inject.
+                                # Really, this should never happen, it IS a
+                                # preprocessor directive after all...
+                                if (@r) {
+                                    @r = split $/, pop @r;
+                                    print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
+                                    if $opts{debug} && @r;
+                                    @lines = ( @r, @lines );
+                                    $_ = "";
+                                }
+                            }
+                        }
+
+                        # Note: we simply ignore all directives that no
+                        # handler matches
+                        $normalized_line = "";
+                    }
+
+                    # If the two strings end and start with a character that
+                    # shouldn't get concatenated, add a space
+                    my $space =
+                        ($collected_stmt =~ m/(?:"|')$/
+                         || ($collected_stmt =~ m/(?:\w|\d)$/
+                             && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
+
+                    # Now, unless we're building up a preprocessor directive or
+                    # are in the middle of a string, or the parens et al aren't
+                    # balanced up yet, let's try and see if there's a OpenSSL
+                    # or C handler that can make sense of what we have so far.
+                    if ( $normalized_line !~ m|^#|
+                         && ($collected_stmt ne "" || $normalized_line ne "")
+                         && ! @{$state{c_parens}}
+                         && ! $state{in_string} ) {
+                        if ($opts{debug}) {
+                            print STDERR "DEBUG[OPENSSL C]: \$collected_stmt  = '$collected_stmt'\n";
+                            print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
+                        }
+                        $opts{debug_type} = "OPENSSL C";
+                        my @r = ( _run_handlers($collected_stmt
+                                                    .$space
+                                                    .$normalized_line,
+                                                @opensslchandlers,
+                                                \%opts) );
+                        if (shift @r) {
+                            # Checking if there are lines to inject.
+                            if (@r) {
+                                @r = split $/, (pop @r).$_;
+                                print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
+                                    if $opts{debug} && @r;
+                                @lines = ( @r, @lines );
+
+                                $_ = "";
+                            }
+                            $normalized_line = "";
+                            $collected_stmt = "";
+                        } else {
+                            if ($opts{debug}) {
+                                print STDERR "DEBUG[C]: \$collected_stmt  = '$collected_stmt'\n";
+                                print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
+                            }
+                            $opts{debug_type} = "C";
+                            my @r = ( _run_handlers($collected_stmt
+                                                        .$space
+                                                        .$normalized_line,
+                                                    @chandlers,
+                                                    \%opts) );
+                            if (shift @r) {
+                                if (ref($r[0]) eq "HASH") {
+                                    push @result, shift @r;
+                                }
+
+                                # Checking if there are lines to inject.
+                                if (@r) {
+                                    @r = split $/, (pop @r).$_;
+                                    print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
+                                        if $opts{debug} && @r;
+                                    @lines = ( @r, @lines );
+
+                                    $_ = "";
+                                }
+                                $normalized_line = "";
+                                $collected_stmt = "";
+                            }
+                        }
+                    }
+                    if ($_ eq "") {
+                        $collected_stmt .= $space.$normalized_line;
+                        $normalized_line = "";
+                    }
+                }
+
+                # Nothing left of this line; go fetch the next one
+                if ($_ eq "") {
+                    $_ = undef;
+                    last;
+                }
+
+                # Take care of inside string first.
+                if ($state{in_string}) {
+                    if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
+                           $state{in_string}    # Look for matching quote
+                         /x) {
+                        $normalized_line .= $`.$&;
+                        $state{in_string} = "";
+                        $_ = $';
+                        next;
+                    } else {
+                        die "Unfinished string without continuation found$opts{PLACE}\n";
+                    }
+                }
+                # ... or inside comments, whichever happens to apply
+                elsif ($state{in_comment}) {
+
+                    # This should never happen
+                    die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
+                        if ($state{in_comment} eq "//");
+
+                    # A note: comments are simply discarded.
+
+                    if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
+                           \*\/                 # Look for C comment end
+                         /x) {
+                        $state{in_comment} = "";
+                        $_ = $';
+                        print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
+                            if $opts{debug};
+                        next;
+                    } else {
+                        $_ = "";
+                        next;
+                    }
+                }
+
+                # At this point, it's safe to remove leading whites, but
+                # we need to be careful with some preprocessor lines
+                if (m|^\s+|) {
+                    my $rest = $';
+                    my $space = "";
+                    $space = " "
+                        if ($normalized_line =~ m/^
+                                                  \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
+                                                  | \#[a-z]+
+                                                  $/x);
+                    print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
+                        if $opts{debug};
+                    $_ = $space.$rest;
+                }
+
+                # Preprocessor directives and C statements keep separate
+                # stacks of expected closing parens
+                my $parens =
+                    $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
+                (my $paren_singular = $parens) =~ s|s$||;
+
+                # Now check for specific tokens, and if they are parens,
+                # check them against $state{$parens}.  Note that we surround
+                # the outermost parens with extra "<<<" and ">>>".  Those
+                # are for the benefit of handlers who to need to detect
+                # them, and they will be removed from the final output.
+                if (m|^[\{\[\(]|) {
+                    my $body = $&;
+                    $_ = $';
+                    if (!@{$state{$parens}}) {
+                        if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
+                            $state{in_extern_C} = 1;
+                            print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
+                                if $opts{debug};
+                            $normalized_line = "";
+                        } else {
+                            $normalized_line .= "<<<".$body;
+                        }
+                    } else {
+                        $normalized_line .= $body;
+                    }
+
+                    if ($normalized_line ne "") {
+                        print STDERR "DEBUG: found $paren_singular start '$body'\n"
+                            if $opts{debug};
+                        $body =~ tr|\{\[\(|\}\]\)|;
+                        print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
+                            if $opts{debug};
+                        push @{$state{$parens}}, $body;
+                    }
+                } elsif (m|^[\}\]\)]|) {
+                    $_ = $';
+
+                    if (!@{$state{$parens}}
+                        && $& eq '}' && $state{in_extern_C}) {
+                        print STDERR "DEBUG: found end of 'extern \"C\"'\n"
+                            if $opts{debug};
+                        $state{in_extern_C} = 0;
+                    } else {
+                        print STDERR "DEBUG: Trying to match '$&' against '"
+                            ,join("', '", @{$state{$parens}})
+                            ,"'\n"
+                            if $opts{debug};
+                        die "Unmatched parentheses$opts{PLACE}\n"
+                            unless (@{$state{$parens}}
+                                    && pop @{$state{$parens}} eq $&);
+                        if (!@{$state{$parens}}) {
+                            $normalized_line .= $&.">>>";
+                        } else {
+                            $normalized_line .= $&;
+                        }
+                    }
+                } elsif (m|^["']|) { # string start
+                    my $body = $&;
+                    $_ = $';
+
+                    # We want to separate strings from \w and \d with one space.
+                    $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
+                    $normalized_line .= $body;
+                    $state{in_string} = $body;
+                } elsif (m|^\/\*|) { # C style comment
+                    print STDERR "DEBUG: found start of C style comment\n"
+                        if $opts{debug};
+                    $state{in_comment} = $&;
+                    $_ = $';
+                } elsif (m|^\/\/|) { # C++ style comment
+                    print STDERR "DEBUG: found C++ style comment\n"
+                        if $opts{debug};
+                    $_ = "";    # (just discard it entirely)
+                } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
+                                 (?i: U | L | UL | LL | ULL )?
+                               | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
+                               ) /x) {
+                    print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
+                        if $opts{debug};
+                    $normalized_line .= $&;
+                    $_ = $';
+                } elsif (m/^[[:alpha:]_]\w*/) {
+                    my $body = $&;
+                    my $rest = $';
+                    my $space = "";
+
+                    # Now, only add a space if it's needed to separate
+                    # two \w characters, and we also surround strings with
+                    # a space.  In this case, that's if $normalized_line ends
+                    # with a \w, \d, " or '.
+                    $space = " "
+                        if ($normalized_line =~ m/("|')$/
+                            || ($normalized_line =~ m/(\w|\d)$/
+                                && $body =~ m/^(\w|\d)/));
+
+                    print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
+                        if $opts{debug};
+                    $normalized_line .= $space.$body;
+                    $_ = $rest;
+                } elsif (m|^(?:\\)?.|) { # Catch-all
+                    $normalized_line .= $&;
+                    $_ = $';
+                }
+            }
+        }
+    }
+    # All input has been consumed; let the end handlers have their say
+    foreach my $handler (@endhandlers) {
+        if ($handler->{massager}) {
+            $handler->{massager}->(\%opts);
+        }
+    }
+    return @result;
+}
+
+# _run_handlers(LINE, HANDLER..., [OPTS])
+#
+# arg1: line to check
+# arg2...: handlers to check, in order; the first one whose regexp matches
+#          the whole line decides the outcome
+# last arg (optional): hash reference of options (debug, debug_type); a copy
+#          of it is passed on to the massager
+#
+# Returns a list whose first element tells whether a handler matched:
+#
+#   (0)                      no handler matched
+#   (1)                      the matching handler has no massager
+#   (1, INJECT)              the massager gave no description; INJECT is
+#                            text to be parsed next, possibly empty
+#   (1, HASHREF, INJECT)     HASHREF has the keys name, type, returntype,
+#                            value and conds
+#
+# All "<<<" and ">>>" markers are removed from what the massager returns.
+sub _run_handlers {
+    my %opts;
+    if (ref($_[$#_]) eq "HASH") {
+        my $tail = pop @_;
+        %opts = %{$tail};
+    }
+    my ($line, @handlers) = @_;
+
+    foreach my $handler (@handlers) {
+        # The captures of this match stay visible to the massager called
+        # further down, which is how massagers get at $1, $2, ...
+        next unless $handler->{regexp}
+            && $line =~ m|^$handler->{regexp}$|;
+
+        # Recognised, but nothing to do with it
+        return (1) unless $handler->{massager};
+
+        if ($opts{debug}) {
+            print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
+            print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
+        }
+
+        my $saved_line = $line;
+        my @massaged = $handler->{massager}->($saved_line, \%opts);
+        s/(<<<|>>>)//g foreach @massaged;
+        print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
+            , join("', '", @massaged), "'\n"
+            if $opts{debug};
+
+        # Because we may get back new lines to be
+        # injected before whatever else that follows,
+        # and the injected stuff might include
+        # preprocessor lines, the caller needs to inject
+        # them in its list of lines.  They come first in
+        # what the massager returned.
+        my $injected_lines = shift @massaged || "";
+
+        unless (@massaged) {
+            print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n"
+                if $opts{debug} && $injected_lines eq "";
+            return (1, $injected_lines);
+        }
+
+        my ($name, $type, $returntype, $value, @conds) = @massaged;
+        return (1,
+                {
+                    name       => $name,
+                    type       => $type,
+                    returntype => $returntype,
+                    value      => $value,
+                    conds      => [ @conds ]
+                },
+                $injected_lines
+            );
+    }
+    return (0);
+}