2 tre-internal.h - TRE internal definitions
4 Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #define TRE_MULTIBYTE 1
29 #define TRE_USE_SYSTEM_WCTYPE 1
30 #define HAVE_WCSTOMBS 1
31 #define TRE_MB_CUR_MAX MB_CUR_MAX
35 #define TRE_REGEX_T_FIELD __opaque
36 typedef int reg_errcode_t;
38 typedef wchar_t tre_char_t;
43 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(0)
44 #else /* !TRE_DEBUG */
45 #define DPRINT(msg) do { } while(0)
46 #endif /* !TRE_DEBUG */
48 #define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
51 int __mbtowc(wchar_t *, const char *);
52 #define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s)))
54 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
57 /* Wide characters. */
58 typedef wint_t tre_cint_t;
59 #define TRE_CHAR_MAX WCHAR_MAX
62 #define TRE_MB_CUR_MAX MB_CUR_MAX
63 #else /* !TRE_MULTIBYTE */
64 #define TRE_MB_CUR_MAX 1
65 #endif /* !TRE_MULTIBYTE */
67 #define tre_isalnum iswalnum
68 #define tre_isalpha iswalpha
69 #define tre_isblank iswblank
70 #define tre_iscntrl iswcntrl
71 #define tre_isdigit iswdigit
72 #define tre_isgraph iswgraph
73 #define tre_islower iswlower
74 #define tre_isprint iswprint
75 #define tre_ispunct iswpunct
76 #define tre_isspace iswspace
77 #define tre_isupper iswupper
78 #define tre_isxdigit iswxdigit
80 #define tre_tolower towlower
81 #define tre_toupper towupper
82 #define tre_strlen wcslen
84 /* Use system provided iswctype() and wctype(). */
85 typedef wctype_t tre_ctype_t;
86 #define tre_isctype iswctype
87 #define tre_ctype wctype
89 /* Returns number of bytes to add to (char *)ptr to make it
90 properly aligned for the type. */
91 #define ALIGN(ptr, type) \
92 ((((long)ptr) % sizeof(type)) \
93 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
98 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
99 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
101 /* Define STRF to the correct printf formatter for strings. */
104 /* TNFA transition type. A TNFA state is an array of transitions,
105 the terminator is a transition with NULL `state'. */
106 typedef struct tnfa_transition tre_tnfa_transition_t;
108 struct tnfa_transition {
109 /* Range of accepted characters. */
112 /* Pointer to the destination state. */
113 tre_tnfa_transition_t *state;
114 /* ID number of the destination state. */
116 /* -1 terminated array of tags (or NULL). */
118 /* Assertion bitmap. */
120 /* Assertion parameters. */
122 /* Character class assertion. */
124 /* Back reference assertion. */
127 /* Negative character class assertions. */
128 tre_ctype_t *neg_classes;
133 #define ASSERT_AT_BOL 1 /* Beginning of line. */
134 #define ASSERT_AT_EOL 2 /* End of line. */
135 #define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
136 #define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
137 #define ASSERT_AT_BOW 16 /* Beginning of word. */
138 #define ASSERT_AT_EOW 32 /* End of word. */
139 #define ASSERT_AT_WB 64 /* Word boundary. */
140 #define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */
141 #define ASSERT_BACKREF 256 /* A back reference in `backref'. */
142 #define ASSERT_LAST 256
144 /* Tag directions. */
146 TRE_TAG_MINIMIZE = 0,
148 } tre_tag_direction_t;
150 /* Instructions to compute submatch register values from tag values
151 after a successful match. */
152 struct tre_submatch_data {
153 /* Tag that gives the value for rm_so (submatch start offset). */
155 /* Tag that gives the value for rm_eo (submatch end offset). */
157 /* List of submatches this submatch is contained in. */
161 typedef struct tre_submatch_data tre_submatch_data_t;
164 /* TNFA definition. */
165 typedef struct tnfa tre_tnfa_t;
168 tre_tnfa_transition_t *transitions;
169 unsigned int num_transitions;
170 tre_tnfa_transition_t *initial;
171 tre_tnfa_transition_t *final;
172 tre_submatch_data_t *submatch_data;
173 unsigned int num_submatches;
174 tre_tag_direction_t *tag_directions;
184 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
187 tre_free(regex_t *preg);
190 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
191 const tre_tnfa_t *tnfa, int *tags, int match_eo);
194 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
195 tre_str_type_t type, int *match_tags, int eflags,
199 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
200 tre_str_type_t type, int *match_tags, int eflags,
204 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
205 int len, tre_str_type_t type, int *match_tags,
206 int eflags, int *match_end_ofs);
209 /* from tre-mem.h: */
211 #define TRE_MEM_BLOCK_SIZE 1024
213 typedef struct tre_list {
215 struct tre_list *next;
218 typedef struct tre_mem_struct {
227 #define tre_mem_new_impl __tre_mem_new_impl
228 #define tre_mem_alloc_impl __tre_mem_alloc_impl
229 #define tre_mem_destroy __tre_mem_destroy
231 tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
232 void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
233 int zero, size_t size);
235 /* Returns a new memory allocator or NULL if out of memory. */
236 #define tre_mem_new() tre_mem_new_impl(0, NULL)
238 /* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
239 allocated block or NULL if an underlying malloc() failed. */
240 #define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
242 /* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
243 allocated block or NULL if an underlying malloc() failed. The memory
245 #define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
247 #ifdef TRE_USE_ALLOCA
248 /* alloca() versions. Like above, but memory is allocated with alloca()
249 instead of malloc(). */
251 #define tre_mem_newa() \
252 tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
254 #define tre_mem_alloca(mem, size) \
255 ((mem)->n >= (size) \
256 ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
257 : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
258 #endif /* TRE_USE_ALLOCA */
261 /* Frees the memory allocator and all memory allocated with it. */
262 void tre_mem_destroy(tre_mem_t mem);
264 #define xmalloc malloc
265 #define xcalloc calloc
267 #define xrealloc realloc