/* tre-ast.h - Abstract syntax tree (AST) definitions This software is released under a BSD-style license. See the file LICENSE for details and copyright. */ #ifndef TRE_AST_H #define TRE_AST_H 1 #include "tre-mem.h" #include "tre-internal.h" #include "tre-compile.h" /* The different AST node types. */ typedef enum { LITERAL, CATENATION, ITERATION, UNION } tre_ast_type_t; /* Special subtypes of TRE_LITERAL. */ #define EMPTY -1 /* Empty leaf (denotes empty string). */ #define ASSERTION -2 /* Assertion leaf. */ #define TAG -3 /* Tag leaf. */ #define BACKREF -4 /* Back reference leaf. */ #define PARAMETER -5 /* Parameter. */ #define IS_SPECIAL(x) ((x)->code_min < 0) #define IS_EMPTY(x) ((x)->code_min == EMPTY) #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) #define IS_TAG(x) ((x)->code_min == TAG) #define IS_BACKREF(x) ((x)->code_min == BACKREF) #define IS_PARAMETER(x) ((x)->code_min == PARAMETER) /* A generic AST node. All AST nodes consist of this node on the top level with `obj' pointing to the actual content. */ typedef struct { tre_ast_type_t type; /* Type of the node. */ void *obj; /* Pointer to actual node. */ int nullable; int submatch_id; int num_submatches; int num_tags; tre_pos_and_tags_t *firstpos; tre_pos_and_tags_t *lastpos; } tre_ast_node_t; /* A "literal" node. These are created for assertions, back references, tags, matching parameter settings, and all expressions that match one character. */ typedef struct { long code_min; long code_max; int position; union { tre_ctype_t class; int *params; } u; tre_ctype_t *neg_classes; } tre_literal_t; /* A "catenation" node. These are created when two regexps are concatenated. If there are more than one subexpressions in sequence, the `left' part holds all but the last, and `right' part holds the last subexpression (catenation is left associative). */ typedef struct { tre_ast_node_t *left; tre_ast_node_t *right; } tre_catenation_t; /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" operators. */ typedef struct { /* Subexpression to match. */ tre_ast_node_t *arg; /* Minimum number of consecutive matches. */ int min; /* Maximum number of consecutive matches. */ int max; /* If 0, match as many characters as possible, if 1 match as few as possible. Note that this does not always mean the same thing as matching as many/few repetitions as possible. */ unsigned int minimal:1; /* Approximate matching parameters (or NULL). */ int *params; } tre_iteration_t; /* An "union" node. These are created for the "|" operator. */ typedef struct { tre_ast_node_t *left; tre_ast_node_t *right; } tre_union_t; tre_ast_node_t * tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); tre_ast_node_t * tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal); tre_ast_node_t * tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); tre_ast_node_t * tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); #ifdef TRE_DEBUG void tre_ast_print(tre_ast_node_t *tree); /* XXX - rethink AST printing API */ void tre_print_params(int *params); #endif /* TRE_DEBUG */ #endif /* TRE_AST_H */ /* EOF */