From c257188cd912503371db1c2b7b2c59b4fd53df1c Mon Sep 17 00:00:00 2001 From: midipix Date: Wed, 15 May 2024 20:40:33 +0000 Subject: regex module: implemented token scanners, added definitions and scan table. --- src/regex/tbnf_regex.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/regex/tbnf_regex.c (limited to 'src/regex/tbnf_regex.c') diff --git a/src/regex/tbnf_regex.c b/src/regex/tbnf_regex.c new file mode 100644 index 0000000..2cdcb48 --- /dev/null +++ b/src/regex/tbnf_regex.c @@ -0,0 +1,185 @@ +/**************************************************************/ +/* treebnf: a tree oriented bnf library */ +/* Copyright (C) 2024 SysDeer Technologies, LLC */ +/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */ +/**************************************************************/ + +#include + +#include "treebnf_regex_impl.h" +#include "treebnf_visibility_impl.h" + +#include "tbnf_regex_defs.h" +#include "tbnf_regex_scanfns.h" + +#define TBNF_STATE_STACK_SIZE (512) + +/* init state scan table*/ +static struct tbnf_scan_tbl tbnf_regex_scan_tbl__init[TBNF_REGEX_TOK_CAP] = { + /* --> brace */ + [TBNF_REGEX_TOK_LBRACE] = { + .tok_scan_fn = tbnf_regex_scan_lbrace, + .tok_state_op = TBNF_STATE_PUSH, + .tok_state_next = TBNF_REGEX_STATE_BRACE, + }, + + + /* --> bracket */ + [TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX_RBRACKET] = { + .tok_scan_fn = tbnf_regex_scan_lbracket_circumflex_rbracket, + .tok_state_op = TBNF_STATE_PUSH, + .tok_state_next = TBNF_REGEX_STATE_BRACKET, + }, + + [TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX] = { + .tok_scan_fn = tbnf_regex_scan_lbracket_circumflex, + .tok_state_op = TBNF_STATE_PUSH, + .tok_state_next = TBNF_REGEX_STATE_BRACKET, + }, + + [TBNF_REGEX_TOK_LBRACKET_RBRACKET] = { + .tok_scan_fn = tbnf_regex_scan_lbracket_rbracket, + .tok_state_op = TBNF_STATE_PUSH, + .tok_state_next = TBNF_REGEX_STATE_BRACKET, + }, + + [TBNF_REGEX_TOK_LBRACKET] = { + .tok_scan_fn = tbnf_regex_scan_lbracket, + .tok_state_op = TBNF_STATE_PUSH, + .tok_state_next = TBNF_REGEX_STATE_BRACKET, + }, + + + /* (expression) */ + [TBNF_REGEX_TOK_ESCAPED_CHAR] = { + .tok_scan_fn = tbnf_regex_scan_escaped_char, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_CIRCUMFLEX_ASTERISK] = { + .tok_scan_fn = tbnf_regex_scan_circumflex_asterisk, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_CIRCUMFLEX] = { + .tok_scan_fn = tbnf_regex_scan_circumflex, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_LPAREN] = { + .tok_scan_fn = tbnf_regex_scan_lparen, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_RPAREN] = { + .tok_scan_fn = tbnf_regex_scan_rparen, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_ASTERISK] = { + .tok_scan_fn = tbnf_regex_scan_asterisk, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_PERIOD] = { + .tok_scan_fn = tbnf_regex_scan_period, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_DOLLAR] = { + .tok_scan_fn = tbnf_regex_scan_dollar, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_VLINE] = { + .tok_scan_fn = tbnf_regex_scan_vline, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_QMARK] = { + .tok_scan_fn = tbnf_regex_scan_qmark, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_PLUS] = { + .tok_scan_fn = tbnf_regex_scan_plus, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_CHAR] = { + .tok_scan_fn = tbnf_regex_scan_char, + .tok_state_op = TBNF_STATE_KEEP, + }, +}; + + +/* brace state scan table*/ +static struct tbnf_scan_tbl tbnf_regex_scan_tbl__brace[TBNF_REGEX_TOK_CAP] = { + [TBNF_REGEX_TOK_BRACE_RBRACE] = { + .tok_scan_fn = tbnf_regex_scan_brace_rbrace, + .tok_state_op = TBNF_STATE_POP, + }, + + [TBNF_REGEX_TOK_BRACE_DIGIT] = { + .tok_scan_fn = tbnf_regex_scan_brace_digit, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACE_COMMA] = { + .tok_scan_fn = tbnf_regex_scan_brace_comma, + .tok_state_op = TBNF_STATE_KEEP, + }, +}; + + +/* bracket state scan table*/ +static struct tbnf_scan_tbl tbnf_regex_scan_tbl__bracket[TBNF_REGEX_TOK_CAP] = { + [TBNF_REGEX_TOK_BRACKET_RBRACKET] = { + .tok_scan_fn = tbnf_regex_scan_bracket_rbracket, + .tok_state_op = TBNF_STATE_POP, + }, + + [TBNF_REGEX_TOK_BRACKET_ESCAPED_CHAR] = { + .tok_scan_fn = tbnf_regex_scan_bracket_escaped_char, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_CHARACTER_CLASS] = { + .tok_scan_fn = tbnf_regex_scan_bracket_character_class, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_COLLATION_SYMBOL] = { + .tok_scan_fn = tbnf_regex_scan_bracket_collation_symbol, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_EQUIVALENCE_CLASS] = { + .tok_scan_fn = tbnf_regex_scan_bracket_equivalence_class, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_HYPHEN] = { + .tok_scan_fn = tbnf_regex_scan_bracket_hyphen, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_ERROR] = { + .tok_scan_fn = tbnf_regex_scan_bracket_error, + .tok_state_op = TBNF_STATE_KEEP, + }, + + [TBNF_REGEX_TOK_BRACKET_CHAR] = { + .tok_scan_fn = tbnf_regex_scan_char, + .tok_state_op = TBNF_STATE_KEEP, + }, +}; + + +/* extended regex expression token scan table */ +tbnf_hidden struct tbnf_scan_tbl * tbnf_regex_scan_tbl[] = { + [TBNF_REGEX_STATE_INIT] = tbnf_regex_scan_tbl__init, + [TBNF_REGEX_STATE_BRACE] = tbnf_regex_scan_tbl__brace, + [TBNF_REGEX_STATE_BRACKET] = tbnf_regex_scan_tbl__bracket, + [TBNF_REGEX_STATE_CAP] = 0, +}; -- cgit v1.2.3