summaryrefslogtreecommitdiff
path: root/src/regex/tbnf_regex.c
diff options
context:
space:
mode:
author: midipix <writeonce@midipix.org> 2024-05-15 20:40:33 +0000
committer: midipix <writeonce@midipix.org> 2024-05-15 20:40:33 +0000
commit: c257188cd912503371db1c2b7b2c59b4fd53df1c (patch)
tree: 8077d9f09fa85521f53a94f3fa33d17a7975a11c /src/regex/tbnf_regex.c
parent: ae7810f56e1daa1d2e35c06969c26835c1ed7800 (diff)
download: treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.bz2
treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.xz
regex module: implemented token scanners, added definitions and scan table.
Diffstat (limited to 'src/regex/tbnf_regex.c')
-rw-r--r-- src/regex/tbnf_regex.c 185
1 files changed, 185 insertions, 0 deletions
diff --git a/src/regex/tbnf_regex.c b/src/regex/tbnf_regex.c
new file mode 100644
index 0000000..2cdcb48
--- /dev/null
+++ b/src/regex/tbnf_regex.c
@@ -0,0 +1,185 @@
+/**************************************************************/
+/* treebnf: a tree oriented bnf library */
+/* Copyright (C) 2024 SysDeer Technologies, LLC */
+/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */
+/**************************************************************/
+
+#include <treebnf/treebnf.h>
+
+#include "treebnf_regex_impl.h"
+#include "treebnf_visibility_impl.h"
+
+#include "tbnf_regex_defs.h"
+#include "tbnf_regex_scanfns.h"
+
+#define TBNF_STATE_STACK_SIZE (512) /* NOTE(review): not referenced in the code shown here; presumably the scanner state stack depth -- confirm */
+
+/* init state scan table.
+ *
+ * One slot per token id (TBNF_REGEX_TOK_*), TBNF_REGEX_TOK_CAP slots in
+ * total. Each populated slot names the scanner function for that token
+ * and the state operation recorded for it; the entries that use
+ * TBNF_STATE_PUSH also name the state to switch to in tok_state_next.
+ * Slots without an initializer below are zero-initialized.
+ *
+ * This table is installed under TBNF_REGEX_STATE_INIT in
+ * tbnf_regex_scan_tbl[]. How the scanner walks the table and applies
+ * the state operation is not visible in this file.
+ */
+static struct tbnf_scan_tbl tbnf_regex_scan_tbl__init[TBNF_REGEX_TOK_CAP] = {
+ /* left brace: TBNF_STATE_PUSH with TBNF_REGEX_STATE_BRACE as the next state */
+ [TBNF_REGEX_TOK_LBRACE] = {
+ .tok_scan_fn = tbnf_regex_scan_lbrace,
+ .tok_state_op = TBNF_STATE_PUSH,
+ .tok_state_next = TBNF_REGEX_STATE_BRACE,
+ },
+
+
+ /* left bracket variants: TBNF_STATE_PUSH with TBNF_REGEX_STATE_BRACKET as the next state */
+ [TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX_RBRACKET] = {
+ .tok_scan_fn = tbnf_regex_scan_lbracket_circumflex_rbracket,
+ .tok_state_op = TBNF_STATE_PUSH,
+ .tok_state_next = TBNF_REGEX_STATE_BRACKET,
+ },
+
+ [TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX] = {
+ .tok_scan_fn = tbnf_regex_scan_lbracket_circumflex,
+ .tok_state_op = TBNF_STATE_PUSH,
+ .tok_state_next = TBNF_REGEX_STATE_BRACKET,
+ },
+
+ [TBNF_REGEX_TOK_LBRACKET_RBRACKET] = {
+ .tok_scan_fn = tbnf_regex_scan_lbracket_rbracket,
+ .tok_state_op = TBNF_STATE_PUSH,
+ .tok_state_next = TBNF_REGEX_STATE_BRACKET,
+ },
+
+ [TBNF_REGEX_TOK_LBRACKET] = {
+ .tok_scan_fn = tbnf_regex_scan_lbracket,
+ .tok_state_op = TBNF_STATE_PUSH,
+ .tok_state_next = TBNF_REGEX_STATE_BRACKET,
+ },
+
+
+ /* expression-level tokens: all marked TBNF_STATE_KEEP, no next state set */
+ [TBNF_REGEX_TOK_ESCAPED_CHAR] = {
+ .tok_scan_fn = tbnf_regex_scan_escaped_char,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_CIRCUMFLEX_ASTERISK] = {
+ .tok_scan_fn = tbnf_regex_scan_circumflex_asterisk,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_CIRCUMFLEX] = {
+ .tok_scan_fn = tbnf_regex_scan_circumflex,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_LPAREN] = {
+ .tok_scan_fn = tbnf_regex_scan_lparen,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_RPAREN] = {
+ .tok_scan_fn = tbnf_regex_scan_rparen,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_ASTERISK] = {
+ .tok_scan_fn = tbnf_regex_scan_asterisk,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_PERIOD] = {
+ .tok_scan_fn = tbnf_regex_scan_period,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_DOLLAR] = {
+ .tok_scan_fn = tbnf_regex_scan_dollar,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_VLINE] = {
+ .tok_scan_fn = tbnf_regex_scan_vline,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_QMARK] = {
+ .tok_scan_fn = tbnf_regex_scan_qmark,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_PLUS] = {
+ .tok_scan_fn = tbnf_regex_scan_plus,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_CHAR] = {
+ .tok_scan_fn = tbnf_regex_scan_char,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+};
+
+
+/* brace state scan table.
+ *
+ * Installed under TBNF_REGEX_STATE_BRACE in tbnf_regex_scan_tbl[]; the
+ * init table names that state as tok_state_next for
+ * TBNF_REGEX_TOK_LBRACE. Three tokens are populated: the right brace,
+ * marked TBNF_STATE_POP, and digit and comma, both marked
+ * TBNF_STATE_KEEP. All remaining slots are zero-initialized.
+ *
+ * NOTE(review): presumably this covers the contents of an interval
+ * expression such as {m,n} -- confirm against the scanner functions.
+ */
+static struct tbnf_scan_tbl tbnf_regex_scan_tbl__brace[TBNF_REGEX_TOK_CAP] = {
+ [TBNF_REGEX_TOK_BRACE_RBRACE] = {
+ .tok_scan_fn = tbnf_regex_scan_brace_rbrace,
+ .tok_state_op = TBNF_STATE_POP,
+ },
+
+ [TBNF_REGEX_TOK_BRACE_DIGIT] = {
+ .tok_scan_fn = tbnf_regex_scan_brace_digit,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACE_COMMA] = {
+ .tok_scan_fn = tbnf_regex_scan_brace_comma,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+};
+
+
+/* bracket state scan table.
+ *
+ * Installed under TBNF_REGEX_STATE_BRACKET in tbnf_regex_scan_tbl[]; the
+ * init table names that state as tok_state_next for all four left
+ * bracket tokens. The right bracket entry is marked TBNF_STATE_POP;
+ * every other populated entry is marked TBNF_STATE_KEEP. All remaining
+ * slots are zero-initialized.
+ *
+ * NOTE(review): TBNF_REGEX_TOK_BRACKET_CHAR is wired to
+ * tbnf_regex_scan_char, the same function the init table uses for
+ * TBNF_REGEX_TOK_CHAR, whereas every other entry here uses a
+ * tbnf_regex_scan_bracket_* function. Confirm that sharing the scanner
+ * is intentional.
+ */
+static struct tbnf_scan_tbl tbnf_regex_scan_tbl__bracket[TBNF_REGEX_TOK_CAP] = {
+ [TBNF_REGEX_TOK_BRACKET_RBRACKET] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_rbracket,
+ .tok_state_op = TBNF_STATE_POP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_ESCAPED_CHAR] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_escaped_char,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_CHARACTER_CLASS] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_character_class,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_COLLATION_SYMBOL] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_collation_symbol,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_EQUIVALENCE_CLASS] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_equivalence_class,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_HYPHEN] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_hyphen,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_ERROR] = {
+ .tok_scan_fn = tbnf_regex_scan_bracket_error,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+
+ [TBNF_REGEX_TOK_BRACKET_CHAR] = {
+ .tok_scan_fn = tbnf_regex_scan_char,
+ .tok_state_op = TBNF_STATE_KEEP,
+ },
+};
+
+
+/* extended regex expression token scan table.
+ *
+ * Maps each scanner state (TBNF_REGEX_STATE_*) to the per-state token
+ * scan table defined in this file. The slot at TBNF_REGEX_STATE_CAP is
+ * explicitly set to a null pointer, which also sizes the array at
+ * TBNF_REGEX_STATE_CAP + 1 elements.
+ *
+ * NOTE(review): tbnf_hidden presumably comes from
+ * treebnf_visibility_impl.h and restricts symbol visibility; whether
+ * consumers rely on the null slot as a terminator is not visible
+ * here -- confirm.
+ */
+tbnf_hidden struct tbnf_scan_tbl * tbnf_regex_scan_tbl[] = {
+ [TBNF_REGEX_STATE_INIT] = tbnf_regex_scan_tbl__init,
+ [TBNF_REGEX_STATE_BRACE] = tbnf_regex_scan_tbl__brace,
+ [TBNF_REGEX_STATE_BRACKET] = tbnf_regex_scan_tbl__bracket,
+ [TBNF_REGEX_STATE_CAP] = 0,
+};