summary refs log tree commit diff
path: root/src/regex/tbnf_regex_scanfns.h
diff options
context:
space:
mode:
author midipix <writeonce@midipix.org> 2024-05-15 20:40:33 +0000
committer midipix <writeonce@midipix.org> 2024-05-15 20:40:33 +0000
commit c257188cd912503371db1c2b7b2c59b4fd53df1c (patch)
tree 8077d9f09fa85521f53a94f3fa33d17a7975a11c /src/regex/tbnf_regex_scanfns.h
parent ae7810f56e1daa1d2e35c06969c26835c1ed7800 (diff)
download treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.bz2
treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.xz
regex module: implemented token scanners, added definitions and scan table.
Diffstat (limited to 'src/regex/tbnf_regex_scanfns.h')
-rw-r--r-- src/regex/tbnf_regex_scanfns.h 287
1 files changed, 287 insertions, 0 deletions
diff --git a/src/regex/tbnf_regex_scanfns.h b/src/regex/tbnf_regex_scanfns.h
new file mode 100644
index 0000000..c57a606
--- /dev/null
+++ b/src/regex/tbnf_regex_scanfns.h
@@ -0,0 +1,287 @@
+/**************************************************************/
+/* treebnf: a tree oriented bnf library */
+/* Copyright (C) 2024 SysDeer Technologies, LLC */
+/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */
+/**************************************************************/
+
+#ifndef TBNF_REGEX_SCANFNS_H
+#define TBNF_REGEX_SCANFNS_H
+
+#include <treebnf/treebnf.h>
+
+/* in-brace scanners */
+/* in-brace scanner: returns 1 when the character at the scan mark is '}', -1 otherwise */
+static inline int tbnf_regex_scan_brace_rbrace(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '}') ? 1 : -1;
+}
+
+/* in-brace scanner: returns 1 when the character at the scan mark is in '0'..'9', -1 otherwise */
+static inline int tbnf_regex_scan_brace_digit(const struct tbnf_scan_ctx * sctx)
+{
+	/* reject anything below '0' first, then anything above '9' */
+	if (sctx->tok_scan_mark[0] < '0')
+		return -1;
+
+	if (sctx->tok_scan_mark[0] > '9')
+		return -1;
+
+	return 1;
+}
+
+/* in-brace scanner: returns 1 when the character at the scan mark is ',', -1 otherwise */
+static inline int tbnf_regex_scan_brace_comma(const struct tbnf_scan_ctx * sctx)
+{
+	switch (sctx->tok_scan_mark[0]) {
+		case ',':
+			return 1;
+
+		default:
+			return -1;
+	}
+}
+
+
+/* in-bracket scanners */
+/*
+ * in-bracket scanner for a backslash followed by one more character.
+ *
+ * returns 2 when the scan mark holds '\\', the following position lies
+ * before tok_scan_cap, and the following character is not a null byte;
+ * returns -1 otherwise.
+ */
+static inline int tbnf_regex_scan_bracket_escaped_char(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '\\')
+		return -1;
+
+	/* the escaped character must lie before the cap */
+	if (&sctx->tok_scan_mark[1] >= sctx->tok_scan_cap)
+		return -1;
+
+	return sctx->tok_scan_mark[1] ? 2 : -1;
+}
+
+/*
+ * in-bracket scanner for text of the form "[:" letters ":]" at the scan
+ * mark, where letters is a run of characters in 'a'..'z'.
+ *
+ * returns the number of characters spanned (delimiters included), or -1
+ * when the text does not match or fewer than five characters remain
+ * before tok_scan_cap.
+ *
+ * note: the run of letters may be empty, so "[::]" followed by at least
+ * one more character before the cap yields 4.
+ */
+static inline int tbnf_regex_scan_bracket_character_class(const struct tbnf_scan_ctx * sctx)
+{
+	const char * mark = sctx->tok_scan_mark;
+	const char * cap  = sctx->tok_scan_cap;
+	const char * ch;
+
+	/* compare lengths instead of forming &mark[4]: that address could  */
+	/* lie more than one element past the end of the buffer (assuming   */
+	/* tok_scan_cap marks that end), which is undefined behavior in C.  */
+	if ((cap - mark) < 5)
+		return -1;
+
+	if ((mark[0] != '[') || (mark[1] != ':'))
+		return -1;
+
+	/* skip the letters while keeping two characters available for ":]" */
+	for (ch = &mark[2]; (*ch >= 'a') && (*ch <= 'z') && ((cap - ch) > 2); )
+		ch++;
+
+	if ((ch[0] == ':') && (ch[1] == ']'))
+		return (int)(&ch[2] - mark);
+
+	return -1;
+}
+
+/*
+ * in-bracket scanner for the five-character text "[.c.]" at the scan
+ * mark, where c is any single character other than a null byte.
+ *
+ * returns 5 on a match, or -1 when the text does not match or fewer
+ * than five characters remain before tok_scan_cap.
+ */
+static inline int tbnf_regex_scan_bracket_collation_symbol(const struct tbnf_scan_ctx * sctx)
+{
+	const char * mark = sctx->tok_scan_mark;
+	const char * cap  = sctx->tok_scan_cap;
+
+	/* compare lengths instead of forming &mark[4]: that address could  */
+	/* lie more than one element past the end of the buffer (assuming   */
+	/* tok_scan_cap marks that end), which is undefined behavior in C.  */
+	if ((cap - mark) < 5)
+		return -1;
+
+	if ((mark[0] != '[') || (mark[1] != '.'))
+		return -1;
+
+	if ((mark[3] != '.') || (mark[4] != ']'))
+		return -1;
+
+	/* the symbol itself must not be a null byte */
+	return mark[2] ? 5 : -1;
+}
+
+/*
+ * in-bracket scanner for text of the form "[=" letters "=]" at the scan
+ * mark, where letters is a run of characters in 'a'..'z'.
+ *
+ * returns the number of characters spanned (delimiters included), or -1
+ * when the text does not match or fewer than five characters remain
+ * before tok_scan_cap.
+ *
+ * note: the run of letters may be empty, so "[==]" followed by at least
+ * one more character before the cap yields 4.
+ */
+static inline int tbnf_regex_scan_bracket_equivalence_class(const struct tbnf_scan_ctx * sctx)
+{
+	const char * mark = sctx->tok_scan_mark;
+	const char * cap  = sctx->tok_scan_cap;
+	const char * ch;
+
+	/* compare lengths instead of forming &mark[4]: that address could  */
+	/* lie more than one element past the end of the buffer (assuming   */
+	/* tok_scan_cap marks that end), which is undefined behavior in C.  */
+	if ((cap - mark) < 5)
+		return -1;
+
+	if ((mark[0] != '[') || (mark[1] != '='))
+		return -1;
+
+	/* skip the letters while keeping two characters available for "=]" */
+	for (ch = &mark[2]; (*ch >= 'a') && (*ch <= 'z') && ((cap - ch) > 2); )
+		ch++;
+
+	if ((ch[0] == '=') && (ch[1] == ']'))
+		return (int)(&ch[2] - mark);
+
+	return -1;
+}
+
+/* in-bracket scanner: returns 1 when the character at the scan mark is ']', -1 otherwise */
+static inline int tbnf_regex_scan_bracket_rbracket(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == ']') ? 1 : -1;
+}
+
+/* in-bracket scanner: returns 1 when the character at the scan mark is '-', -1 otherwise */
+static inline int tbnf_regex_scan_bracket_hyphen(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '-')
+		return -1;
+
+	return 1;
+}
+
+/*
+ * in-bracket scanner: returns 1 when the character at the scan mark is
+ * '[', -1 otherwise.
+ *
+ * NOTE(review): judging by the name, this presumably flags a '[' that
+ * no other in-bracket scanner accepted; confirm against the scan table.
+ */
+static inline int tbnf_regex_scan_bracket_error(const struct tbnf_scan_ctx * sctx)
+{
+	switch (sctx->tok_scan_mark[0]) {
+		case '[':
+			return 1;
+
+		default:
+			return -1;
+	}
+}
+
+
+
+/* bracket state entry scanners */
+/*
+ * bracket state entry scanner for the three-character text "[^]".
+ *
+ * returns 3 when the scan mark holds "[^]" and all three characters lie
+ * before tok_scan_cap; returns -1 otherwise.
+ */
+static inline int tbnf_regex_scan_lbracket_circumflex_rbracket(const struct tbnf_scan_ctx * sctx)
+{
+	const char * mark = sctx->tok_scan_mark;
+	const char * cap  = sctx->tok_scan_cap;
+
+	if (mark[0] != '[')
+		return -1;
+
+	/* compare lengths instead of forming &mark[2]: when '[' is the last */
+	/* character that address lies two past it (assuming tok_scan_cap   */
+	/* marks the end of the buffer), which is undefined behavior in C.  */
+	if ((cap - mark) < 3)
+		return -1;
+
+	if ((mark[1] == '^') && (mark[2] == ']'))
+		return 3;
+
+	return -1;
+}
+
+/*
+ * bracket state entry scanner for the two-character text "[^".
+ *
+ * returns 2 when the scan mark holds '[', the following position lies
+ * before tok_scan_cap, and the following character is '^'; returns -1
+ * otherwise.
+ */
+static inline int tbnf_regex_scan_lbracket_circumflex(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '[')
+		return -1;
+
+	/* the second character must lie before the cap */
+	if (&sctx->tok_scan_mark[1] >= sctx->tok_scan_cap)
+		return -1;
+
+	return (sctx->tok_scan_mark[1] == '^') ? 2 : -1;
+}
+
+/*
+ * bracket state entry scanner for the two-character text "[]".
+ *
+ * returns 2 when the scan mark holds '[', the following position lies
+ * before tok_scan_cap, and the following character is ']'; returns -1
+ * otherwise.
+ */
+static inline int tbnf_regex_scan_lbracket_rbracket(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '[')
+		return -1;
+
+	/* the second character must lie before the cap */
+	if (&sctx->tok_scan_mark[1] >= sctx->tok_scan_cap)
+		return -1;
+
+	return (sctx->tok_scan_mark[1] == ']') ? 2 : -1;
+}
+
+/* bracket state entry scanner: returns 1 when the character at the scan mark is '[', -1 otherwise */
+static inline int tbnf_regex_scan_lbracket(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '[') ? 1 : -1;
+}
+
+
+/* common scanners */
+/*
+ * common scanner for a backslash followed by one more character.
+ *
+ * returns 2 when the scan mark holds '\\', the following position lies
+ * before tok_scan_cap, and the following character is not a null byte;
+ * returns -1 otherwise.
+ */
+static inline int tbnf_regex_scan_escaped_char(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '\\')
+		return -1;
+
+	/* the escaped character must lie before the cap */
+	if (&sctx->tok_scan_mark[1] >= sctx->tok_scan_cap)
+		return -1;
+
+	if (!sctx->tok_scan_mark[1])
+		return -1;
+
+	return 2;
+}
+
+/*
+ * common scanner for the two-character text "^*".
+ *
+ * returns 2 when the scan mark holds '^', the following position lies
+ * before tok_scan_cap, and the following character is '*'; returns -1
+ * otherwise.
+ */
+static inline int tbnf_regex_scan_circumflex_asterisk(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '^')
+		return -1;
+
+	/* the second character must lie before the cap */
+	if (&sctx->tok_scan_mark[1] >= sctx->tok_scan_cap)
+		return -1;
+
+	return (sctx->tok_scan_mark[1] == '*') ? 2 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '^', -1 otherwise */
+static inline int tbnf_regex_scan_circumflex(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '^') ? 1 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '{', -1 otherwise */
+static inline int tbnf_regex_scan_lbrace(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '{')
+		return -1;
+
+	return 1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '(', -1 otherwise */
+static inline int tbnf_regex_scan_lparen(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '(') ? 1 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is ')', -1 otherwise */
+static inline int tbnf_regex_scan_rparen(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == ')') ? 1 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '*', -1 otherwise */
+static inline int tbnf_regex_scan_asterisk(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '*')
+		return -1;
+
+	return 1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '.', -1 otherwise */
+static inline int tbnf_regex_scan_period(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '.') ? 1 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '$', -1 otherwise */
+static inline int tbnf_regex_scan_dollar(const struct tbnf_scan_ctx * sctx)
+{
+	switch (sctx->tok_scan_mark[0]) {
+		case '$':
+			return 1;
+
+		default:
+			return -1;
+	}
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '|', -1 otherwise */
+static inline int tbnf_regex_scan_vline(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '|') ? 1 : -1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '?', -1 otherwise */
+static inline int tbnf_regex_scan_qmark(const struct tbnf_scan_ctx * sctx)
+{
+	if (sctx->tok_scan_mark[0] != '?')
+		return -1;
+
+	return 1;
+}
+
+/* common scanner: returns 1 when the character at the scan mark is '+', -1 otherwise */
+static inline int tbnf_regex_scan_plus(const struct tbnf_scan_ctx * sctx)
+{
+	return (sctx->tok_scan_mark[0] == '+') ? 1 : -1;
+}
+
+/* common scanner: returns 1 for any character at the scan mark other than a null byte, -1 for a null byte */
+static inline int tbnf_regex_scan_char(const struct tbnf_scan_ctx * sctx)
+{
+	return sctx->tok_scan_mark[0] ? 1 : -1;
+}
+
+#endif