diff options
author | midipix <writeonce@midipix.org> | 2024-05-15 20:40:33 +0000 |
---|---|---|
committer | midipix <writeonce@midipix.org> | 2024-05-15 20:40:33 +0000 |
commit | c257188cd912503371db1c2b7b2c59b4fd53df1c (patch) | |
tree | 8077d9f09fa85521f53a94f3fa33d17a7975a11c /src/regex/tbnf_regex_scanfns.h | |
parent | ae7810f56e1daa1d2e35c06969c26835c1ed7800 (diff) | |
download | treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.bz2 treebnf-c257188cd912503371db1c2b7b2c59b4fd53df1c.tar.xz |
regex module: implemented token scanners, added definitions and scan table.
Diffstat (limited to 'src/regex/tbnf_regex_scanfns.h')
-rw-r--r-- | src/regex/tbnf_regex_scanfns.h | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/src/regex/tbnf_regex_scanfns.h b/src/regex/tbnf_regex_scanfns.h new file mode 100644 index 0000000..c57a606 --- /dev/null +++ b/src/regex/tbnf_regex_scanfns.h @@ -0,0 +1,287 @@ +/**************************************************************/ +/* treebnf: a tree oriented bnf library */ +/* Copyright (C) 2024 SysDeer Technologies, LLC */ +/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */ +/**************************************************************/ + +#ifndef TBNF_REGEX_SCANFNS_H +#define TBNF_REGEX_SCANFNS_H + +#include <treebnf/treebnf.h> + +/* in-brace scanners */ +static inline int tbnf_regex_scan_brace_rbrace(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '}') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_brace_digit(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] >= '0') + if (sctx->tok_scan_mark[0] <= '9') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_brace_comma(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == ',') + return 1; + + return -1; +} + + +/* in-bracket scanners */ +static inline int tbnf_regex_scan_bracket_escaped_char(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '\\') + if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1]) + return 2; + + return -1; +} + +static inline int tbnf_regex_scan_bracket_character_class(const struct tbnf_scan_ctx * sctx) +{ + const char * ch = 0; + + if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) + return -1; + + if (sctx->tok_scan_mark[0] == '[') + if (sctx->tok_scan_mark[1] == ':') + ch = &sctx->tok_scan_mark[2]; + + if (!ch) + return -1; + + for (; (*ch >= 'a') && (*ch <= 'z') && (&ch[2] < sctx->tok_scan_cap); ) + ch++; + + if ((*ch++ == ':') && (*ch++ == ']')) + return (ch - sctx->tok_scan_mark); + + return -1; +} + +static inline int tbnf_regex_scan_bracket_collation_symbol(const struct tbnf_scan_ctx * sctx) +{ + if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) + return -1; + + if (sctx->tok_scan_mark[0] == '[') + if (sctx->tok_scan_mark[1] == '.') + if (sctx->tok_scan_mark[3] == '.') + if (sctx->tok_scan_mark[4] == ']') + if (sctx->tok_scan_mark[2]) + return 5; + + return -1; +} + +static inline int tbnf_regex_scan_bracket_equivalence_class(const struct tbnf_scan_ctx * sctx) +{ + const char * ch = 0; + + if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) + return -1; + + if (sctx->tok_scan_mark[0] == '[') + if (sctx->tok_scan_mark[1] == '=') + ch = &sctx->tok_scan_mark[2]; + + if (!ch) + return -1; + + for (; (*ch >= 'a') && (*ch <= 'z') && (&ch[2] < sctx->tok_scan_cap); ) + ch++; + + if ((*ch++ == '=') && (*ch++ == ']')) + return (ch - sctx->tok_scan_mark); + + return -1; +} + +static inline int tbnf_regex_scan_bracket_rbracket(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == ']') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_bracket_hyphen(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '-') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_bracket_error(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '[') + return 1; + + return -1; +} + + + +/* bracket state entry scanners */ +static inline int tbnf_regex_scan_lbracket_circumflex_rbracket(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '[') + if (&sctx->tok_scan_mark[2] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1] == '^') + if (sctx->tok_scan_mark[2] == ']') + return 3; + + return -1; +} + +static inline int tbnf_regex_scan_lbracket_circumflex(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '[') + if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1] == '^') + return 2; + + return -1; +} + +static inline int tbnf_regex_scan_lbracket_rbracket(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '[') + if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1] == ']') + return 2; + + return -1; +} + +static inline int tbnf_regex_scan_lbracket(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '[') + return 1; + + return -1; +} + + +/* common scanners */ +static inline int tbnf_regex_scan_escaped_char(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '\\') + if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1]) + return 2; + + return -1; +} + +static inline int tbnf_regex_scan_circumflex_asterisk(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '^') + if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) + if (sctx->tok_scan_mark[1] == '*') + return 2; + + return -1; +} + +static inline int tbnf_regex_scan_circumflex(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '^') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_lbrace(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '{') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_lparen(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '(') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_rparen(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == ')') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_asterisk(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '*') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_period(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '.') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_dollar(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '$') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_vline(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '|') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_qmark(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '?') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_plus(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0] == '+') + return 1; + + return -1; +} + +static inline int tbnf_regex_scan_char(const struct tbnf_scan_ctx * sctx) +{ + if (sctx->tok_scan_mark[0]) + return 1; + + return -1; +} + +#endif |