/**************************************************************/ /* treebnf: a tree oriented bnf library */ /* Copyright (C) 2024 SysDeer Technologies, LLC */ /* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */ /**************************************************************/ #ifndef TBNF_REGEX_SCANFNS_H #define TBNF_REGEX_SCANFNS_H #include /* in-brace scanners */ static inline int tbnf_regex_scan_brace_rbrace(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '}') return 1; return -1; } static inline int tbnf_regex_scan_brace_digit(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] >= '0') if (sctx->tok_scan_mark[0] <= '9') return 1; return -1; } static inline int tbnf_regex_scan_brace_comma(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == ',') return 1; return -1; } /* in-bracket scanners */ static inline int tbnf_regex_scan_bracket_escaped_char(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '\\') if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1]) return 2; return -1; } static inline int tbnf_regex_scan_bracket_character_class(const struct tbnf_scan_ctx * sctx) { const char * ch = 0; if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) return -1; if (sctx->tok_scan_mark[0] == '[') if (sctx->tok_scan_mark[1] == ':') ch = &sctx->tok_scan_mark[2]; if (!ch) return -1; for (; (*ch >= 'a') && (*ch <= 'z') && (&ch[2] < sctx->tok_scan_cap); ) ch++; if ((*ch++ == ':') && (*ch++ == ']')) return (ch - sctx->tok_scan_mark); return -1; } static inline int tbnf_regex_scan_bracket_collation_symbol(const struct tbnf_scan_ctx * sctx) { if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) return -1; if (sctx->tok_scan_mark[0] == '[') if (sctx->tok_scan_mark[1] == '.') if (sctx->tok_scan_mark[3] == '.') if (sctx->tok_scan_mark[4] == ']') if (sctx->tok_scan_mark[2]) return 5; return -1; } static inline int tbnf_regex_scan_bracket_equivalence_class(const struct tbnf_scan_ctx * sctx) { const char * ch = 0; if (&sctx->tok_scan_mark[4] >= sctx->tok_scan_cap) return -1; if (sctx->tok_scan_mark[0] == '[') if (sctx->tok_scan_mark[1] == '=') ch = &sctx->tok_scan_mark[2]; if (!ch) return -1; for (; (*ch >= 'a') && (*ch <= 'z') && (&ch[2] < sctx->tok_scan_cap); ) ch++; if ((*ch++ == '=') && (*ch++ == ']')) return (ch - sctx->tok_scan_mark); return -1; } static inline int tbnf_regex_scan_bracket_rbracket(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == ']') return 1; return -1; } static inline int tbnf_regex_scan_bracket_hyphen(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '-') return 1; return -1; } static inline int tbnf_regex_scan_bracket_error(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '[') return 1; return -1; } /* bracket state entry scanners */ static inline int tbnf_regex_scan_lbracket_circumflex_rbracket(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '[') if (&sctx->tok_scan_mark[2] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1] == '^') if (sctx->tok_scan_mark[2] == ']') return 3; return -1; } static inline int tbnf_regex_scan_lbracket_circumflex(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '[') if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1] == '^') return 2; return -1; } static inline int tbnf_regex_scan_lbracket_rbracket(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '[') if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1] == ']') return 2; return -1; } static inline int tbnf_regex_scan_lbracket(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '[') return 1; return -1; } /* common scanners */ static inline int tbnf_regex_scan_escaped_char(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '\\') if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1]) return 2; return -1; } static inline int tbnf_regex_scan_circumflex_asterisk(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '^') if (&sctx->tok_scan_mark[1] < sctx->tok_scan_cap) if (sctx->tok_scan_mark[1] == '*') return 2; return -1; } static inline int tbnf_regex_scan_circumflex(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '^') return 1; return -1; } static inline int tbnf_regex_scan_lbrace(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '{') return 1; return -1; } static inline int tbnf_regex_scan_lparen(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '(') return 1; return -1; } static inline int tbnf_regex_scan_rparen(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == ')') return 1; return -1; } static inline int tbnf_regex_scan_asterisk(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '*') return 1; return -1; } static inline int tbnf_regex_scan_period(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '.') return 1; return -1; } static inline int tbnf_regex_scan_dollar(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '$') return 1; return -1; } static inline int tbnf_regex_scan_vline(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '|') return 1; return -1; } static inline int tbnf_regex_scan_qmark(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '?') return 1; return -1; } static inline int tbnf_regex_scan_plus(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0] == '+') return 1; return -1; } static inline int tbnf_regex_scan_char(const struct tbnf_scan_ctx * sctx) { if (sctx->tok_scan_mark[0]) return 1; return -1; } #endif