From ae7810f56e1daa1d2e35c06969c26835c1ed7800 Mon Sep 17 00:00:00 2001 From: midipix Date: Wed, 15 May 2024 19:28:39 +0000 Subject: table-based token scanner: implemented tbnf_scan_token(), tbnf_scan_tokens(). --- src/tokscan/tbnf_scan_token.c | 102 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/tokscan/tbnf_scan_token.c (limited to 'src') diff --git a/src/tokscan/tbnf_scan_token.c b/src/tokscan/tbnf_scan_token.c new file mode 100644 index 0000000..b546959 --- /dev/null +++ b/src/tokscan/tbnf_scan_token.c @@ -0,0 +1,102 @@ +/**************************************************************/ +/* treebnf: a tree oriented bnf library */ +/* Copyright (C) 2024 SysDeer Technologies, LLC */ +/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */ +/**************************************************************/ + +#include + +#define TBNF_STATE_STACK_SIZE (512) + +/* single token, read-only context */ +int tbnf_scan_token(const struct tbnf_scan_ctx * sctx, struct tbnf_token * tok) +{ + int ret = 0; + int len = 0; + int type = 0; + + int tidx = 0; + int sidx = sctx->tok_scan_state; + + for (; tidx < sctx->tok_scan_nents; ) { + if (sctx->tok_scan_tbls[sidx][tidx].tok_scan_fn) + ret = sctx->tok_scan_tbls[sidx][tidx].tok_scan_fn(sctx); + + if (ret > len) { + len = ret; + type = tidx; + } + + tidx++; + } + + tok->tok_type = type; + tok->tok_len = len; + tok->tok_off = sctx->tok_scan_mark - sctx->tok_scan_base; + + return (len > 0) ? 0 : -1; +} + +/* scan up to ntoks tokens, read-write context */ +int tbnf_scan_tokens(struct tbnf_scan_ctx * sctx, size_t ntoks, struct tbnf_token * tokv, int any) +{ + int ret; + int * state; + int * stcap; + int ststk[TBNF_STATE_STACK_SIZE]; + struct tbnf_scan_tbl * pentry; + + ret = 0; + ntoks = (ntoks > INT32_MAX) ? INT32_MAX : ntoks; + + state = ststk; + state[0] = sctx->tok_scan_state; + + stcap = &state[TBNF_STATE_STACK_SIZE]; + stcap--; + + /*******************************************************************/ + /* a positive return value that's smaller than the original ntoks, */ + /* in combination with mark < cap, indicates an error while trying */ + /* to obtain the next token. */ + /*******************************************************************/ + + for (; ntoks && (sctx->tok_scan_mark < sctx->tok_scan_cap); ) { + if (tbnf_scan_token(sctx,tokv) < 0) + return (ret > 0) ? ret : (-1); + + pentry = &sctx->tok_scan_tbls[*state][tokv->tok_type]; + + switch (pentry->tok_state_op) { + case TBNF_STATE_POP: + if (state == ststk) + return (-1); + + state--; + sctx->tok_scan_state = *state; + break; + + case TBNF_STATE_KEEP: + break; + + case TBNF_STATE_PUSH: + if (state == stcap) + return (-1); + + sctx->tok_scan_state = pentry->tok_state_next; + *++state = sctx->tok_scan_state; + break; + } + + sctx->tok_scan_type = tokv->tok_type; + sctx->tok_scan_mark += tokv->tok_len; + + tokv->tok_any = any; + tokv++; + + ntoks--; + ret++; + } + + return ret; +} -- cgit v1.2.3