summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/treebnf/treebnf.h 4
-rw-r--r-- project/common.mk 1
-rw-r--r-- project/tree.mk 1
-rw-r--r-- src/tokscan/tbnf_scan_token.c 102
4 files changed, 108 insertions, 0 deletions
diff --git a/include/treebnf/treebnf.h b/include/treebnf/treebnf.h
index 1a8db27..a5c0359 100644
--- a/include/treebnf/treebnf.h
+++ b/include/treebnf/treebnf.h
@@ -163,6 +163,10 @@ tbnf_api int tbnf_lib_map_raw_input (const struct tbnf_driver_ctx *,
tbnf_api int tbnf_lib_unmap_raw_input (struct tbnf_raw_input *);
+/* table-based token scanner api */
+tbnf_api int tbnf_scan_token (const struct tbnf_scan_ctx *, struct tbnf_token *);
+tbnf_api int tbnf_scan_tokens (struct tbnf_scan_ctx *, size_t ntoks, struct tbnf_token *, int any);
+
/* utility api */
tbnf_api int tbnf_main (char **, char **,
const struct tbnf_fd_ctx *);
diff --git a/project/common.mk b/project/common.mk
index 7a77195..c9523bd 100644
--- a/project/common.mk
+++ b/project/common.mk
@@ -5,6 +5,7 @@ API_SRCS = \
src/driver/tbnf_unit_ctx.c \
src/output/tbnf_output_error.c \
src/skin/tbnf_skin_default.c \
+ src/tokscan/tbnf_scan_token.c \
INTERNAL_SRCS = \
src/internal/$(PACKAGE)_dprintf_impl.c \
diff --git a/project/tree.mk b/project/tree.mk
index f3ae848..0c51986 100644
--- a/project/tree.mk
+++ b/project/tree.mk
@@ -2,6 +2,7 @@ TREE_DIRS = src \
src/driver \
src/output \
src/skin \
+ src/tokscan \
src/internal \
tree.tag:
diff --git a/src/tokscan/tbnf_scan_token.c b/src/tokscan/tbnf_scan_token.c
new file mode 100644
index 0000000..b546959
--- /dev/null
+++ b/src/tokscan/tbnf_scan_token.c
@@ -0,0 +1,102 @@
+/**************************************************************/
+/* treebnf: a tree oriented bnf library */
+/* Copyright (C) 2024 SysDeer Technologies, LLC */
+/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */
+/**************************************************************/
+
+#include <treebnf/treebnf.h>
+
+#define TBNF_STATE_STACK_SIZE (512)
+
+/*
+ * tbnf_scan_token(): identify the token at the current scan position
+ * without modifying the scan context.
+ *
+ * every entry (0 .. tok_scan_nents-1) in the table row selected by
+ * sctx->tok_scan_state that has a scan function gets called with sctx;
+ * the entry whose function returns the largest positive value wins,
+ * and since the comparison is strict, the lowest-indexed entry wins
+ * in case of a tie.
+ *
+ * tok->tok_type receives the index of the winning entry, tok->tok_len
+ * the value that it returned, and tok->tok_off the difference between
+ * tok_scan_mark and tok_scan_base. all three fields are written also
+ * upon failure, in which case type and length are both zero.
+ *
+ * returns 0 if any entry returned a positive value, and -1 otherwise.
+ */
+int tbnf_scan_token(const struct tbnf_scan_ctx * sctx, struct tbnf_token * tok)
+{
+	int idx;
+	int cur;
+	int best_len  = 0;
+	int best_type = 0;
+
+	const int row = sctx->tok_scan_state;
+
+	for (idx = 0; idx < sctx->tok_scan_nents; idx++) {
+		/* an entry without a scan function cannot match */
+		if (!sctx->tok_scan_tbls[row][idx].tok_scan_fn)
+			continue;
+
+		cur = sctx->tok_scan_tbls[row][idx].tok_scan_fn(sctx);
+
+		/* strictly longer only: earlier entries win ties */
+		if (cur > best_len) {
+			best_len  = cur;
+			best_type = idx;
+		}
+	}
+
+	tok->tok_type = best_type;
+	tok->tok_len  = best_len;
+	tok->tok_off  = sctx->tok_scan_mark - sctx->tok_scan_base;
+
+	if (best_len > 0)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * tbnf_scan_tokens(): scan up to ntoks consecutive tokens into tokv,
+ * updating the (read-write) scan context after each accepted token.
+ *
+ * scanning stops once ntoks tokens were stored or tok_scan_mark has
+ * reached tok_scan_cap. for every accepted token, the state operation
+ * of the matching table entry is applied (pop, keep, or push of the
+ * entry's next state), tok_scan_type is set to the token's type, and
+ * tok_scan_mark is advanced by the token's length; the token's tok_any
+ * member is set to the caller-provided value.
+ *
+ * the state stack lives on this function's own stack and is seeded
+ * with the context's current state upon entry.
+ *
+ * return value:
+ *   - the number of tokens stored (ntoks is clamped to INT32_MAX);
+ *   - a positive value that is smaller than the original ntoks, in
+ *     combination with mark < cap, indicates an error while trying
+ *     to obtain the next token;
+ *   - (-1) if the very first token could not be obtained;
+ *   - (-1) if a pop was requested with the state stack at its base,
+ *     or a push was requested with the state stack already full.
+ *
+ * NOTE(review): the stack underflow and overflow paths return (-1)
+ * even after earlier tokens were stored and the mark was advanced;
+ * confirm that callers do not need the count in those cases.
+ */
+int tbnf_scan_tokens(struct tbnf_scan_ctx * sctx, size_t ntoks, struct tbnf_token * tokv, int any)
+{
+	int                          nscanned;
+	int                          top;
+	int                          stack[TBNF_STATE_STACK_SIZE];
+	const struct tbnf_scan_tbl * entry;
+
+	/* the token count is returned as an int, hence the clamp */
+	if (ntoks > INT32_MAX)
+		ntoks = INT32_MAX;
+
+	/* bottom of the stack: the state in effect upon entry */
+	top      = 0;
+	stack[0] = sctx->tok_scan_state;
+
+	for (nscanned = 0; ntoks && (sctx->tok_scan_mark < sctx->tok_scan_cap); nscanned++) {
+		if (tbnf_scan_token(sctx,tokv) < 0) {
+			if (nscanned > 0)
+				return nscanned;
+
+			return (-1);
+		}
+
+		entry = &sctx->tok_scan_tbls[stack[top]][tokv->tok_type];
+
+		switch (entry->tok_state_op) {
+			case TBNF_STATE_KEEP:
+				break;
+
+			case TBNF_STATE_PUSH:
+				/* no free slot left above the current top */
+				if (top == TBNF_STATE_STACK_SIZE - 1)
+					return (-1);
+
+				sctx->tok_scan_state = entry->tok_state_next;
+				stack[++top]         = sctx->tok_scan_state;
+				break;
+
+			case TBNF_STATE_POP:
+				/* nothing to pop: only the entry state remains */
+				if (top == 0)
+					return (-1);
+
+				top--;
+				sctx->tok_scan_state = stack[top];
+				break;
+		}
+
+		/* commit the token: record its type and step over it */
+		sctx->tok_scan_type  = tokv->tok_type;
+		sctx->tok_scan_mark += tokv->tok_len;
+
+		tokv->tok_any = any;
+
+		tokv++;
+		ntoks--;
+	}
+
+	return nscanned;
+}