summaryrefslogtreecommitdiff
path: root/src/util/tpax_path_replstr.c
blob: 1935628ca8f092feb40416d235b9e74380eccd9e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/**************************************************************/
/*  tpax: a topological pax implementation                    */
/*  Copyright (C) 2020--2024  SysDeer Technologies, LLC       */
/*  Released under GPLv2 and GPLv3; see COPYING.TPAX.         */
/**************************************************************/

#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <tpax/tpax.h>
#include "tpax_driver_impl.h"

/* map the digit characters '1' through '9' to the back-reference */
/* indices 1 through 9; every other character yields zero.        */
static int tpax_backref_idx(const char c)
{
	if ((c < '1') || (c > '9'))
		return 0;

	return c - '0';
}

/*
 * tpax_util_path_replstr(): substitute matches of a compiled regular
 * expression in a path name.
 *
 * dstpath: output buffer; written only when the expression matches.
 * srcpath: the null-terminated path to be examined.
 * replstr: the replacement text, where '&' stands for the entire
 *          matched string, '\1' through '\9' stand for the respective
 *          matched subexpression (or an empty string if that
 *          subexpression did not participate in the match), and a
 *          backslash followed by any other character stands for that
 *          character; a lone backslash at the very end of replstr is
 *          copied as is.
 * regex:   the compiled regular expression.
 * buflen:  size of dstpath in bytes, including room for the
 *          terminating null character.
 * flags:   when TPAX_REPL_GLOBAL is set, all non-overlapping matches
 *          are replaced rather than only the first one.
 *
 * return value: zero if srcpath does not match (dstpath is then left
 * untouched); the length of the resulting string, not counting the
 * terminating null character, upon a successful substitution; or
 * a negative value on error, with errno set to ENOBUFS whenever the
 * result (including its null terminator) does not fit in dstpath.
 */
int tpax_util_path_replstr(
	char *          dstpath,
	const char *    srcpath,
	const char *    replstr,
	const regex_t * regex,
	size_t          buflen,
	int             flags)
{
	int             rc;
	int             idx;
	int             eflags;
	int             fmatch;
	const char *    src;
	const char *    ch;
	char *          dst;
	size_t          len;
	regmatch_t      pmatch[11];

	dst    = dstpath;
	src    = srcpath;
	eflags = 0;
	fmatch = 0;

	for (;;) {
		/* attempt to match */
		if ((rc = regexec(regex,src,11,pmatch,eflags)) == REG_NOMATCH)
			break;

		if (rc)
			return -1;

		fmatch = 1;

		/* copy bytes leading up to match (always leave room for the null terminator) */
		if (buflen <= (len = pmatch[0].rm_so)) {
			errno = ENOBUFS;
			return -1;
		}

		memcpy(dst,src,len);
		dst    += len;
		buflen -= len;

		/* copy replacement string */
		for (ch=replstr; *ch; ch++) {
			/* <ampersand> stands for the entire matched string */
			if (ch[0] == '&') {
				idx = 0;

			/* back-reference semantics: a matched subexpression or an empty string */
			} else if ((ch[0] == '\\') && tpax_backref_idx(ch[1])) {
				idx = tpax_backref_idx(*++ch);

				if (pmatch[idx].rm_so < 0)
					idx = -1;

			/* all other characters, escaped or not */
			} else {
				/* skip the escaping backslash, but never step past the null terminator */
				if ((ch[0] == '\\') && ch[1])
					ch++;

				if (buflen <= 1) {
					errno = ENOBUFS;
					return -1;
				}

				*dst++ = *ch;
				idx    = -1;
				buflen--;
			}

			/* copy matched string or matched subexpression, if any */
			if (idx >= 0) {
				len = pmatch[idx].rm_eo - pmatch[idx].rm_so;

				if (buflen <= len) {
					errno = ENOBUFS;
					return -1;
				}

				memcpy(dst,&src[pmatch[idx].rm_so],len);
				dst    += len;
				buflen -= len;
			}
		}

		/* resume right after the match */
		src += pmatch[0].rm_eo;

		/* replace further occurrences as needed */
		if (!(flags & TPAX_REPL_GLOBAL) || !src[0])
			break;

		/* an empty match consumes no input: copy one byte */
		/* through so that the scan always makes progress  */
		if (pmatch[0].rm_eo == pmatch[0].rm_so) {
			if (buflen <= 1) {
				errno = ENOBUFS;
				return -1;
			}

			*dst++ = *src++;
			buflen--;

			if (!src[0])
				break;
		}

		/* src no longer points at the beginning of the path, */
		/* hence <circumflex> must not match at this position */
		eflags = REG_NOTBOL;
	}

	/* no match at all: dstpath was not written to */
	if (!fmatch)
		return 0;

	/* copy remaining, non-matching bytes and the null terminator */
	if (buflen <= (len = strlen(src))) {
		errno = ENOBUFS;
		return -1;
	}

	memcpy(dst,src,len+1);
	dst += len;

	/* all done */
	return (int)(dst - dstpath);
}