/* SPDX-License-Identifier: GPL-3.0-only */

#include "tf.h"
#include "txp.bison.h"

/*
 * Type tags returned by eval_node().
 *
 * Each tag tells the caller which member of union txp_semantic_value was
 * filled in for the evaluated node.
 */
enum semantic_types {
	ST_STRVAL, /* ->strval holds the result */
	ST_INTVAL, /* ->intval holds the result */
	ST_BOOLVAL, /* ->boolval holds the result */
	ST_REGEX_PATTERN, /* ->re_pattern holds the result */
};

/*
 * The parser context.
 *
 * An instance is allocated by txp_init(), handed to the parser and later
 * passed to the evaluation functions.
 */
struct txp_context {
	/* Set by txp_parse_error() when the first error is reported. */
	char *errmsg;
	/* Root of the abstract syntax tree; txp_yyparse() receives its address. */
	struct txp_ast_node *ast;
};

/*
 * Set the error bit in the parser context and log a message.
 *
 * This is called if the lexer or the parser detect an error. Only the first
 * error is logged (with a severity of "warn").
 */
__attribute__ ((format (printf, 3, 4)))
void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
{
	va_list ap;
	char *tmp;

	if (ctx->errmsg) /* we already printed an error message */
		return;
	va_start(ap, fmt);
	xvasprintf(&tmp, fmt, ap);
	va_end(ap);
	xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
	free(tmp);
	WARNING_LOG("%s\n", ctx->errmsg);
}

/*
 * Convert a quoted, backslash-escaped literal into a plain C string.
 *
 * The input must start with quote_chars[0] and must contain an unescaped
 * quote_chars[1] which terminates the literal. To parse a proper C99 string
 * literal, pass two double quotes as the second argument. Other quote
 * characters may be given so that, for example, regular expression patterns
 * enclosed in '/' can be parsed as well.
 *
 * The opening and the closing quote characters are stripped off. Within the
 * literal, a backslash followed by the closing quote character yields that
 * character, a double backslash yields a single backslash, and the escapes
 * \a, \b, \f, \n, \r, \t and \v yield the corresponding control characters.
 * For example, the string literal "xyz\n" becomes an array containing 'x',
 * 'y', 'z', a newline character and the terminating zero byte.
 *
 * The caller must make sure that the input is well-formed. Malformed input (a
 * missing quote character, an unknown escape sequence or a trailing
 * backslash) is only caught by assertions, which abort the process.
 *
 * On return, *result points to a newly allocated string which the caller
 * must free. The return value is the offset of the first character after the
 * closing quote. For proper string literals this is the terminating zero byte
 * of the input, for regular expression patterns it is the beginning of the
 * flags which modify the matching behaviour.
 */
unsigned parse_quoted_string(const char *src, const char quote_chars[2],
		char **result)
{
	const char closing = quote_chars[1];
	size_t len = strlen(src), in = 1, out = 0;
	char *buf;

	assert(len >= 2);
	assert(src[0] == quote_chars[0]);
	/* The output is at most len - 2 characters plus the zero byte. */
	buf = xmalloc(len - 1);
	for (;;) {
		char c;

		assert(in < len);
		c = src[in];
		if (c == '\\') {
			/* Escape sequence: translate the character after it. */
			in++;
			assert(in < len);
			c = src[in];
			if (c == closing)
				buf[out++] = closing;
			else switch (c) {
				case '\\': buf[out++] = '\\'; break;
				case 'a': buf[out++] = '\a'; break;
				case 'b': buf[out++] = '\b'; break;
				case 'f': buf[out++] = '\f'; break;
				case 'n': buf[out++] = '\n'; break;
				case 'r': buf[out++] = '\r'; break;
				case 't': buf[out++] = '\t'; break;
				case 'v': buf[out++] = '\v'; break;
				default: assert(false);
			}
			in++;
			continue;
		}
		if (c == closing) /* unescaped closing quote: we are done */
			break;
		buf[out++] = c;
		in++;
	}
	assert(src[in] == closing);
	buf[out] = '\0';
	*result = buf;
	return in + 1;
}

/*
 * Parse and compile an extended regular expression pattern, including flags.
 *
 * A regex pattern looks like a C99 string literal with three differences:
 * (a) it is enclosed in '/' characters rather than double quotes, (b) double
 * quote characters within the pattern need no backslash, but slashes do, and
 * (c) the closing slash may be followed by flag characters which modify the
 * matching behaviour.
 *
 * The supported flags are 'i' to ignore case (REG_ICASE) and 'n' to change
 * the handling of newline characters (REG_NEWLINE). The resulting flag bits
 * are stored in result->flags and the compiled pattern in result->preg.
 *
 * Since the quoted part is parsed with parse_quoted_string(), malformed input
 * aborts the process, and so does an unknown flag character. Both indicate an
 * implementation bug because the lexer is expected to reject such input
 * earlier. Errors from compiling the pattern, on the other hand, do not
 * abort: the return value of xregcomp() is passed back to the caller.
 */
int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
{
	char *pattern;
	const char *flag = src + parse_quoted_string(src, "//", &pattern);
	int ret;

	result->flags = 0;
	while (*flag != '\0') {
		if (*flag == 'i')
			result->flags |= REG_ICASE;
		else if (*flag == 'n')
			result->flags |= REG_NEWLINE;
		else
			assert(false);
		flag++;
	}
	ret = xregcomp(&result->preg, pattern, result->flags);
	/* The pattern text is no longer needed once it has been compiled. */
	free(pattern);
	return ret;
}

/*
 * Allocate an AST node and set its ->id field.
 *
 * All other fields are left for the caller to initialize.
 */
static struct txp_ast_node *ast_node_raw(int id)
{
	struct txp_ast_node *raw;

	raw = xmalloc(sizeof(struct txp_ast_node));
	raw->id = id;
	return raw;
}

/*
 * Create a leaf node for the abstract syntax tree.
 *
 * The returned node carries the given id and has its ->num_children field
 * set to zero. Filling in the ->sv field is the job of the caller.
 *
 * The function is non-static because the lexer calls it, too.
 */
struct txp_ast_node *txp_new_ast_leaf_node(int id)
{
	struct txp_ast_node *leaf;

	leaf = ast_node_raw(id);
	leaf->num_children = 0;
	return leaf;
}

/*
 * Create an internal AST node with a single child.
 *
 * The new node has the given id and refers to child through a newly
 * allocated one-element child array.
 */
struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
{
	struct txp_ast_node *parent = ast_node_raw(id);

	parent->children = xmalloc(sizeof(*parent->children));
	parent->children[0] = child;
	parent->num_children = 1;
	return parent;
}

/*
 * Create an internal AST node with two children.
 *
 * The new node has the given id. Its newly allocated child array holds left
 * at index zero and right at index one.
 */
struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
		struct txp_ast_node *right)
{
	struct txp_ast_node *parent = ast_node_raw(id);

	parent->children = xmalloc(2 * sizeof(*parent->children));
	parent->children[0] = left;
	parent->children[1] = right;
	parent->num_children = 2;
	return parent;
}

/*
 * Recursively deallocate an abstract syntax tree.
 *
 * For internal nodes the subtrees and the array of child pointers are freed.
 * For leaf nodes the semantic value is released if it owns resources: the
 * string of a string literal and the compiled regex of a regex pattern.
 * Finally the node itself is freed. A NULL pointer is silently ignored.
 */
static void txp_free_ast(struct txp_ast_node *root)
{
	if (!root)
		return;
	if (root->num_children <= 0) {
		/* Leaf node: only some semantic values need cleanup. */
		if (root->id == STRING_LITERAL)
			free(root->sv.strval);
		else if (root->id == REGEX_PATTERN)
			regfree(&root->sv.re_pattern.preg);
	} else {
		int k = 0;

		while (k < root->num_children) {
			txp_free_ast(root->children[k]);
			k++;
		}
		free(root->children);
	}
	free(root);
}

/*
 * Deallocate a parser context.
 *
 * This frees the abstract syntax tree attached to the context and the
 * context structure itself. The ->errmsg field is not touched.
 *
 * Like free(3), the function accepts a NULL pointer and does nothing in this
 * case, so that callers may invoke it unconditionally in cleanup paths.
 */
void txp_free(struct txp_context *ctx)
{
	if (!ctx)
		return;
	txp_free_ast(ctx->ast);
	free(ctx);
}

/*
 * Forward declaration: eval_binary_op() calls eval_node(), which in turn calls
 * eval_binary_op().
 */
static int eval_node(const struct txp_ast_node *node,
		const struct txp_context *ctx,
		const struct epi_properties *props,
		union txp_semantic_value *result);

/*
 * Evaluate both operands of a binary node.
 *
 * The value of the first child is stored in *v1, the value of the second
 * child in *v2. The type tags returned by eval_node() are discarded.
 */
static void eval_binary_op(const struct txp_ast_node *node,
		const struct txp_context *ctx,
		const struct epi_properties *props,
		union txp_semantic_value *v1, union txp_semantic_value *v2)
{
	const struct txp_ast_node *left = node->children[0];
	const struct txp_ast_node *right = node->children[1];

	eval_node(left, ctx, props, v1);
	eval_node(right, ctx, props, v2);
}

static int eval_node(const struct txp_ast_node *node,
		const struct txp_context *ctx,
		const struct epi_properties *props,
		union txp_semantic_value *result)
{
	int ret;
	union txp_semantic_value v1, v2;

	assert(node);
	switch (node->id) {
	/* strings */
	case STRING_LITERAL:
		result->strval = node->sv.strval;
		return ST_STRVAL;
	case TEXT:
		result->strval = epi_text(props);
		return ST_STRVAL;
	/* integers */
	case NUM:
		result->intval = node->sv.intval;
		return ST_INTVAL;
	case '+':
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->intval = v1.intval + v2.intval;
		return ST_INTVAL;
	case '-':
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->intval = v1.intval - v2.intval;
		return ST_INTVAL;
	case '*':
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->intval = v1.intval * v2.intval;
		return ST_INTVAL;
	case '/':
		eval_binary_op(node, ctx, props, &v1, &v2);
		if (v2.intval == 0) {
			static bool warned;
			if (!warned)
				ERROR_LOG("division by zero\n");
			warned = true;
			result->intval = 0;
		} else
			result->intval = v1.intval / v2.intval;
		return ST_INTVAL;
	case NEG:
		eval_node(node->children[0], ctx, props, &v1);
		result->intval = -v1.intval;
		return ST_INTVAL;
	case LEN:
		result->intval = epi_len(props);
		return ST_INTVAL;
	/* bools */
	case TAG:
		eval_node(node->children[0], ctx, props, &v1);
		result->boolval = epi_has_tag(node->children[0]->sv.strval,
			props);
		return ST_BOOLVAL;
	case TRUE:
		result->boolval = true;
		return ST_BOOLVAL;
	case FALSE:
		result->boolval = false;
		return ST_BOOLVAL;
	case OR:
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.boolval || v2.boolval;
		return ST_BOOLVAL;
	case AND:
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.boolval && v2.boolval;
		return ST_BOOLVAL;
	case NOT:
		eval_node(node->children[0], ctx, props, &v1);
		result->boolval = !v1.boolval;
		return ST_BOOLVAL;
	case EQUAL:
		ret = eval_node(node->children[0], ctx, props, &v1);
		eval_node(node->children[1], ctx, props, &v2);
		if (ret == ST_STRVAL)
			result->boolval = !strcmp(v1.strval, v2.strval);
		else
			result->boolval = v1.intval == v2.intval;
		return ST_BOOLVAL;
	case NOT_EQUAL:
		ret = eval_node(node->children[0], ctx, props, &v1);
		eval_node(node->children[1], ctx, props, &v2);
		if (ret == ST_STRVAL)
			result->boolval = strcmp(v1.strval, v2.strval);
		else
			result->boolval = v1.intval != v2.intval;
		return ST_BOOLVAL;
	case '<':
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.intval < v2.intval;
		return ST_BOOLVAL;
	case '>':
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.intval > v2.intval;
		return ST_BOOLVAL;
	case LESS_OR_EQUAL:
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.intval <= v2.intval;
		return ST_BOOLVAL;
	case GREATER_OR_EQUAL:
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = v1.intval >= v2.intval;
		return ST_BOOLVAL;
	case REGEX_MATCH:
		eval_binary_op(node, ctx, props, &v1, &v2);
		result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
			 0, NULL, 0) == 0;
		return ST_BOOLVAL;
	case REGEX_PATTERN:
		result->re_pattern = node->sv.re_pattern;
		return ST_REGEX_PATTERN;
	default:
		EMERG_LOG("bug: invalid node id %d\n", node->id);
		exit(EXIT_FAILURE);
	}
}

/*
 * Evaluate an abstract syntax tree, starting at the root node.
 *
 * The ctx argument should be the pointer that was returned from an earlier
 * call to txp_init(). The props argument describes the epigram against which
 * the expression is evaluated.
 *
 * Returns true if the context has no syntax tree, or if the tree evaluates to
 * true, a non-empty string, or a non-zero number. Returns false otherwise.
 *
 * A tree which evaluates to anything else (a bare regex pattern) is an
 * implementation bug and triggers an assertion. If assertions are compiled
 * out, false is returned in this case rather than falling off the end of the
 * function.
 */
bool txp_eval_ast(const struct txp_context *ctx,
		const struct epi_properties *props)
{
	union txp_semantic_value v;
	int ret;

	if (!ctx->ast) /* there is no expression to evaluate */
		return true;
	ret = eval_node(ctx->ast, ctx, props, &v);

	if (ret == ST_INTVAL)
		return v.intval != 0;
	if (ret == ST_STRVAL)
		return v.strval[0] != 0;
	if (ret == ST_BOOLVAL)
		return v.boolval;
	assert(false);
	/* Only reached with NDEBUG: keep the return value well-defined. */
	return false;
}

/*
 * Prototypes of the scanner functions which txp_init() calls to set up and
 * tear down the lexer. They are declared here by hand; presumably the
 * definitions live in the generated lexer source (to be confirmed).
 */
int txp_yylex_init(txp_yyscan_t *yyscanner);
struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
	txp_yyscan_t yyscanner);
void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
int txp_yylex_destroy(txp_yyscan_t yyscanner);
void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);

/*
 * Initialize the tag expression parser.
 *
 * This allocates a parser context, sets up a scanner for the buffer given by
 * definition and runs the parser to create the abstract syntax tree of the
 * tag expression. It must be called before txp_eval_ast() can be called.
 *
 * On success, the function returns 1 and stores the context pointer in
 * *result. This pointer may be passed to txp_eval_ast() to determine whether
 * an epigram is admissible. If errmsg is not NULL, *errmsg is set to NULL.
 *
 * If the lexer or the parser reported an error, the function returns -E_TXP
 * and leaves *result untouched. The error message pointer may be NULL, in
 * which case the message is discarded. Otherwise the message is returned in
 * *errmsg and the caller must free it.
 */
int txp_init(const struct iovec *definition, struct txp_context **result,
		 char **errmsg)
{
	txp_yyscan_t scanner;
	struct yy_buffer_state *input;
	struct txp_context *ctx = xcalloc(sizeof(*ctx));
	int ret = txp_yylex_init(&scanner);

	assert(ret == 0);
	input = txp_yy_scan_bytes(definition->iov_base,
		definition->iov_len, scanner);
	txp_yyset_lineno(1, scanner);
	NOTICE_LOG("creating abstract syntax tree from tag expression\n");
	ret = txp_yyparse(ctx, &ctx->ast, scanner);
	/* The scanner is only needed while the parser runs. */
	txp_yy_delete_buffer(input, scanner);
	txp_yylex_destroy(scanner);
	if (!ctx->errmsg) { /* success */
		if (errmsg)
			*errmsg = NULL;
		*result = ctx;
		return 1;
	}
	/*
	 * Parse error: hand the message over to the caller or discard it.
	 *
	 * NOTE(review): ctx->ast is not freed on this path. Confirm that the
	 * parser never leaves a (partial) tree there when an error occurred.
	 */
	if (errmsg)
		*errmsg = ctx->errmsg;
	else
		free(ctx->errmsg);
	free(ctx);
	return -E_TXP;
}
