From 01a9392ad6051878e217bffeffd6261ccf994c42 Mon Sep 17 00:00:00 2001 From: Anton Kling Date: Fri, 17 Nov 2023 23:58:14 +0100 Subject: Minibox: Add a somewhat improved shell This shell actually lexes and produces a AST which makes it easier to add features and will makes it more difficult to introduce bugs. So basically it is just better code. --- userland/minibox/utilities/sh/lexer.c | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 userland/minibox/utilities/sh/lexer.c (limited to 'userland/minibox/utilities/sh/lexer.c') diff --git a/userland/minibox/utilities/sh/lexer.c b/userland/minibox/utilities/sh/lexer.c new file mode 100644 index 0000000..72011e6 --- /dev/null +++ b/userland/minibox/utilities/sh/lexer.c @@ -0,0 +1,101 @@ +#include +#include +#include "lexer.h" +#include +#include +#include +#include + +void free_tokens(struct TOKEN *token) { + for (; token;) { + struct TOKEN *old = token; + token = token->next; + free(old); + } +} + +int is_nonspecial_char(char c) { + if (!isprint(c)) + return 0; + if (isspace(c)) + return 0; + if (isalnum(c)) + return 1; + return ('>' != c && '|' != c && '&' != c); +} + +int parse_chars(const char **code_ptr, struct TOKEN *cur) { + const char *code = *code_ptr; + if (!is_nonspecial_char(*code)) + return 0; + cur->type = TOKEN_CHARS; + int i = 0; + for (; *code; code++, i++) { + if (!is_nonspecial_char(*code)) { + break; + } + assert(i < 256); + cur->string_rep[i] = *code; + } + cur->string_rep[i] = '\0'; + *code_ptr = code; + return 1; +} + +// Operands such as: &, &&, |, || etc +// Is operands the right word? +int parse_operand(const char **code_ptr, struct TOKEN *cur) { + const char *code = *code_ptr; +#define TRY_PARSE_STRING(_s, _token) \ + if (0 == strncmp(code, _s, strlen(_s))) { \ + cur->type = _token; \ + strcpy(cur->string_rep, _s); \ + code += strlen(_s); \ + goto complete_return; \ + } + TRY_PARSE_STRING("&&", TOKEN_AND); + TRY_PARSE_STRING("||", TOKEN_NOT); + TRY_PARSE_STRING(">>", TOKEN_STREAM_APPEND); + TRY_PARSE_STRING(">", TOKEN_STREAM); + TRY_PARSE_STRING("|", TOKEN_PIPE); + // TODO: & + + // Failed to parse + return 0; + +complete_return: + *code_ptr = code; + return 1; +} + +void skip_whitespace(const char **code_ptr) { + const char *code = *code_ptr; + for (; isspace(*code); code++) + ; + *code_ptr = code; +} + +struct TOKEN *lex(const char *code) { + struct TOKEN *head = NULL; + struct TOKEN *prev = NULL; + for (; *code;) { + skip_whitespace(&code); + if (!*code) + break; + struct TOKEN *cur = malloc(sizeof(struct TOKEN)); + cur->next = NULL; + if (prev) + prev->next = cur; + if (parse_chars(&code, cur)) { + } else if (parse_operand(&code, cur)) { + } else { + free(cur); + printf("at: %s\n", code); + assert(0 && "Unknown token"); + } + if (!head) + head = cur; + prev = cur; + } + return head; +} -- cgit v1.2.3