summaryrefslogtreecommitdiff
path: root/userland/minibox/utilities/sh/lexer.c
diff options
context:
space:
mode:
authorAnton Kling <anton@kling.gg>2023-11-17 23:58:14 +0100
committerAnton Kling <anton@kling.gg>2023-11-17 23:58:14 +0100
commit01a9392ad6051878e217bffeffd6261ccf994c42 (patch)
tree17bb2d06531c32e17396ff6978983e133b79c6df /userland/minibox/utilities/sh/lexer.c
parent0c9282bb61b0d7c463045139655b3f1f1ec5422b (diff)
Minibox: Add a somewhat improved shell
This shell actually lexes its input and produces an AST, which makes it easier to add features and more difficult to introduce bugs. So basically it is just better code.
Diffstat (limited to 'userland/minibox/utilities/sh/lexer.c')
-rw-r--r--userland/minibox/utilities/sh/lexer.c101
1 files changed, 101 insertions, 0 deletions
diff --git a/userland/minibox/utilities/sh/lexer.c b/userland/minibox/utilities/sh/lexer.c
new file mode 100644
index 0000000..72011e6
--- /dev/null
+++ b/userland/minibox/utilities/sh/lexer.c
@@ -0,0 +1,101 @@
+#include <assert.h>
+#include <ctype.h>
+#include "lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+// Releases every node of a token list.
+// token: first node of the list; NULL means there is nothing to release.
+// Each node is passed to free(); nothing else is released here.
+void free_tokens(struct TOKEN *token) {
+  while (token) {
+    // Grab the successor first: the node is gone once free() returns.
+    struct TOKEN *following = token->next;
+    free(token);
+    token = following;
+  }
+}
+
+// Reports whether c may be part of a plain word token (TOKEN_CHARS).
+// Returns 1 for a printable, non-whitespace character that is not one of
+// the operator characters '>', '|' or '&'; returns 0 for anything else,
+// including the terminating '\0'.
+int is_nonspecial_char(char c) {
+  // The <ctype.h> classifiers are only defined for values representable as
+  // unsigned char (or EOF). Plain char may be signed, so convert first.
+  const unsigned char uc = (unsigned char)c;
+  if (!isprint(uc) || isspace(uc))
+    return 0;
+  switch (c) {
+  case '>':
+  case '|':
+  case '&':
+    return 0;
+  default:
+    return 1;
+  }
+}
+
+// Lexes a run of non-special characters starting at *code_ptr into cur.
+// code_ptr: in/out cursor into the input; moved past the run on success and
+//           left untouched when nothing was consumed.
+// cur:      token to fill; its type becomes TOKEN_CHARS and string_rep
+//           receives the NUL-terminated text of the run.
+// Returns 1 when a token was produced, 0 when the first character is not a
+// non-special character (see is_nonspecial_char()).
+int parse_chars(const char **code_ptr, struct TOKEN *cur) {
+  // NOTE(review): 256 is the bound the previous assert used; presumably it
+  // is the size of string_rep declared in lexer.h - confirm.
+  enum { STRING_REP_SIZE = 256 };
+  const char *code = *code_ptr;
+  if (!is_nonspecial_char(*code))
+    return 0;
+  cur->type = TOKEN_CHARS;
+  int i = 0;
+  // is_nonspecial_char('\0') is 0, so this also stops at the end of input.
+  for (; is_nonspecial_char(*code); code++, i++) {
+    // Keep the last byte free for the terminator written after the loop.
+    assert(i < STRING_REP_SIZE - 1);
+    cur->string_rep[i] = *code;
+  }
+  cur->string_rep[i] = '\0';
+  *code_ptr = code;
+  return 1;
+}
+
+// Lexes one shell operator ("&&", "||", ">>", ">" or "|") at *code_ptr.
+// code_ptr: in/out cursor into the input; moved past the operator on
+//           success and left untouched on failure.
+// cur:      token to fill; its type is set to the matching TOKEN_* value
+//           and string_rep to the operator text.
+// Returns 1 when an operator was recognised, 0 otherwise.
+int parse_operand(const char **code_ptr, struct TOKEN *cur) {
+  const char *code = *code_ptr;
+// On a match: fill cur, advance the caller's cursor and return from
+// parse_operand. Wrapped in do/while so it behaves as a single statement.
+#define TRY_PARSE_STRING(str, tok)                                             \
+  do {                                                                         \
+    if (0 == strncmp(code, (str), strlen(str))) {                              \
+      cur->type = (tok);                                                       \
+      strcpy(cur->string_rep, (str));                                          \
+      *code_ptr = code + strlen(str);                                          \
+      return 1;                                                                \
+    }                                                                          \
+  } while (0)
+  // Two-character operators are tried before their one-character prefixes
+  // so that ">>" is not lexed as two ">" tokens.
+  TRY_PARSE_STRING("&&", TOKEN_AND);
+  // NOTE(review): "||" is tagged TOKEN_NOT; looks like it is meant to be a
+  // logical OR - confirm against the token enum in lexer.h.
+  TRY_PARSE_STRING("||", TOKEN_NOT);
+  TRY_PARSE_STRING(">>", TOKEN_STREAM_APPEND);
+  TRY_PARSE_STRING(">", TOKEN_STREAM);
+  TRY_PARSE_STRING("|", TOKEN_PIPE);
+  // TODO: &
+#undef TRY_PARSE_STRING
+
+  // Failed to parse
+  return 0;
+}
+
+// Advances *code_ptr past any leading whitespace, as classified by
+// isspace(). The cursor stops at the first non-whitespace character, which
+// may be the terminating '\0'.
+void skip_whitespace(const char **code_ptr) {
+  const char *code = *code_ptr;
+  // isspace() is only defined for values representable as unsigned char (or
+  // EOF); plain char may be negative, so convert before classifying.
+  while (isspace((unsigned char)*code))
+    code++;
+  *code_ptr = code;
+}
+
+// Splits code into a singly linked list of tokens.
+// code: NUL-terminated input text.
+// Returns the head of a newly allocated list, to be released with
+// free_tokens(). Returns NULL when the input contains no tokens, when an
+// allocation fails, or - in builds where assert() is compiled out - when an
+// unknown token is met. With assert() active an unknown token aborts after
+// printing the remaining input.
+struct TOKEN *lex(const char *code) {
+  struct TOKEN *head = NULL;
+  struct TOKEN *prev = NULL;
+  for (;;) {
+    skip_whitespace(&code);
+    if (!*code)
+      break;
+    struct TOKEN *cur = malloc(sizeof *cur);
+    if (!cur) {
+      free_tokens(head);
+      return NULL;
+    }
+    cur->next = NULL;
+    if (!parse_chars(&code, cur) && !parse_operand(&code, cur)) {
+      free(cur);
+      printf("at: %s\n", code);
+      assert(0 && "Unknown token");
+      // Only reached when NDEBUG removes the assert: the cursor cannot
+      // advance, so give up instead of looping forever on the same byte.
+      free_tokens(head);
+      return NULL;
+    }
+    // Link the node only once it holds a valid token, so the list never
+    // points at memory that was freed on the failure path above.
+    if (prev)
+      prev->next = cur;
+    else
+      head = cur;
+    prev = cur;
+  }
+  return head;
+}