Minibox: Add a somewhat improved shell

This shell actually lexes its input and produces an AST, which makes it easier to add features and more difficult to introduce bugs. So basically it is just better code.
author: Anton Kling <anton@kling.gg> 2023-11-17 23:58:14 +0100
committer: Anton Kling <anton@kling.gg> 2023-11-17 23:58:14 +0100
commit: 01a9392ad6051878e217bffeffd6261ccf994c42 (patch)
tree: 17bb2d06531c32e17396ff6978983e133b79c6df /userland/minibox/utilities/sh/lexer.c
parent: 0c9282bb61b0d7c463045139655b3f1f1ec5422b (diff)
1 files changed, 101 insertions, 0 deletions
diff --git a/userland/minibox/utilities/sh/lexer.c b/userland/minibox/utilities/sh/lexer.c
new file mode 100644
index 0000000..72011e6
--- /dev/null
+++ b/userland/minibox/utilities/sh/lexer.c
@@ -0,0 +1,101 @@
+#include <assert.h>
+#include <ctype.h>
+#include "lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+// Frees every node of the token list starting at token; a NULL list is a no-op.
+void free_tokens(struct TOKEN *token) {
+  for (struct TOKEN *next; token; token = next) {
+    next = token->next; // read the link before the node is released
+    free(token);
+  }
+}
+
+// Returns 1 if c may be part of a word token, 0 otherwise.
+// Rejected: non-printable characters, whitespace and the operator characters '>', '|', '&'.
+int is_nonspecial_char(char c) {
+  // <ctype.h> functions take an unsigned char value (or EOF); a negative char is UB.
+  const unsigned char uc = (unsigned char)c;
+  if (!isprint(uc) || isspace(uc))
+    return 0;
+  return ('>' != c && '|' != c && '&' != c);
+}
+
+// Lexes a word (run of non-special characters) at *code_ptr into cur as TOKEN_CHARS.
+// Returns 1 with *code_ptr moved past the word, or 0 (nothing changed) if no word starts here.
+int parse_chars(const char **code_ptr, struct TOKEN *cur) {
+  if (!is_nonspecial_char(**code_ptr))
+    return 0;
+  cur->type = TOKEN_CHARS;
+  size_t len = 0;
+  for (; is_nonspecial_char(**code_ptr); (*code_ptr)++) {
+    // Reserve one byte for the terminator; the old `i < 256` bound did not.
+    assert(len + 1 < sizeof(cur->string_rep) && "word too long");
+    // With NDEBUG the word is still consumed but stored truncated, never overflowed.
+    if (len + 1 < sizeof(cur->string_rep))
+      cur->string_rep[len++] = **code_ptr;
+  }
+  cur->string_rep[len] = '\0';
+  return 1;
+}
+
+// Lexes one operator at *code_ptr: "&&", "||", ">>", ">" or "|".
+// On a match fills cur, moves *code_ptr past the operator and returns 1; otherwise returns 0.
+int parse_operand(const char **code_ptr, struct TOKEN *cur) {
+  // Two-character operators are listed before their one-character prefixes.
+  static const struct { const char *text; int type; } operators[] = {
+      {"&&", TOKEN_AND},
+      // NOTE(review): "||" produces TOKEN_NOT, unchanged here; confirm the name is intended.
+      {"||", TOKEN_NOT},
+      {">>", TOKEN_STREAM_APPEND},
+      {">", TOKEN_STREAM},
+      {"|", TOKEN_PIPE},
+      // TODO: &
+  };
+  for (size_t k = 0; k < sizeof(operators) / sizeof(operators[0]); k++) {
+    const size_t len = strlen(operators[k].text);
+    if (0 != strncmp(*code_ptr, operators[k].text, len))
+      continue;
+    cur->type = operators[k].type;
+    strcpy(cur->string_rep, operators[k].text);
+    *code_ptr += len;
+    return 1;
+  }
+  // Failed to parse
+  return 0;
+}
+
+// Advances *code_ptr past any leading whitespace, as classified by isspace().
+void skip_whitespace(const char **code_ptr) {
+  // Cast first: calling isspace() with a negative char value is undefined behavior.
+  while (isspace((unsigned char)**code_ptr))
+    (*code_ptr)++;
+}
+
+// Lexes code into a singly linked token list in input order; release it with free_tokens().
+// Returns NULL for empty input, on allocation failure, or on an unknown token in NDEBUG builds.
+struct TOKEN *lex(const char *code) {
+  struct TOKEN *head = NULL;
+  struct TOKEN **tail = &head; // slot where the next token gets linked
+  for (skip_whitespace(&code); *code; skip_whitespace(&code)) {
+    struct TOKEN *cur = malloc(sizeof *cur);
+    if (cur && !parse_chars(&code, cur) && !parse_operand(&code, cur)) {
+      printf("at: %s\n", code);
+      assert(0 && "Unknown token");
+      free(cur);
+      cur = NULL;
+    }
+    if (!cur) { // out of memory, or an unknown token when asserts are compiled out
+      free_tokens(head);
+      return NULL;
+    }
+    // Link only fully parsed tokens so the list never references freed memory.
+    cur->next = NULL;
+    *tail = cur;
+    tail = &cur->next;
+  }
+  return head;
+}
author	Anton Kling <anton@kling.gg>	2023-11-17 23:58:14 +0100
committer	Anton Kling <anton@kling.gg>	2023-11-17 23:58:14 +0100
commit	01a9392ad6051878e217bffeffd6261ccf994c42 (patch)
tree	17bb2d06531c32e17396ff6978983e133b79c6df /userland/minibox/utilities/sh/lexer.c
parent	0c9282bb61b0d7c463045139655b3f1f1ec5422b (diff)