diff options
author | Anton Kling <anton@kling.gg> | 2023-11-17 23:58:14 +0100 |
---|---|---|
committer | Anton Kling <anton@kling.gg> | 2023-11-17 23:58:14 +0100 |
commit | 01a9392ad6051878e217bffeffd6261ccf994c42 (patch) | |
tree | 17bb2d06531c32e17396ff6978983e133b79c6df /userland | |
parent | 0c9282bb61b0d7c463045139655b3f1f1ec5422b (diff) |
Minibox: Add a somewhat improved shell
This shell actually lexes its input and produces an AST, which makes it
easier to add features and more difficult to introduce bugs. So
basically it is just better code.
Diffstat (limited to 'userland')
-rw-r--r-- | userland/minibox/Makefile | 2 | ||||
-rw-r--r-- | userland/minibox/minibox.c | 2 | ||||
-rw-r--r-- | userland/minibox/utilities/include.h | 1 | ||||
-rw-r--r-- | userland/minibox/utilities/sh/ast.c | 128 | ||||
-rw-r--r-- | userland/minibox/utilities/sh/ast.h | 36 | ||||
-rw-r--r-- | userland/minibox/utilities/sh/lexer.c | 126 | ||||
-rw-r--r-- | userland/minibox/utilities/sh/lexer.h | 24 | ||||
-rw-r--r-- | userland/minibox/utilities/sh/sh.c | 210 |
8 files changed, 527 insertions, 2 deletions
diff --git a/userland/minibox/Makefile b/userland/minibox/Makefile
index 9e34357..b2894c3 100644
--- a/userland/minibox/Makefile
+++ b/userland/minibox/Makefile
@@ -1,6 +1,6 @@
 CC="i686-sb-gcc"
 CFLAGS=-Wall -Wextra -pedantic -Wimplicit-fallthrough -g -O0
-OBJ=minibox.o utilities/cat.o utilities/echo.o utilities/yes.o utilities/minibox.o utilities/ascii.o utilities/wc.o utilities/init.o utilities/ls.o utilities/touch.o utilities/ed.o
+OBJ=minibox.o utilities/cat.o utilities/echo.o utilities/yes.o utilities/minibox.o utilities/ascii.o utilities/wc.o utilities/init.o utilities/ls.o utilities/touch.o utilities/ed.o utilities/sh/sh.o utilities/sh/lexer.o utilities/sh/ast.o
 
 %.o: %.c
 	$(CC) $(CFLAGS) $(INCLUDE) $(LIBS) -c $< -o $@
diff --git a/userland/minibox/minibox.c b/userland/minibox/minibox.c
index 63eaee8..f3950df 100644
--- a/userland/minibox/minibox.c
+++ b/userland/minibox/minibox.c
@@ -22,7 +22,7 @@ typedef struct Command {
 Command utilities[] = {COMMAND(minibox), COMMAND(ascii), COMMAND(echo),
                        COMMAND(cat),     COMMAND(yes),   COMMAND(wc),
                        COMMAND(init),    COMMAND(ls),    COMMAND(touch),
-                       COMMAND(ed)};
+                       COMMAND(ed),      COMMAND(sh)};
 
 char *parse_filename(char *str) {
   char *tmp = NULL, *is = str;
diff --git a/userland/minibox/utilities/include.h b/userland/minibox/utilities/include.h
index 7ffd136..b01d976 100644
--- a/userland/minibox/utilities/include.h
+++ b/userland/minibox/utilities/include.h
@@ -37,5 +37,6 @@ int wc_main(int argc, char **argv);
 int ls_main(int argc, char **argv);
 int touch_main(int argc, char **argv);
 int ed_main(int argc, char **argv);
+int sh_main(int argc, char **argv);
 
 int init_main(void);
diff --git a/userland/minibox/utilities/sh/ast.c b/userland/minibox/utilities/sh/ast.c
new file mode 100644
index 0000000..64dd725
--- /dev/null
+++ b/userland/minibox/utilities/sh/ast.c
@@ -0,0 +1,128 @@
+#include "ast.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+// Frees what hangs off a command node: its argument list and the piped
+// right-hand side. The node itself is released by free_ast().
+void free_ast_command(struct AST *ast) {
+  free_ast(ast->children);
+  free_ast(ast->pipe_rhs);
+}
+
+// Frees a list of AST nodes linked through ->next. The strings the nodes point
+// at belong to the token list and are not freed here.
+void free_ast(struct AST *ast) {
+  for (; ast;) {
+    if (AST_COMMAND == ast->type)
+      free_ast_command(ast);
+    struct AST *old = ast;
+    ast = ast->next;
+    free(old);
+  }
+}
+
+// Returns a zero-initialized AST node. Running out of memory is fatal.
+struct AST *allocate_ast(void) {
+  struct AST *r = calloc(1, sizeof *r);
+  if (!r) {
+    perror("calloc");
+    exit(1);
+  }
+  return r;
+}
+
+// Parses "prog [args...] [> file | >> file] [| command]" starting at
+// *token_ptr into cur.
+// Returns 1 and moves *token_ptr past the command on success, 0 if the first
+// token is not a word, and -1 on a syntax error (a message has been printed).
+// *token_ptr is left untouched unless 1 is returned.
+int parse_command(struct TOKEN **token_ptr, struct AST *cur) {
+  struct TOKEN *token = *token_ptr;
+  if (TOKEN_CHARS != token->type)
+    return 0;
+  cur->type = AST_COMMAND;
+  cur->val.type = AST_VALUE_STRING;
+  cur->val.string = token->string_rep;
+  // Parse the arguments
+  if (token->next && TOKEN_CHARS == token->next->type) {
+    token = token->next;
+    cur->children = allocate_ast();
+    struct AST *child = cur->children;
+    for (;;) {
+      child->type = AST_EXPRESSION;
+      child->val.type = AST_VALUE_STRING;
+      child->val.string = token->string_rep;
+      if (!token->next)
+        break;
+      if (TOKEN_CHARS != token->next->type)
+        break;
+      token = token->next;
+      child->next = allocate_ast();
+      child = child->next;
+    }
+  }
+  token = token->next;
+  // Parse the stream modifier "prog > file.txt"
+  if (token &&
+      (TOKEN_STREAM == token->type || TOKEN_STREAM_APPEND == token->type)) {
+    cur->file_out_append = (TOKEN_STREAM_APPEND == token->type);
+    // TODO: Allow it to be modified
+    cur->file_out_fd_to_use = STDOUT_FILENO;
+    token = token->next;
+    // The line may end, or continue with an operator, right after the '>'.
+    if (!token || TOKEN_CHARS != token->type) {
+      fprintf(stderr, "Expected file name after redirection.\n");
+      return -1;
+    }
+    cur->file_out = token->string_rep;
+    token = token->next;
+  }
+  // Parse pipe '|'
+  if (token && TOKEN_PIPE == token->type) {
+    cur->pipe_rhs = allocate_ast();
+    token = token->next;
+    // The line may end right after the '|', so token can be NULL here.
+    int rc = token ? parse_command(&token, cur->pipe_rhs) : 0;
+    if (0 == rc)
+      fprintf(stderr, "Expected command after |.\n");
+    if (1 != rc)
+      return -1;
+  }
+  *token_ptr = token;
+  return 1;
+}
+
+// Builds a list of AST nodes from a token list. Returns NULL for an empty list
+// or on a syntax error (a message has been printed to stderr). The nodes point
+// into the tokens, so the tokens must outlive the AST.
+struct AST *generate_ast(struct TOKEN *token) {
+  struct AST *head = NULL;
+  struct AST *prev = NULL;
+  for (; token;) {
+    struct AST *cur = allocate_ast();
+    int rc = parse_command(&token, cur);
+    if (0 == rc && TOKEN_AND == token->type) {
+      cur->type = AST_CONDITIONAL_AND;
+      token = token->next;
+    } else if (0 == rc && TOKEN_NOT == token->type) {
+      cur->type = AST_CONDITIONAL_NOT;
+      token = token->next;
+    } else if (1 != rc) {
+      // A zeroed node reads as an AST_COMMAND without a program name, so a
+      // stray operator must not produce one. Discard the whole line instead.
+      if (0 == rc)
+        fprintf(stderr, "Unexpected '%s'.\n", token->string_rep);
+      free_ast(cur);
+      free_ast(head);
+      return NULL;
+    }
+    if (prev)
+      prev->next = cur;
+    if (!head)
+      head = cur;
+    prev = cur;
+  }
+  return head;
+}
diff --git a/userland/minibox/utilities/sh/ast.h b/userland/minibox/utilities/sh/ast.h
new file mode 100644
index 0000000..7e7aaff
--- /dev/null
+++ b/userland/minibox/utilities/sh/ast.h
@@ -0,0 +1,36 @@
+#ifndef AST_H
+#define AST_H
+#include "lexer.h"
+
+typedef enum {
+  AST_VALUE_STRING,
+} ast_value_type_t;
+
+struct AST_VALUE {
+  ast_value_type_t type;
+  union {
+    char *string;
+  };
+};
+
+typedef enum {
+  AST_COMMAND,
+  AST_EXPRESSION,
+  AST_CONDITIONAL_AND,
+  AST_CONDITIONAL_NOT,
+} ast_type_t;
+
+struct AST {
+  ast_type_t type;
+  struct AST_VALUE val;
+  struct AST *children;
+  struct AST *pipe_rhs; // in "func1 | func2" func2 is the piped rhs
+  int file_out_fd_to_use;
+  int file_out_append;
+  const char *file_out; // in "func1 > file.txt" file.txt is the file_out
+  struct AST *next;
+};
+
+void free_ast(struct AST *ast);
+struct AST *generate_ast(struct TOKEN *token);
+#endif // AST_H
diff --git a/userland/minibox/utilities/sh/lexer.c b/userland/minibox/utilities/sh/lexer.c
new file mode 100644
index 0000000..72011e6
--- /dev/null
+++ b/userland/minibox/utilities/sh/lexer.c
@@ -0,0 +1,126 @@
+#include <assert.h>
+#include <ctype.h>
+#include "lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+// Frees every token in the list; a NULL list is fine.
+void free_tokens(struct TOKEN *token) {
+  for (; token;) {
+    struct TOKEN *old = token;
+    token = token->next;
+    free(old);
+  }
+}
+
+// Returns 1 if c may appear inside a word: any printable, non-space character
+// except the operator characters '>', '|' and '&'.
+int is_nonspecial_char(char c) {
+  // The <ctype.h> functions are undefined for negative values, which a plain
+  // char holds for bytes >= 0x80 when char is signed.
+  unsigned char uc = (unsigned char)c;
+  if (!isprint(uc))
+    return 0;
+  if (isspace(uc))
+    return 0;
+  if (isalnum(uc))
+    return 1;
+  return ('>' != c && '|' != c && '&' != c);
+}
+
+// Lexes the word starting at *code_ptr into cur. Returns 1 and advances
+// *code_ptr on success, 0 if no word starts here and -1 if the word does not
+// fit into cur->string_rep.
+int parse_chars(const char **code_ptr, struct TOKEN *cur) {
+  const char *code = *code_ptr;
+  if (!is_nonspecial_char(*code))
+    return 0;
+  cur->type = TOKEN_CHARS;
+  size_t i = 0;
+  for (; *code; code++, i++) {
+    if (!is_nonspecial_char(*code)) {
+      break;
+    }
+    // Keep the last byte free for the terminating '\0'.
+    if (i + 1 >= sizeof cur->string_rep)
+      return -1;
+    cur->string_rep[i] = *code;
+  }
+  cur->string_rep[i] = '\0';
+  *code_ptr = code;
+  return 1;
+}
+
+// Operands such as: &, &&, |, || etc
+// Is operands the right word?
+int parse_operand(const char **code_ptr, struct TOKEN *cur) {
+  const char *code = *code_ptr;
+#define TRY_PARSE_STRING(_s, _token)                                           \
+  if (0 == strncmp(code, _s, strlen(_s))) {                                    \
+    cur->type = _token;                                                        \
+    strcpy(cur->string_rep, _s);                                               \
+    code += strlen(_s);                                                        \
+    goto complete_return;                                                      \
+  }
+  TRY_PARSE_STRING("&&", TOKEN_AND);
+  TRY_PARSE_STRING("||", TOKEN_NOT);
+  TRY_PARSE_STRING(">>", TOKEN_STREAM_APPEND);
+  TRY_PARSE_STRING(">", TOKEN_STREAM);
+  TRY_PARSE_STRING("|", TOKEN_PIPE);
+  // TODO: &
+
+  // Failed to parse
+  return 0;
+
+complete_return:
+  *code_ptr = code;
+  return 1;
+}
+
+// Advances *code_ptr past any leading whitespace.
+void skip_whitespace(const char **code_ptr) {
+  const char *code = *code_ptr;
+  for (; isspace((unsigned char)*code); code++)
+    ;
+  *code_ptr = code;
+}
+
+// Splits a command line into a linked list of tokens. Returns NULL for a blank
+// line and, after printing a message, for a line that cannot be lexed. The
+// caller releases the list with free_tokens().
+struct TOKEN *lex(const char *code) {
+  struct TOKEN *head = NULL;
+  struct TOKEN *prev = NULL;
+  for (; *code;) {
+    skip_whitespace(&code);
+    if (!*code)
+      break;
+    struct TOKEN *cur = malloc(sizeof *cur);
+    if (!cur) {
+      perror("malloc");
+      free_tokens(head);
+      return NULL;
+    }
+    cur->next = NULL;
+    int rc = parse_chars(&code, cur);
+    if (0 == rc)
+      rc = parse_operand(&code, cur);
+    if (1 != rc) {
+      // Give up on this line instead of aborting the whole shell. cur is not
+      // linked into the list yet, so it is freed on its own.
+      fprintf(stderr, "%s at: %s\n",
+              (0 > rc) ? "Token too long" : "Unknown token", code);
+      free(cur);
+      free_tokens(head);
+      return NULL;
+    }
+    if (prev)
+      prev->next = cur;
+    if (!head)
+      head = cur;
+    prev = cur;
+  }
+  return head;
+}
diff --git a/userland/minibox/utilities/sh/lexer.h b/userland/minibox/utilities/sh/lexer.h
new file mode 100644
index 0000000..57fb30b
--- /dev/null
+++ b/userland/minibox/utilities/sh/lexer.h
@@ -0,0 +1,24 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include <stddef.h>
+
+typedef enum {
+  TOKEN_CHARS,
+  TOKEN_AND,
+  TOKEN_NOT,
+  TOKEN_NOOP,
+  TOKEN_PIPE,
+  TOKEN_STREAM,
+  TOKEN_STREAM_APPEND,
+} token_type_t;
+
+struct TOKEN {
+  token_type_t type;
+  char string_rep[256];
+  struct TOKEN *next;
+};
+
+struct TOKEN *lex(const char *code);
+struct AST *generate_ast(struct TOKEN *token);
+void free_tokens(struct TOKEN *token);
+#endif // LEXER_H
diff --git a/userland/minibox/utilities/sh/sh.c b/userland/minibox/utilities/sh/sh.c
new file mode 100644
index 0000000..590e489
--- /dev/null
+++ b/userland/minibox/utilities/sh/sh.c
@@ -0,0 +1,210 @@
+#include "ast.h"
+#include "lexer.h"
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+// Capacity of argv in execute_binary(), counting argv[0] and the final NULL.
+#define SH_MAX_ARGV 100
+// Size of the buffer returned by get_line().
+#define SH_LINE_SIZE 1024
+
+int execute_command(struct AST *ast, int input_fd);
+
+// Forks and execs the program described by ast with stdin taken from input_fd.
+// stdout goes into a pipe feeding ast->pipe_rhs and/or to ast->file_out when
+// those are set. Returns the status stored by wait() for a lone command, the
+// result of the right-hand side for a pipeline, or 1 if the setup fails.
+int execute_binary(struct AST *ast, int input_fd) {
+  char *program = ast->val.string;
+  struct AST *child = ast->children;
+  char *argv[SH_MAX_ARGV];
+  argv[0] = program;
+  int i = 1;
+  for (; child; i++, child = child->next) {
+    // The last slot is reserved for the terminating NULL.
+    if (i >= SH_MAX_ARGV - 1) {
+      fprintf(stderr, "%s: too many arguments\n", program);
+      return 1;
+    }
+    argv[i] = child->val.string;
+  }
+  argv[i] = NULL;
+
+  int in = input_fd;
+  int out = STDOUT_FILENO;
+  int slave_input = -1;
+
+  int file_out_fd = -1;
+  if (ast->file_out) {
+    file_out_fd =
+        open(ast->file_out,
+             O_WRONLY | O_CREAT | ((ast->file_out_append) ? O_APPEND : O_TRUNC),
+             0666);
+    if (-1 == file_out_fd) {
+      perror(ast->file_out);
+      return 1;
+    }
+  }
+
+  if (ast->pipe_rhs) {
+    int fds[2];
+    if (-1 == pipe(fds)) {
+      perror("pipe");
+      if (file_out_fd >= 0)
+        close(file_out_fd);
+      return 1;
+    }
+    out = fds[1];
+    slave_input = fds[0];
+  }
+
+  int pid = fork();
+  if (-1 == pid) {
+    perror("fork");
+    if (file_out_fd >= 0)
+      close(file_out_fd);
+    if (ast->pipe_rhs) {
+      close(out);
+      close(slave_input);
+    }
+    return 1;
+  }
+  if (0 == pid) {
+    if (slave_input >= 0)
+      close(slave_input);
+    dup2(in, STDIN_FILENO);
+    dup2(out, STDOUT_FILENO);
+    if (ast->file_out)
+      dup2(file_out_fd, ast->file_out_fd_to_use);
+
+    execvp(program, argv);
+    // Only reached when the exec failed.
+    perror(program);
+    exit(1);
+  }
+  if (ast->file_out)
+    close(file_out_fd);
+
+  if (ast->pipe_rhs) {
+    // Close the shell's copy of the write end so the reader can see EOF.
+    close(out);
+    int pipe_rc = execute_command(ast->pipe_rhs, slave_input);
+    // The read end was only needed until the right-hand side had started.
+    close(slave_input);
+    return pipe_rc;
+  }
+  int rc = 1;
+  // FIXME: Should use waitpid ... when my OS supports that
+  wait(&rc);
+  return rc;
+}
+
+// Runs a single command node: the "cd" builtin is handled inside the shell,
+// everything else is handed to execute_binary(). Returns 0 on success.
+int execute_command(struct AST *ast, int input_fd) {
+  char *program = ast->val.string;
+  if (0 == strcmp(program, "cd")) {
+    struct AST *child = ast->children;
+    char *directory;
+    if (!child)
+      directory = "~";
+    else
+      directory = child->val.string;
+    int rc = chdir(directory);
+    if (-1 == rc) {
+      perror("cd");
+      return 1;
+    }
+    return 0;
+  }
+  return execute_binary(ast, input_fd);
+}
+
+// Walks the list of nodes and runs the commands. "&&" skips the following
+// node when the previous command failed, "||" skips it when it succeeded.
+void execute_ast(struct AST *ast) {
+  int rc = -1;
+  for (; ast;) {
+    if (AST_COMMAND == ast->type) {
+      rc = execute_command(ast, STDIN_FILENO);
+    } else if (AST_CONDITIONAL_AND == ast->type) {
+      if (0 != rc) {
+        ast = ast->next;
+        if (!ast)
+          break;
+      }
+    } else if (AST_CONDITIONAL_NOT == ast->type) {
+      if (0 == rc) {
+        ast = ast->next;
+        if (!ast)
+          break;
+      }
+    }
+    ast = ast->next;
+  }
+}
+
+// Reads one line from stdin a byte at a time, echoing the input and treating
+// ASCII 8 as backspace. Returns a malloc'ed string that ends in "\n" which the
+// caller frees. Characters that do not fit into the buffer are dropped.
+char *get_line(void) {
+  char *str = malloc(SH_LINE_SIZE);
+  if (!str) {
+    perror("malloc");
+    exit(1);
+  }
+  char *p = str;
+  // The final two bytes stay reserved for the '\n' and the '\0'.
+  char *limit = str + SH_LINE_SIZE - 2;
+  int rc;
+  for (;;) {
+    if (0 == (rc = read(0, p, 1))) {
+      continue;
+    }
+    if (0 > rc) {
+      perror("read");
+      continue;
+    }
+    if (8 == *p) {
+      if (p == str)
+        continue;
+      putchar(*p);
+      p--;
+      continue;
+    }
+    // Buffer full: drop the character rather than write out of bounds.
+    if ('\n' != *p && p >= limit)
+      continue;
+    putchar(*p);
+    if ('\n' == *p) {
+      break;
+    }
+    p++;
+  }
+  p++;
+  *p = '\0';
+  return str;
+}
+
+// Shell entry point: prompt, read a line, lex, parse, execute, repeat.
+int sh_main(int argc, char **argv) {
+  (void)argc;
+  (void)argv;
+  for (;;) {
+    printf("/ : ");
+    char *line = get_line();
+    {
+      struct TOKEN *h = lex(line);
+      struct AST *ast_h = generate_ast(h);
+      execute_ast(ast_h);
+      free_tokens(h);
+      free_ast(ast_h);
+    }
+    free(line);
+  }
+  return 0;
+}