summaryrefslogtreecommitdiff
path: root/userland/minibox/utilities/sh/lexer.c
blob: 72011e6aff5a9b14ac04354a99b546153f95b0ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include <assert.h>
#include <ctype.h>
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

// Releases every node of a singly linked token list.
// `token` is the first node of the list; a NULL list is a no-op.
void free_tokens(struct TOKEN *token) {
  while (token != NULL) {
    // Read the successor before the node holding it is released.
    struct TOKEN *following = token->next;
    free(token);
    token = following;
  }
}

// Reports whether `c` may appear inside a plain word token.
// Returns 1 for printable, non-whitespace characters other than the shell
// operator characters '>', '|' and '&'; returns 0 for everything else
// (including the terminating NUL).
int is_nonspecial_char(char c) {
  // The <ctype.h> classifiers require an argument representable as
  // unsigned char (or EOF). Plain `char` may be signed, so a byte >= 0x80
  // would otherwise be passed as a negative value, which is undefined.
  const unsigned char uc = (unsigned char)c;
  if (!isprint(uc))
    return 0;
  if (isspace(uc))
    return 0;
  if (isalnum(uc))
    return 1;
  return ('>' != c && '|' != c && '&' != c);
}

// Tries to read a word (a run of non-special characters) starting at
// *code_ptr into `cur`.
//
// On success: sets cur->type to TOKEN_CHARS, stores the NUL-terminated word
// in cur->string_rep, advances *code_ptr past the word and returns 1.
// On failure: returns 0 and leaves *code_ptr untouched. This happens when the
// input does not start with a non-special character, or when the word does
// not fit in cur->string_rep.
int parse_chars(const char **code_ptr, struct TOKEN *cur) {
  const char *code = *code_ptr;
  if (!is_nonspecial_char(*code))
    return 0;

  // One byte is reserved for the terminating NUL.
  // NOTE(review): this relies on string_rep being an array embedded in
  // struct TOKEN (this file writes to it without allocating it separately);
  // confirm against lexer.h.
  const size_t capacity = sizeof(cur->string_rep) - 1;

  size_t i = 0;
  // is_nonspecial_char() is false for '\0', so this also stops at the end of
  // the input.
  for (; is_nonspecial_char(*code); code++, i++) {
    // A runtime check rather than assert(): assert() is compiled out under
    // NDEBUG, which would turn an overlong word into a buffer overflow.
    if (i >= capacity)
      return 0;
    cur->string_rep[i] = *code;
  }
  cur->string_rep[i] = '\0';
  cur->type = TOKEN_CHARS;
  *code_ptr = code;
  return 1;
}

// Tries to read a shell operator (&&, ||, >>, > or |) starting at *code_ptr.
//
// On success: sets cur->type to the matching token type, copies the operator
// text into cur->string_rep, advances *code_ptr past it and returns 1.
// On failure: returns 0 and leaves *code_ptr and `cur` untouched.
//
// The function name says "operand", but what it recognises are operators.
int parse_operand(const char **code_ptr, struct TOKEN *cur) {
  // Candidates are tried in order, so a longer operator must come before any
  // operator that is a prefix of it (">>" before ">", "||" before "|").
  // NOTE(review): "||" is tagged TOKEN_NOT; confirm this is the intended
  // token type for logical OR.
  static const struct {
    const char *text;
    int type;
  } operators[] = {
      {"&&", TOKEN_AND},
      {"||", TOKEN_NOT},
      {">>", TOKEN_STREAM_APPEND},
      {">", TOKEN_STREAM},
      {"|", TOKEN_PIPE},
      // TODO: &
  };

  const char *code = *code_ptr;
  for (size_t i = 0; i < sizeof(operators) / sizeof(operators[0]); i++) {
    const size_t len = strlen(operators[i].text);
    if (0 != strncmp(code, operators[i].text, len))
      continue;
    cur->type = operators[i].type;
    strcpy(cur->string_rep, operators[i].text);
    *code_ptr = code + len;
    return 1;
  }

  // Failed to parse
  return 0;
}

// Advances *code_ptr past any leading whitespace (as classified by
// isspace()). Stops at the first non-whitespace character, which may be the
// terminating NUL.
void skip_whitespace(const char **code_ptr) {
  const char *code = *code_ptr;
  // Convert to unsigned char first: passing a negative plain `char` to a
  // <ctype.h> function is undefined behaviour.
  while (isspace((unsigned char)*code))
    code++;
  *code_ptr = code;
}

// Splits `code` into a singly linked list of tokens.
//
// Whitespace between tokens is skipped. Each token is either a word
// (parse_chars) or an operator (parse_operand).
//
// Returns the head of a newly allocated list that the caller releases with
// free_tokens(). Returns NULL when the input contains no tokens, when an
// allocation fails, or - in builds where assert() is disabled - when the
// input contains something that is neither a word nor an operator. In builds
// with assertions enabled, unrecognised input prints its position and aborts.
struct TOKEN *lex(const char *code) {
  struct TOKEN *head = NULL;
  struct TOKEN *prev = NULL;
  for (;;) {
    skip_whitespace(&code);
    if (!*code)
      break;

    struct TOKEN *cur = malloc(sizeof *cur);
    if (!cur) {
      // Out of memory: do not hand back a partial list.
      free_tokens(head);
      return NULL;
    }
    cur->next = NULL;

    if (!parse_chars(&code, cur) && !parse_operand(&code, cur)) {
      free(cur);
      printf("at: %s\n", code);
      assert(0 && "Unknown token");
      // Only reached when assert() is compiled out. `code` was not advanced,
      // so continuing would loop forever; drop what was built and bail out.
      free_tokens(head);
      return NULL;
    }

    // Link the node only once it holds a valid token, so the list never
    // points at a node that has been freed.
    if (prev)
      prev->next = cur;
    else
      head = cur;
    prev = cur;
  }
  return head;
}