commit 5a62b83bf91cab34389b7ab0e981c1804bc2ff54
parent c478cb456d822ba3957eef09d5fe26af9c1a24b7
Author: Juan F. Meleiro <juan@juanmeleiro.mat.br>
Date: Thu, 9 May 2024 12:19:47 +0200
Add tokenizer
Diffstat:
7 files changed, 109 insertions(+), 4 deletions(-)
diff --git a/coding/default.o.do b/coding/default.o.do
@@ -1 +1,4 @@
-cc -c $(basename "$1" .o).c -o "$3"
-\ No newline at end of file
+# redo rule: compile <name>.c into <name>.o.
+# redo passes the target name as $1 and the temporary output path as $3.
+src=$(basename "$1" .o).c
+# Rebuild the object whenever its source file changes.
+redo-ifchange "$src"
+# Log the command to stderr, showing the target name instead of the
+# temporary output path.  $src is quoted so a source name containing
+# whitespace or glob characters is passed through as a single word.
+echo cc -g -c "$src" -o "$1" >&2
+cc -g -c "$src" -o "$3"
+\ No newline at end of file
diff --git a/coding/default.test.do b/coding/default.test.do
@@ -2,4 +2,5 @@ deps=$(grep '^#include ".*"$' $1.c | sed 's/#include "\(.*\)\.h"/\1.o/')
redo-ifchange "$1.o"
redo-ifchange $deps
src=$(ls *.o | grep -v .test.o)
+echo cc -o "$1" $src "$1.o" >&2
cc -o "$3" $src "$1.o"
diff --git a/coding/default.test.o.do b/coding/default.test.o.do
@@ -2,7 +2,8 @@
headers=*.h
src=$(basename $1 .o).c
-deps=$(grep '^#include ".*"$' $src | sed 's/#include "\(.*\)\.h"/\1.c/')
+deps=$(grep '^#include ".*"$' $src | sed 's/#include "\(.*\)\.h"/\1.o/')
redo-ifchange $deps
redo-ifchange $src
+echo cc -c -g -Wall -Wextra -fmax-errors=1 $src -o $1 >&2
cc -c -g -Wall -Wextra -fmax-errors=1 $src -o $3 >&2
diff --git a/coding/test.do b/coding/test.do
@@ -1,6 +1,6 @@
#!/bin/sh
+# For each listed module: bring <module>.test up to date, then run it with its
+# output redirected to stderr.  The script exits with status 1 as soon as a
+# build or a test run fails.
-for t in symbol assoc stack tree schema gardener zen model
+for t in symbol assoc stack tree schema gardener zen model tokenizer
do
redo-ifchange $t.test || exit 1
./$t.test >&2 || exit 1
diff --git a/coding/tokenizer.c b/coding/tokenizer.c
@@ -0,0 +1,64 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "tokenizer.h"
+
+/*
+ * Tokenizer state: the input stream plus a growable scratch buffer holding
+ * the characters of the token currently being read.
+ */
+struct tokenizer {
+ char *buf; /* heap buffer in which the current token is assembled */
+ size_t cap; /* number of chars allocated for buf */
+ size_t len; /* number of chars currently stored in buf */
+ FILE *src; /* stream the characters are read from */
+};
+
+/*
+ * Report whether c separates tokens: any whitespace or control character.
+ *
+ * The <ctype.h> classifiers require an argument that is representable as
+ * unsigned char (or equal to EOF).  Passing a negative plain char is
+ * undefined behaviour, so the value is converted before it is classified.
+ */
+bool
+isdelim(char c)
+{
+	unsigned char uc = (unsigned char)c;
+
+	return isspace(uc) || iscntrl(uc);
+}
+
+/*
+ * Allocate a tokenizer that reads its characters from src.
+ *
+ * The scratch buffer starts with room for a single char and is enlarged on
+ * demand by buf_append().  Only the stream pointer is stored; nothing in this
+ * file closes the stream.
+ *
+ * Returns the new tokenizer, or NULL if either allocation fails (in which
+ * case nothing is leaked).
+ */
+tokenizer*
+new_tokenizer(FILE* src)
+{
+	tokenizer *t = malloc(sizeof *t);
+
+	if (t == NULL)
+		return NULL;
+
+	t->cap = 1;
+	t->len = 0;
+	t->src = src;
+	t->buf = malloc(t->cap * sizeof *t->buf);
+	if (t->buf == NULL) {
+		free(t);
+		return NULL;
+	}
+	return t;
+}
+
+/*
+ * Append c to the tokenizer's scratch buffer, doubling the buffer first when
+ * it is full.
+ *
+ * Failing to grow the buffer is fatal: a message is written to stderr and the
+ * process aborts.  The failure is detected with an explicit test rather than
+ * assert() so that it is still caught when NDEBUG is defined, and the
+ * tokenizer's fields are only updated once the larger buffer really exists.
+ */
+void
+buf_append(tokenizer *t, char c)
+{
+	if (t->len == t->cap) {
+		size_t grown_cap = t->cap * 2;
+		char *grown = NULL;
+
+		/* grown_cap <= cap means the doubling wrapped around (or cap was 0). */
+		if (grown_cap > t->cap)
+			grown = realloc(t->buf, grown_cap * sizeof *grown);
+		if (grown == NULL) {
+			fputs("tokenizer: cannot grow token buffer\n", stderr);
+			abort();
+		}
+		t->buf = grown;
+		t->cap = grown_cap;
+	}
+	t->buf[t->len++] = c;
+}
+
+/*
+ * Read the next token from the tokenizer's stream and return it interned.
+ *
+ * Characters are collected up to the first delimiter (see isdelim()) or the
+ * end of input.  The run of delimiters following the token is consumed too,
+ * so once the input is exhausted the stream's end-of-file indicator is set
+ * and eos() reports true.  If the stream is positioned on a delimiter, or has
+ * nothing left, the token is the empty string.
+ *
+ * The scratch buffer must be empty on entry and is left empty on return.
+ */
+symbol
+next_token(tokenizer* t)
+{
+	/*
+	 * fgetc() returns an int so that EOF is distinct from every byte value.
+	 * Storing the result in a plain char either makes byte 0xFF look like
+	 * EOF (signed char) or makes EOF impossible to detect (unsigned char).
+	 */
+	int c;
+	symbol res;
+
+	assert(t->len == 0);
+
+	/* EOF is tested first, so isdelim() only ever sees real characters. */
+	for (c = fgetc(t->src); c != EOF && !isdelim((char)c); c = fgetc(t->src))
+		buf_append(t, (char)c);
+	buf_append(t, '\0');
+
+	/* Skip the delimiters that follow the token ... */
+	while (c != EOF && isdelim((char)c))
+		c = fgetc(t->src);
+	/* ... and give back the first character of the next token, if any. */
+	if (c != EOF)
+		ungetc(c, t->src);
+
+	res = intern(t->buf);
+	t->len = 0;
+	return res;
+}
+
+/*
+ * Tell whether the tokenizer's input stream has its end-of-file indicator
+ * set, i.e. whether a read has already run into the end of the input.
+ */
+bool
+eos(tokenizer* t)
+{
+	FILE *stream = t->src;
+
+	return feof(stream) != 0;
+}
diff --git a/coding/tokenizer.h b/coding/tokenizer.h
@@ -0,0 +1,10 @@
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+
+/* Tokenizer that splits a stdio stream into interned symbols. */
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "symbol.h"
+
+/* Opaque tokenizer handle; the structure is defined in tokenizer.c. */
+typedef struct tokenizer tokenizer;
+
+/* Create a tokenizer that reads its characters from src. */
+tokenizer *new_tokenizer(FILE *src);
+
+/* Read the next token from t and return it as an interned symbol. */
+symbol next_token(tokenizer *t);
+
+/* True once t's input stream has reported end-of-file. */
+bool eos(tokenizer *t);
+
+#endif /* TOKENIZER_H */
diff --git a/coding/tokenizer.test.c b/coding/tokenizer.test.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "tokenizer.h"
+
+#include "symbol.h"
+
+
+/*
+ * Test input: eight tokens separated by assorted runs of spaces, tabs,
+ * newlines and a carriage return.
+ */
+#define STR "abc def ghi jkl\n\tmno \t\t\npqrs \rtuv wxyz\n"
+
+/*
+ * Tokenize STR through an in-memory stream and check that the tokens come
+ * back in order, and that eos() turns true exactly after the last one.
+ */
+int
+main(void)
+{
+	/* Tokens expected from STR, in order of appearance. */
+	static const char *const expected[] = {
+		"abc", "def", "ghi", "jkl", "mno", "pqrs", "tuv", "wxyz",
+	};
+	const size_t count = sizeof expected / sizeof expected[0];
+	FILE *f;
+	tokenizer *t;
+	size_t i;
+
+	/*
+	 * sizeof(STR) also covers the terminating NUL, so that byte reaches the
+	 * tokenizer as one more input character after the final newline.
+	 */
+	f = fmemopen(STR, sizeof(STR), "r");
+	assert(f != NULL);
+	t = new_tokenizer(f);
+	assert(t != NULL);
+
+	for (i = 0; i < count; i++) {
+		/* Call outside assert() so the read does not depend on NDEBUG. */
+		symbol got = next_token(t);
+
+		assert(got == intern(expected[i]));
+		assert(eos(t) == (i + 1 == count));
+	}
+
+	/* The tokenizer API has no destructor; only the stream is released. */
+	fclose(f);
+	return 0;
+}