loa

Virtual machine for the Logic of Assumptions
git clone git://juanmeleiro.mat.br/loa
Log | Files | Refs

commit 5a62b83bf91cab34389b7ab0e981c1804bc2ff54
parent c478cb456d822ba3957eef09d5fe26af9c1a24b7
Author: Juan F. Meleiro <juan@juanmeleiro.mat.br>
Date:   Thu,  9 May 2024 12:19:47 +0200

Add tokenizer

Diffstat:
M coding/default.o.do | 7 +++++--
M coding/default.test.do | 1 +
M coding/default.test.o.do | 3 ++-
M coding/test.do | 2 +-
A coding/tokenizer.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A coding/tokenizer.h | 10 ++++++++++
A coding/tokenizer.test.c | 26 ++++++++++++++++++++++++++
7 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/coding/default.o.do b/coding/default.o.do
@@ -1 +1,4 @@
-cc -c $(basename "$1" .o).c -o "$3"
-\ No newline at end of file
+src=$(basename "$1" .o).c
+redo-ifchange $src
+echo cc -g -c $src -o "$1" >&2
+cc -g -c $src -o "$3"
+\ No newline at end of file
diff --git a/coding/default.test.do b/coding/default.test.do
@@ -2,4 +2,5 @@ deps=$(grep '^#include ".*"$' $1.c | sed 's/#include "\(.*\)\.h"/\1.o/')
 redo-ifchange "$1.o"
 redo-ifchange $deps
 src=$(ls *.o | grep -v .test.o)
+echo cc -o "$1" $src "$1.o" >&2
 cc -o "$3" $src "$1.o"
diff --git a/coding/default.test.o.do b/coding/default.test.o.do
@@ -2,7 +2,8 @@
 
 headers=*.h
 src=$(basename $1 .o).c
-deps=$(grep '^#include ".*"$' $src | sed 's/#include "\(.*\)\.h"/\1.c/')
+deps=$(grep '^#include ".*"$' $src | sed 's/#include "\(.*\)\.h"/\1.o/')
 redo-ifchange $deps
 redo-ifchange $src
+echo cc -c -g -Wall -Wextra -fmax-errors=1 $src -o $1 >&2
 cc -c -g -Wall -Wextra -fmax-errors=1 $src -o $3 >&2
diff --git a/coding/test.do b/coding/test.do
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-for t in symbol assoc stack tree schema gardener zen model
+for t in symbol assoc stack tree schema gardener zen model tokenizer
 do
 	redo-ifchange $t.test || exit 1
 	./$t.test >&2 || exit 1
diff --git a/coding/tokenizer.c b/coding/tokenizer.c
@@ -0,0 +1,64 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "tokenizer.h"
+
+struct tokenizer {
+	char *buf;
+	size_t cap;
+	size_t len;
+	FILE *src;
+};
+
+bool
+isdelim(char c)
+{
+	return isspace(c) || iscntrl(c);
+}
+
+tokenizer*
+new_tokenizer(FILE* src)
+{
+	tokenizer *t = malloc(sizeof(tokenizer));
+	t->cap = 1;
+	t->len = 0;
+	t->buf = malloc(sizeof(char)*t->cap);
+	t->src = src;
+	return t;
+}
+
+void
+buf_append(tokenizer *t, char c)
+{
+	if (t->len == t->cap)
+		t->buf = realloc(t->buf, (t->cap *= 2)*sizeof(char));
+	assert(t->buf);
+	t->buf[t->len++] = c;
+}
+
+symbol
+next_token(tokenizer* t)
+{
+	char c;
+	symbol res;
+
+	assert(t->len == 0);
+
+	for (c = fgetc(t->src); !isdelim(c) && c != EOF; c = fgetc(t->src))
+		buf_append(t, c);
+	buf_append(t, '\0');
+
+	while (isdelim(c)) c = fgetc(t->src);
+	if (!feof(t->src)) ungetc(c, t->src);
+
+	res = intern(t->buf);
+	t->len = 0;
+	return res;
+}
+
+bool
+eos(tokenizer* t)
+{
+	return feof(t->src);
+}
diff --git a/coding/tokenizer.h b/coding/tokenizer.h
@@ -0,0 +1,10 @@
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "symbol.h"
+
+typedef struct tokenizer tokenizer;
+
+tokenizer *new_tokenizer(FILE*);
+symbol next_token(tokenizer*);
+bool eos(tokenizer*);
diff --git a/coding/tokenizer.test.c b/coding/tokenizer.test.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "tokenizer.h"
+
+#include "symbol.h"
+
+
+#define STR "abc def ghi jkl\n\tmno \t\t\npqrs \rtuv wxyz\n"
+
+int
+main()
+{
+	FILE *f = fmemopen(STR, sizeof(STR), "r");
+	tokenizer *t = new_tokenizer(f);
+	assert(next_token(t) == intern("abc")); assert(!eos(t));
+	assert(next_token(t) == intern("def")); assert(!eos(t));
+	assert(next_token(t) == intern("ghi")); assert(!eos(t));
+	assert(next_token(t) == intern("jkl")); assert(!eos(t));
+	assert(next_token(t) == intern("mno")); assert(!eos(t));
+	assert(next_token(t) == intern("pqrs")); assert(!eos(t));
+	assert(next_token(t) == intern("tuv")); assert(!eos(t));
+	assert(next_token(t) == intern("wxyz"));
+	assert(eos(t));
+	return 0;
+}