/*
 * Dorian Weber authored 11e245f4
 * Code owners: Assign users and groups as approvers for specific file changes. Learn more.
 * coroutines.c 4.02 KiB
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/* Build-time switches consulted by main() to choose which pipeline variant
* runs. Define DECOMPRESS_COROUTINE, TOKENIZE_COROUTINE, or both; if neither
* is defined, main() stops the build with an #error. */
#define DECOMPRESS_COROUTINE
// #define TOKENIZE_COROUTINE
/* Diagnostic helper: formats its arguments printf-style onto stderr and
 * terminates the line. The first argument must be the format string. Wrapped
 * in do/while(0) so it behaves as one statement, e.g. inside an unbraced
 * if/else. */
#define TRACE(...)                                  \
    do {                                            \
        fprintf(stderr, __VA_ARGS__);               \
        fputc('\n', stderr);                        \
    } while (0)
/* Bounds guard for fixed-size arrays.
 *
 * ARR must be an actual array (not a pointer), because its element count is
 * computed with sizeof. LEN is the index about to be used; if it is not a
 * valid index of ARR, a panic message naming ARR is written to stderr and the
 * process exits with status -1.
 *
 * The message is emitted with fputs() rather than as a printf format string:
 * the stringified ARR expression is arbitrary source text, and a '%' in it
 * would otherwise be interpreted as a conversion specification. */
#define CATCH_OVERFLOW(ARR, LEN)                                              \
    do {                                                                      \
        if ((LEN) >= sizeof(ARR) / sizeof((ARR)[0])) {                        \
            fputs("PANIC: Array " #ARR " overflow detected, abort\n", stderr); \
            exit(-1);                                                         \
        }                                                                     \
    } while (0)
/* Kinds of token the tokenizer produces. The numeric values double as
 * indices into the TOKEN_TAG name table. */
enum Tag {
    WORD  = 0,
    PUNCT = 1
};
/* Printable name for each enum Tag value, indexed by the tag itself. Both the
 * strings and the table slots are const: this is a read-only lookup table. */
static const char *const TOKEN_TAG[] = {
    [WORD]  = "Word",
    [PUNCT] = "Punct"
};
/* A single token: its kind plus the characters it consists of. */
typedef struct {
enum Tag tag; /* kind of token; assigned by got_token() */
char val[256]; /* characters appended by add_to_token(); got_token() writes the '\0' terminator */
size_t len; /* number of characters stored in val so far; reset to 0 by printToken() */
} Token;
/* Primitive token channel for buffering multiple detected tokens. Filled by
* add_to_token()/got_token() and emptied by printToken(). */
static struct {
Token token[256]; /* token slots; got_token() panics via CATCH_OVERFLOW when no slot is left */
size_t len; /* count of finished tokens, and also the index of the token under construction */
} token_chan;
/* Appends one character to the token currently under construction, i.e. the
 * channel slot at index token_chan.len.
 *
 * c: character to append.
 *
 * Panics (via CATCH_OVERFLOW) instead of writing out of bounds when either
 * the channel has no free slot left or the token value is full. */
void add_to_token(char c) {
    /* Validate the slot index before touching the slot: once every slot has
     * been used, token_chan.len equals the array size and is no longer a
     * valid index. */
    CATCH_OVERFLOW(token_chan.token, token_chan.len);
    Token *token = &token_chan.token[token_chan.len];

    /* Check len + 1 rather than len so the last byte of val always stays
     * free for the '\0' that got_token() writes at val[len]. */
    CATCH_OVERFLOW(token->val, token->len + 1);
    token->val[token->len++] = c;
}
/* Finishes the token under construction: terminates its value string, stores
 * its tag, traces it, and advances the channel to the next slot.
 *
 * tag: kind to assign to the finished token.
 *
 * Panics (via CATCH_OVERFLOW) when the channel has no free slot or when the
 * value buffer has no room left for the terminator. */
void got_token(enum Tag tag) {
    CATCH_OVERFLOW(token_chan.token, token_chan.len);
    Token *token = &token_chan.token[token_chan.len];

    /* Guard the terminator write: if val has been filled completely,
     * val[len] would be one element past the end of the array. */
    CATCH_OVERFLOW(token->val, token->len);
    token->val[token->len] = '\0';
    token->tag = tag;

    TRACE("got_token(%s) = \"%s\"", TOKEN_TAG[tag], token->val);
    ++token_chan.len;
}
/* Stackless coroutine version of the decompress routine.
 *
 * Each call reads as much of stdin as needed and returns the next decoded
 * character. A 0xFF byte introduces a run: the following byte is the repeat
 * count and the byte after that is the character to repeat. All other bytes
 * are returned unchanged.
 *
 * Returns the next character, or EOF when stdin is exhausted or ends in the
 * middle of a run header.
 *
 * State lives in static variables, and `pc` selects the case label at which
 * the next call resumes. The case labels sit inside the loops on purpose, so
 * statement order here is significant. */
int co_decompress(void) {
    static int pc, l, c;
    switch (pc) {
        case 0: while (1) {
            c = getchar();
            if (c == EOF) {
                /* Restart from the top on the next call. Otherwise a stale
                 * pc == 1 would resume inside the run loop, where l is
                 * already negative after the previous run finished. */
                pc = 0;
                return EOF;
            }
            if (c == 0xFF) {
                l = getchar();
                c = getchar();
                if (l == EOF || c == EOF) {
                    /* Truncated run header: there is nothing to repeat. */
                    pc = 0;
                    return EOF;
                }
                while (l--) {
                    TRACE("nextchar() = '%c'", c);
                    pc = 1;
                    return c;
        case 1:;    /* resume point: continue the current run */
                }
            } else {
                TRACE("nextchar() = '%c'", c);
                pc = 2;
                return c;
        case 2:;    /* resume point: go on to read the next input byte */
            }
        }
    }
    /* Only reachable if pc held a value without a matching case label. */
    return EOF;
}
/* Stackless coroutine version of the tokenize routine.
 *
 * Each call consumes one character. A maximal run of alphabetic characters
 * becomes a WORD token; every other character becomes a single-character
 * PUNCT token. EOF closes a word that is still open and produces no token of
 * its own.
 *
 * c: next input character (a value as returned by getchar()), or EOF.
 *
 * `pc` selects the case label at which the next call resumes: 1 means
 * "between tokens", 2 means "inside a word". It starts at 1 and is only ever
 * set to 1 or 2, so case 0 is never the entry point. The case labels sit
 * inside the loops on purpose, so statement order here is significant. */
void co_tokenize(int c) {
    static int pc = 1;
    switch (pc) {
        case 0: while (1) {
            pc = 1;
            return;
        case 1:;    /* resume point: between tokens */
            TRACE("emit('%c')", c);
            if (c == EOF)
                return;
            if (isalpha(c)) {
                do {
                    add_to_token(c);
                    pc = 2;
                    return;
        case 2:;    /* resume point: inside a word */
                    TRACE("emit('%c')", c);
                } while (isalpha(c));
                got_token(WORD);
                if (c == EOF) {
                    /* End of input ended the word. EOF is not a character,
                     * so it must not be stored as a PUNCT token. */
                    pc = 1;
                    return;
                }
            }
            /* c is the non-alphabetic character, possibly the one that
             * ended a word. */
            add_to_token(c);
            got_token(PUNCT);
        }
    }
}
/* Decodes RLE-encoded input from stdin and pushes every decoded character
 * into the tokenizer coroutine, followed by a final EOF.
 *
 * A 0xFF byte introduces a run: the next byte is the repeat count and the
 * byte after that is the character to repeat. All other bytes are forwarded
 * unchanged. Input that ends in the middle of a run header is treated as end
 * of input. */
void decompress(void) {
    while (1) {
        int c = getchar();
        if (c == EOF)
            break;

        if (c != 0xFF) {
            co_tokenize(c);
            continue;
        }

        int l = getchar();
        /* Without this check l would be EOF (negative) and the countdown
         * below would run until the int wraps around. */
        if (l == EOF)
            break;
        c = getchar();
        if (c == EOF)
            break;

        while (l--) {
            co_tokenize(c);
        }
    }
    co_tokenize(EOF);
}
/* Pulls decoded characters from the decompressor coroutine and builds tokens
 * from them until it reports EOF.
 *
 * A maximal run of alphabetic characters becomes a WORD token; every other
 * character becomes a single-character PUNCT token. */
void tokenize(void) {
    while (1) {
        int c = co_decompress();
        if (c == EOF)
            break;

        if (isalpha(c)) {
            do {
                add_to_token(c);
                c = co_decompress();
            } while (isalpha(c));
            got_token(WORD);

            /* The word may have been ended by end of input rather than by a
             * character. EOF must not be stored as a PUNCT token. */
            if (c == EOF)
                break;
        }

        add_to_token(c);
        got_token(PUNCT);
    }
}
/* Dumps every finished token in the channel to stderr and then empties the
 * channel. Each dumped slot has its length reset so it can be refilled. */
void printToken(void) {
    Token *cur = token_chan.token;
    Token *const end = cur + token_chan.len;

    while (cur != end) {
        TRACE(
            "Token: {\n"
            "\ttag: %s,\n"
            "\tval: \"%s\"\n"
            "}",
            TOKEN_TAG[cur->tag],
            cur->val
        );
        cur->len = 0;
        ++cur;
    }

    token_chan.len = 0;
}
/* Program entry: runs the pipeline variant selected by the build switches.
 *
 * - both switches defined: main() pulls characters from co_decompress() and
 *   pushes them into co_tokenize(), dumping the channel after every character;
 * - only DECOMPRESS_COROUTINE: the plain tokenize() routine drives the
 *   co_decompress() coroutine;
 * - only TOKENIZE_COROUTINE: the plain decompress() routine drives the
 *   co_tokenize() coroutine;
 * - neither: the build is rejected.
 *
 * Returns 0. */
int main(void) {
#if defined(TOKENIZE_COROUTINE) && defined(DECOMPRESS_COROUTINE)
    fprintf(stderr, "Decompress Coroutine, Tokenize Coroutine\n");
    for (int c; (c = co_decompress()) != EOF;) {
        co_tokenize(c);
        printToken();
    }
    /* Signal end of input to the tokenizer, as decompress() does, so a word
     * still under construction is closed and dumped. */
    co_tokenize(EOF);
    printToken();
#elif defined(DECOMPRESS_COROUTINE)
    fprintf(stderr, "Tokenize Routine, Decompress Coroutine\n");
    tokenize();
#elif defined(TOKENIZE_COROUTINE)
    fprintf(stderr, "Decompress Routine, Tokenize Coroutine\n");
    decompress();
#else
#error "At least one (or both) of TOKENIZE_COROUTINE or DECOMPRESS_COROUTINE should be defined."
#endif
    return 0;
}