#include <stdio.h> #include <stdlib.h> #include <ctype.h> /* Define either of them to enable the corresponding rewritten forms of the * routines or both to have both of them rewritten. */ #define DECOMPRESS_COROUTINE // #define TOKENIZE_COROUTINE /* Helper macro to simplify tracing of the function calls and messages. */ #define TRACE(...) do { \ fprintf(stderr, __VA_ARGS__); \ putc('\n', stderr); \ } while (0) /* Helper macro to catch array overflows for extreme inputs. */ #define CATCH_OVERFLOW(ARR, LEN) do { \ if ((LEN) >= sizeof(ARR)/sizeof(*ARR)) { \ fprintf(stderr, "PANIC: Array " #ARR " overflow detected, abort\n"); \ exit(-1); \ } \ } while (0) /* Enumeration of possible token tags. */ enum Tag { WORD, PUNCT }; /* Names of the token tags. */ static const char *TOKEN_TAG[] = { [WORD] = "Word", [PUNCT] = "Punct" }; /* Token type with tag and value. */ typedef struct { enum Tag tag; char val[256]; size_t len; } Token; /* Primitive token channel for buffering multiple detected tokens. */ static struct { Token token[256]; size_t len; } token_chan; /* Function that adds another character to the token value. */ void add_to_token(char c) { Token *token = &token_chan.token[token_chan.len]; CATCH_OVERFLOW(token->val, token->len); token->val[token->len++] = c; } /* Function that adds the corresponding tag and closes token construction. */ void got_token(enum Tag tag) { CATCH_OVERFLOW(token_chan.token, token_chan.len); Token *token = &token_chan.token[token_chan.len]; token->val[token->len] = '\0'; token->tag = tag; TRACE("got_token(%s) = \"%s\"", TOKEN_TAG[tag], token->val); ++token_chan.len; } /* Stackless coroutine-version of the decompress-routine. 
*/
int co_decompress(void)
{
    /* pc: resume point (switch label) for the next invocation;
     * l:  remaining length of the current RLE run;
     * c:  current decoded character.
     * All three are static so their values survive across the
     * coroutine's returns (this is a stackless, Duff's-device-style
     * coroutine: "return" yields one character, the switch jumps back
     * to the matching case label on the next call). */
    static int pc, l, c;
    switch (pc) {
    case 0:
        while (1) {
            c = getchar();
            if (c == EOF) return EOF;
            if (c == 0xFF) {
                /* 0xFF escape: the next two bytes are <length> <char>. */
                l = getchar();
                c = getchar();
                while (l--) {
                    TRACE("nextchar() = '%c'", c);
                    /* Yield one copy of the run character; resume here. */
                    pc = 1; return c;
    case 1:;
                }
            } else {
                TRACE("nextchar() = '%c'", c);
                /* Yield a literal character; resume here. */
                pc = 2; return c;
    case 2:;
            }
        }
    }
}

/* Stackless coroutine-version of the tokenize-routine.
 * Each call pushes one character c into the tokenizer; completed tokens
 * are published through got_token() into the token channel. */
void co_tokenize(int c)
{
    /* Resume point; initialized to 1 so the very first call starts at
     * case 1 with the caller-supplied character (case 0 would yield
     * immediately and discard it). */
    static int pc = 1;
    switch (pc) {
    case 0:
        while (1) {
            /* Yield to receive the next character; resume at case 1. */
            pc = 1; return;
    case 1:;
            TRACE("emit('%c')", c);
            if (c == EOF) return;
            if (isalpha(c)) {
                do {
                    add_to_token(c);
                    /* Yield mid-word; resume at case 2 with the next char. */
                    pc = 2; return;
    case 2:;
                    TRACE("emit('%c')", c);
                } while (isalpha(c));
                got_token(WORD);
            }
            /* NOTE(review): when EOF arrives mid-word (via case 2), the
             * word is flushed above but control then falls through here
             * and emits a one-char PUNCT token holding the EOF value —
             * same behavior as tokenize() below; confirm intended. */
            add_to_token(c);
            got_token(PUNCT);
        }
    }
}

/* Decodes RLE-encoded input and pushes it into the tokenizer coroutine. */
void decompress(void)
{
    while (1) {
        int c = getchar();
        if (c == EOF) break;
        if (c == 0xFF) {
            /* 0xFF escape: the next two bytes are <length> <char>. */
            int l = getchar();
            c = getchar();
            while (l--) {
                co_tokenize(c);
            }
        } else
            co_tokenize(c);
    }
    /* Signal end of input so the tokenizer can flush a word that is
     * still under construction. */
    co_tokenize(EOF);
}

/* Calls the decompressor-coroutine for decoding RLE-encoded input and
 * constructs token. */
void tokenize(void)
{
    while (1) {
        int c = co_decompress();
        if (c == EOF) break;
        if (isalpha(c)) {
            do {
                add_to_token(c);
                c = co_decompress();
            } while (isalpha(c));
            got_token(WORD);
        }
        /* NOTE(review): if EOF terminated the word above, c is EOF here
         * and a one-char PUNCT token holding the EOF value is emitted
         * before the loop exits on the next iteration — same behavior
         * as co_tokenize() above; confirm intended. */
        add_to_token(c);
        got_token(PUNCT);
    }
}

/* Prints all token currently present in the token channel. */
void printToken(void)
{
    for (size_t i = 0; i < token_chan.len; ++i) {
        Token *token = &token_chan.token[i];
        TRACE(
            "Token: {\n"
            "\ttag: %s,\n"
            "\tval: \"%s\"\n"
            "}",
            TOKEN_TAG[token->tag],
            token->val
        );
        /* Reset the slot so it can be reused for a new token. */
        token->len = 0;
    }
    token_chan.len = 0;
}

/* Program entry.
*/ int main() { #if defined(TOKENIZE_COROUTINE) && defined(DECOMPRESS_COROUTINE) fprintf(stderr, "Decompress Coroutine, Tokenize Coroutine\n"); for (int c; (c = co_decompress()) != EOF;) { co_tokenize(c); printToken(); } #elif defined(TOKENIZE_COROUTINE) fprintf(stderr, "Tokenize Routine, Decompress Coroutine\n"); tokenize(); #elif defined(DECOMPRESS_COROUTINE) fprintf(stderr, "Decompress Routine, Tokenize Coroutine\n"); decompress(); #else #error "At least one (or both) of TOKENIZE_COROUTINE or DECOMPRESS_COROUTINE should be defined." #endif return 0; }