cgmnlm

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/cgmnlm.git
Log | Files | Refs | README | LICENSE

commit 48d0feed6d097c54662a7f231c7bc4704837f023
parent 33495e8dd86139cafade2888227e37b1572d18ea
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sun, 20 Sep 2020 17:47:14 -0400

Initial pass on text/gemini parser

Diffstat:
M configure | 1 +
M include/gmni.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/gmni.c | 4 +++-
A src/parser.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/configure b/configure @@ -7,6 +7,7 @@ gmni() { src/client.c \ src/escape.c \ src/gmni.c \ + src/parser.c \ src/url.c } diff --git a/include/gmni.h b/include/gmni.h @@ -103,4 +103,64 @@ char *gemini_input_url(const char *url, const char *input); // of the given Gemini status code. enum gemini_status_class gemini_response_class(enum gemini_status status); +enum gemini_tok { + GEMINI_TEXT, + GEMINI_LINK, + GEMINI_PREFORMATTED, + GEMINI_HEADING, + GEMINI_LIST_ITEM, + GEMINI_QUOTE, +}; + +struct gemini_token { + enum gemini_tok token; + + // The token field determines which of the union members is valid. + union { + char *text; + + struct { + char *text; + char *url; // May be NULL + } link; + + struct { + char *text; + char *alt_text; // May be NULL + } preformatted; + + struct { + char *title; + int level; // 1, 2, or 3 + } heading; + + char *list_item; + char *quote_text; + }; +}; + +struct gemini_parser { + BIO *f; + char *buf; + size_t bufsz; + size_t bufln; +}; + +// Initializes a text/gemini parser which reads from the specified BIO. +void gemini_parser_init(struct gemini_parser *p, BIO *f); + +// Finishes this text/gemini parser and frees up its resources. +void gemini_parser_finish(struct gemini_parser *p); + +// Reads the next token from a text/gemini file. +// +// Returns 0 on success, 1 on EOF, and -1 on failure. +// +// Caller must call gemini_token_finish before exiting or re-using the token +// parameter. +int gemini_parser_next(struct gemini_parser *p, struct gemini_token *token); + +// Must be called after gemini_next to free up resources for the next token. 
+void gemini_token_finish(struct gemini_token *token); + #endif diff --git a/src/gmni.c b/src/gmni.c @@ -233,9 +233,11 @@ main(int argc, char *argv[]) printf("%d %s\n", resp.status, resp.meta); /* fallthrough */ case OMIT_HEADERS: - if (resp.status / 10 != 2) { + if (gemini_response_class(resp.status) != + GEMINI_STATUS_CLASS_SUCCESS) { break; } + char last; char buf[BUFSIZ]; for (int n = 1; n > 0;) { diff --git a/src/parser.c b/src/parser.c @@ -0,0 +1,144 @@ +#include <assert.h> +#include <ctype.h> +#include <openssl/bio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include "gmni.h" + +void +gemini_parser_init(struct gemini_parser *p, BIO *f) +{ + p->f = f; + p->bufln = 0; + p->bufsz = BUFSIZ; + p->buf = malloc(p->bufsz + 1); + p->buf[0] = 0; + BIO_up_ref(p->f); +} + +void +gemini_parser_finish(struct gemini_parser *p) +{ + if (!p) { + return; + } + BIO_free(p->f); + free(p->buf); +} + +int +gemini_parser_next(struct gemini_parser *p, struct gemini_token *tok) +{ + memset(tok, 0, sizeof(*tok)); + + int eof = 0; + while (!strstr(p->buf, "\n")) { + if (p->bufln == p->bufsz) { + p->bufsz *= 2; + char *buf = realloc(p->buf, p->bufsz); + assert(buf); + p->buf = buf; + } + + int n = BIO_read(p->f, &p->buf[p->bufln], p->bufsz - p->bufln); + if (n == -1) { + return -1; + } else if (n == 0) { + eof = 1; + break; + } + p->bufln += n; + p->buf[p->bufln] = 0; + } + + // TODO: Collapse multi-line text for the user-agent to wrap + char *end; + if ((end = strstr(p->buf, "\n")) != NULL) { + *end = 0; + } + + // TODO: Provide whitespace trimming helper function + if (strncmp(p->buf, "=>", 2) == 0) { + tok->token = GEMINI_LINK; + int i = 2; + while (p->buf[i] && isspace(p->buf[i])) ++i; + tok->link.url = &p->buf[i]; + + for (; p->buf[i]; ++i) { + if (isspace(p->buf[i])) { + p->buf[i++] = 0; + while (isspace(p->buf[i])) ++i; + if (p->buf[i]) { + tok->link.text = strdup(&p->buf[i]); + } + break; + } + } + + tok->link.url = strdup(tok->link.url); + } else if 
(strncmp(p->buf, "```", 3) == 0) { + tok->token = GEMINI_PREFORMATTED; // TODO + tok->preformatted.text = strdup("<text>"); + tok->preformatted.alt_text = strdup("<alt-text>"); + } else if (p->buf[0] == '#') { + tok->token = GEMINI_HEADING; + int level = 1; + while (p->buf[level] == '#' && level < 3) { + ++level; + } + tok->heading.level = level; + tok->heading.title = strdup(&p->buf[level]); + } else if (p->buf[0] == '*') { + tok->token = GEMINI_LIST_ITEM; + tok->list_item = strdup(&p->buf[1]); + } else if (p->buf[0] == '>') { + tok->token = GEMINI_QUOTE; + tok->quote_text = strdup(&p->buf[1]); + } else { + tok->token = GEMINI_TEXT; + tok->text = strdup(p->buf); + } + + if (end && end + 1 < p->buf + p->bufln) { + size_t len = end - p->buf + 1; + memmove(p->buf, end + 1, p->bufln - len); + p->bufln -= len; + } else { + p->buf[0] = 0; + p->bufln = 0; + } + + return eof; +} + +void +gemini_token_finish(struct gemini_token *tok) +{ + if (!tok) { + return; + } + + switch (tok->token) { + case GEMINI_TEXT: + free(tok->text); + break; + case GEMINI_LINK: + free(tok->link.text); + free(tok->link.url); + break; + case GEMINI_PREFORMATTED: + free(tok->preformatted.text); + free(tok->preformatted.alt_text); + break; + case GEMINI_HEADING: + free(tok->heading.title); + break; + case GEMINI_LIST_ITEM: + free(tok->list_item); + break; + case GEMINI_QUOTE: + free(tok->quote_text); + break; + } +}