orrg

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/orrg.git
Log | Files | Refs | README | LICENSE

commit 8c21cb40de18a1fc4f8cc0ee540a1c021d08d50a
parent 01370e679836f2a4a9a78b9e08ff08b5d9469329
Author: René Wagner <rwagner@rw-net.de>
Date:   Sun, 29 Nov 2020 10:03:54 +0100

integrate handling gemini:// uris using gcat

improve error handling and restructure orrg.pl

Diffstat:
A .gitignore | 1 +
M README.md | 5 +++--
A gcat | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M orrg.pl | 69 ++++++++++++++++++++++++++++++++++++++++++++++-----------------------
4 files changed, 122 insertions(+), 25 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +data/* diff --git a/README.md b/README.md @@ -14,13 +14,13 @@ Preferably over the [mailing list](https://lists.sr.ht/~rwa/gmni-perl-cgi). ## features -- load an atom/rss feed from https (http is not supported!) given by user input +- load an atom/rss feed from gemini or https (http is deliberately not supported!) given by user input - render feed (channel info & entrys) as a gemini site - include links to originating site and every article - strip html tags from item description - lists of popular and recently visited feeds -Fetching feeds from gemini is currently not supported -> https://todo.sr.ht/~rwa/gmni-perl/4 +gemini-support is currently implemented using [gcat](https://github.com/aaronjanse/gcat) till popular perl libs have catched up. :) ## non-features @@ -43,3 +43,4 @@ Given this restrictions is not suitable for highly traffic feeds which are updat - DateTime - DateTime::Format::ISO8601 - HTML::Strip +- Python 3 for `gcat` diff --git a/gcat b/gcat @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +import cgi +import os +import socket +import ssl +import sys +import urllib.parse + +def absolutise_url(base, relative): + # Absolutise relative links + if "://" not in relative: + # Python's URL tools somehow only work with known schemes? 
+ base = base.replace("gemini://","http://") + relative = urllib.parse.urljoin(base, relative) + relative = relative.replace("http://", "gemini://") + return relative + +if len(sys.argv) != 2: + print("Usage:") + print("gcat gemini://gemini.circumlunar.space") + sys.exit(1) + +url = sys.argv[1] +parsed_url = urllib.parse.urlparse(url) +if parsed_url.scheme == "": + url = "gemini://"+url + parsed_url = urllib.parse.urlparse(url) + +if parsed_url.scheme != "gemini": + print("Sorry, Gemini links only.") + sys.exit(1) +if parsed_url.port is not None: + useport = parsed_url.port +else: + useport = 1965 +# Do the Gemini transaction +while True: + s = socket.create_connection((parsed_url.hostname, useport)) + context = ssl.SSLContext() + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + s = context.wrap_socket(s, server_hostname = parsed_url.netloc) + s.sendall((url + '\r\n').encode("UTF-8")) + # Get header and check for redirects + fp = s.makefile("rb") + header = fp.readline() + print(header.decode("UTF-8"), end="") + header = header.decode("UTF-8").strip() + status, mime = header.split()[:2] + # Handle input requests + if status.startswith("1"): + # Prompt + query = input("INPUT" + mime + "> ") + url += "?" + urllib.parse.quote(query) # Bit lazy... + # Follow redirects + elif status.startswith("3"): + url = absolutise_url(url, mime) + parsed_url = urllib.parse.urlparse(url) + # Otherwise, we're done. 
+ else: + break +# Fail if transaction was not successful +if status.startswith("2"): + if mime.startswith("text/"): + # Decode according to declared charset + mime, mime_opts = cgi.parse_header(mime) + body = fp.read() + body = body.decode(mime_opts.get("charset","UTF-8")) + print(body, end="") + else: + print(fp.read(), end="") diff --git a/orrg.pl b/orrg.pl @@ -26,7 +26,7 @@ if (!defined($ENV{'SERVER_PROTOCOL'}) || $ENV{'SERVER_PROTOCOL'} ne 'GEMINI') my $query = lc(uri_unescape($ENV{'QUERY_STRING'})); -if ($query eq '' || $query !~ /^https\:\/\//) { +if ($query eq '' || $query !~ /^(https|gemini)\:\/\//) { write_response('INPUT', 'Paste the URI of the rss feed you want to read:', undef); } @@ -39,40 +39,63 @@ sub create_response my ( $qs ) = @_; my @body = (); - my $feed = XML::FeedPP->new($qs, utf8_flag => 1); - + my $feed = feed_get($qs); if ( !defined($feed) ) { push @body, ('# orrg error', '', 'The requested feed could not be loaded. :(', '', '=> '. $qs .' open feed in browser'); return @body; } - - recent_add($qs, $feed->title); - popular_add($qs, $feed->title); + push @body, '# '. $feed->title; push @body, 'fetched '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime()); $feed->description eq '' or push @body, ('', $feed->description); $feed->image eq '' or push @body, '=> '. $feed->image .' feed image'; $feed->link eq '' or push @body, ('=> '.$feed->link.' open website', ''); + push @body, ('## recent feed items', ''); + foreach my $it ($feed->get_item()) { push @body, @{item($it)}; } + + push @body, ('', '', '=> index.pl [home]'); + return @body; +} + +sub item +{ + my ($it) = @_; my $hs = HTML::Strip->new(emit_spaces => 0, auto_reset => 1); - foreach my $it ($feed->get_item()) { - push @body, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title; - if ($it->pubDate ne '') { - my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate); - push @body, 'published '. 
strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch)); - push @body, ''; - } - if ($it->description ne '') { - my $desc = $it->description; - chomp $desc; - $desc =~ s/\<li\>/* /ig; - $desc =~ s/\<br \/\>/\r\n/ig; - push @body, $hs->parse($desc); - } - $it->link eq '' or push @body, ('=> '.$it->link.' open entry in browser', ''); + my @item = (); + push @item, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title; + if ($it->pubDate ne '') { + my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate); + push @item, 'published '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch)); + push @item, ''; } + if ($it->description ne '') { + my $desc = $it->description; + chomp $desc; + $desc =~ s/\<li\>/* /ig; + $desc =~ s/\<br \/\>/\r\n/ig; + push @item, $hs->parse($desc); + } + $it->link eq '' or push @item, ('=> '.$it->link.' open entry in browser', ''); - push @body, ('', '', '=> index.pl [home]'); - return @body; + return \@item; +} + +sub feed_get +{ + my ( $query ) = @_; + + my $feed; + if ( $query =~ /^https\:\/\// ) { $feed = XML::FeedPP->new($query, utf8_flag => 1); } + if ( $query =~ /^gemini\:\/\// ) { + my $content = `./gcat $query`; + $content =~ /20\W/ or return undef; + $content =~ s/^[0-9]{0,2}\W.+\r\n//; + $feed = XML::FeedPP->new($content, -type => 'string'); + } + + recent_add($query, $feed->title); + popular_add($query, $feed->title); + return $feed; }