geminispace.info

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/geminispace.info.git
Log | Files | Refs | README | LICENSE

commit e14157666310f39c21852e367cdee4fd0e91a3a9
parent a85534a5bf24330f6a22f0dd9a388e4eab9fa152
Author: René Wagner <rwa@clttr.info>
Date:   Sun,  4 Jul 2021 21:49:27 +0200

update 2021-07-04 & more excludes

Diffstat:
M gus/excludes.py          | 27 ++++++++++++++++++++-------
M serve/templates/news.gmi |  4 ++++
2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/gus/excludes.py b/gus/excludes.py @@ -12,6 +12,9 @@ EXCLUDED_URL_PREFIXES = [ # all combinations of a tictactoe board "gemini://tictactoe.lanterne.chilliet.eu", + # books?! + "gemini://gemini.zachdecook.com/cgi-bin/ccel.sh", + # serving big files and slooow capsule -> takes to long to crawl "gemini://kamalatta.ddnss.de/", @@ -42,11 +45,12 @@ EXCLUDED_URL_PREFIXES = [ # mastodon mirror - too big to crawl "gemini://vps01.rdelaage.ovh/", - + "gemini://gemini.lost-frequencies.eu/", # various failing resources on runjimmyrunrunyoufuckerrun.com "gemini://runjimmyrunrunyoufuckerrun.com/fonts/", "gemini://runjimmyrunrunyoufuckerrun.com/tmp/", - + "gemini://gemini.conman.org/boston/", + # Internal "gemini://gus.guru/search/", "gemini://gus.guru/v/search/", @@ -128,13 +132,15 @@ EXCLUDED_URL_PREFIXES = [ # youtube mirror "gemini://pon.ix.tc/cgi-bin/youtube.cgi?", "gemini://pon.ix.tc/youtube/", - # news mirrors - not our business + # news mirrors - not our businessn "gemini://guardian.shit.cx/", "gemini://simplynews.metalune.xyz", "gemini://illegaldrugs.net/cgi-bin/news.php?", "gemini://rawtext.club/~sloum/geminews", "gemini://gemini.cabestan.tk/hn", - + "gemini://hn.filiuspatris.net/", + "gemini://schmittstefan.de/de/nachrichten/", + # wikipedia proxy "gemini://wp.pitr.ca/", "gemini://wp.glv.one/", @@ -151,6 +157,10 @@ EXCLUDED_URL_PREFIXES = [ # gopher proxy "gemini://80h.dev/agena/", + # astrobotany + "gemini://astrobotany.mozz.us/", + "gemini://carboncopy.xyz/cgi-bin/apache.gex/", + # susa.net "gemini://gemini.susa.net/cgi-bin/search?", "gemini://gemini.susa.net/cgi-bin/twitter?", @@ -166,8 +176,8 @@ EXCLUDED_URL_PREFIXES = [ "gemini://higeki.jp/radio", # list of ~30000 stations, crawling takes too long - "gemini://gemini.tunerapp.org/stations/", - "gemini://tunerapp.org/stations/", + "gemini://gemini.tunerapp.org/", + "gemini://tunerapp.org/", # this page inexplicably breaks both build_index, as well as elpher # when I browse to it... 
I think it might have some weird encoding @@ -185,12 +195,15 @@ EXCLUDED_URL_PREFIXES = [ # these threads seem to expire "gemini://dioskouroi.xyz/thread", - # french news mirrors, there's just too much + # news mirrors, there's just too much "gemini://jpfox.fr/rss/", + "gemini://illegaldrugs.net/cgi-bin/news.php/", + "gemini://dw.schettler.net/", # docs - not our business "gemini://cfdocs.wetterberg.nu/", "gemini://godocs.io", + "gemini://hellomouse.net/user-pages/handicraftsman/ietf/", ] EXCLUDED_URL_PATHS = [ diff --git a/serve/templates/news.gmi b/serve/templates/news.gmi @@ -1,6 +1,10 @@ {% include 'fragments/header.gmi' %} ## News +### 2021-07-04 +More trouble along the way. Although the VPS hosting geminispace.info runs with 8 Gigs of RAM and does not serve other services, the index update got oom-killed. :( +Seems due to the continued growth of gemini we are hitting the same problems Natalie hit a few months ago on GUS. I'm currently unsure about the next steps. + ### 2021-06-26 It took almost ten days the last reindex to complete as i triggered a complete index. This was necessary after the cleanup as there is currently no incremental cleanup of the search index implemented. The design of GUS - which clearly has never been meant to index such a huge number of capsules - and the slow VPS are doing no good currently to keep the index up to date. Unfortunately we are currently stuck with the VPS.