geminispace.info

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/geminispace.info.git
Log | Files | Refs | README | LICENSE

commit 5915c5f6d297a9d91d1bfd6a86e9768a50fa5b1d
parent 759c0493b08f7675a2943be9d93cb8b066bff71c
Author: Gogs <gogs@fake.local>
Date:   Thu, 21 Jan 2021 21:08:39 +0100

add seeds & update ignored urls

Diffstat:
M gus/crawl.py | 34 ++++++++++++++++++++++++++++++++--
A seed-requests.txt | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py @@ -37,6 +37,16 @@ EXCLUDED_URL_PREFIXES = [ "gemini://gemini.conman.org/test", "gemini://gemini.circumlunar.space/users/fgaz/calculator/", + "gemini://gemini.bortzmeyer.org/rfc-mirror/", + "gemini://nixo.xyz/reply/", + "gemini://nixo.xyz/notify", + "gemini://blah.com/", + "gemini://gemini.thebackupbox.net/queryresponse", + "gemini://gem.garichankar.com/share_audio", + # various failing resources on runjimmyrunrunyoufuckerrun.com + "gemini://runjimmyrunrunyoufuckerrun.com/fonts/", + "gemini://runjimmyrunrunyoufuckerrun.com/tmp/", + # Internal "gemini://gus.guru/search/", "gemini://gus.guru/v/search/", @@ -46,6 +56,13 @@ EXCLUDED_URL_PREFIXES = [ "gemini://gus.guru/backlinks?", "gemini://gus.guru/threads", + "gemini://geminispace.info/search", + "gemini://geminispace.info/v/search", + "gemini://geminispace.info/search", + "gemini://geminispace.info/v/search", + "gemini://geminispace.info/add-seed", + "gemini://geminispace.info/backlinks", + "gemini://geminispace.info/threads", # Houston "gemini://houston.coder.town/search?", "gemini://houston.coder.town/search/", @@ -53,12 +70,15 @@ EXCLUDED_URL_PREFIXES = [ # Geddit "gemini://geddit.pitr.ca/post?", "gemini://geddit.pitr.ca/c/", - + "gemini://geddit.glv.one/post?", + "gemini://geddit.glv.one/c/", + # Marmaladefoo calculator "gemini://gemini.marmaladefoo.com/cgi-bin/calc.cgi?", # Individual weather pages "gemini://acidic.website/cgi-bin/weather.tcl?", + "gemini://caolan.uk/weather/", # Alex Schroeder's problematic stuff "gemini://vault.transjovian.org/", @@ -110,10 +130,12 @@ EXCLUDED_URL_PREFIXES = [ # wikipedia proxy "gemini://wp.pitr.ca/", + "gemini://wp.glv.one/", # client torture test "gemini://egsam.pitr.ca/", - + "gemini://egsam.glv.one/", + # mozz's chat "gemini://chat.mozz.us/stream", "gemini://chat.mozz.us/submit", @@ -155,6 +177,14 @@ EXCLUDED_URL_PREFIXES = [ # french news mirrors, there's just too much "gemini://jpfox.fr/rss/", + + # ZachDeCooks songs + 
"gemini://songs.zachdecook.com/song.gmi.php/", + "gemini://songs.zachdecook.com/chord.svg/", + + # robots.txt not served correctly + "gemini://orrg.clttr.info/orrg.pl", + "gemini://gmndemo.clttr.info/orrg/orrg.pl", ] EXCLUDED_URL_PATHS = [ diff --git a/seed-requests.txt b/seed-requests.txt @@ -0,0 +1,87 @@ +gemini://directory.randomroad.net +gemini://drewdevault.org +gemini://envs.net/ +gemini://e-worm.club/ +gemini://flounder.online +gemini://gemini.circumlunar.space/ +gemini://gemini.ctrl-c.club/ +gemini://gemlog.blue/users/ +gemini://gmn.clttr.info +gemini://park-city.club/ +gemini://rawtext.club +gemini://republic.circumlunar.space +gemini://soviet.circumlunar.space +gemini://talon.computer +gemini://tanelorn.city/ +gemini://tilde.club +gemini://tilde.pink/ +gemini://tilde.team/ +gemini://zaibatsu.circumlunar.space +gemini://gmi.skyjake.fi/ +gemini://chaosadmins.de/ +gemini://gemini.sirodoht.com/ +gemini://inconsistentuniverse.space +gemini://gemini.brunofontes.net/ +gemini://dctrud.randomroad.net/ +gemini://caranatar.xyz +gemini://boringcactus.com/ +gemini://blobpat.space +gemini://carnage.edvinbasil.com +gemini://xhrpb.com +gemini://xdefrag.dev +gemini://vignette.kalasarn.se +gemini://ur.gs/ +gemini://ubq323.website/ +gemini://tweek.zyxxyz.eu +gemini://trfs.me +gemini://treeblue.space +gemini://tokeniser.uk +gemini://tildeverse.org +gemini://thorjhanson.com +gemini://thesudorm.com +gemini://tdem.in +gemini://strm.online +gemini://sidewall.tokeniser.uk +gemini://schu.be/ +gemini://saintnet.tech +gemini://rdelaage.ovh +gemini://random-projects.net +gemini://rainbow-100.com +gemini://qd.discordian.de/ +gemini://pulley.host +gemini://pulham.info +gemini://port1965.eu +gemini://plock.net +gemini://pboyd.io +gemini://ogg.elwinar.com +gemini://niedzwiedzinski.cyou +gemini://mrnd.xyz +gemini://misterbanal.net +gemini://markdain.net +gemini://lleb.me +gemini://liesinties.site +gemini://kamalatta.ddnss.de +gemini://irth.pl/ +gemini://her.esy.fun +gemini://hecanjog.com 
+gemini://hacktivis.me +gemini://gemini.spicyporkbun.online +gemini://gemini.spam.works +gemini://gemini.slashdev.space +gemini://gemini.pyfisch.org +gemini://gemini.norns.space +gemini://gemini.is +gemini://gemini.go350.com/ +gemini://gemini.fancycade.xyz +gemini://gemini.djinn.party +gemini://gemini.digiprime.xyz/ +gemini://gemini.crowesnest.io +gemini://geminet.org +gemini://gem.snowgoons.ro +gemini://gem.johanbove.info +gemini://drawk.cab +gemini://celehner.com +gemini://caranatar.xyz +gemini://gem.garichankar.com/ +gemini://oppen.digital +gemini://syrinx.homelinux.org