diff --git a/gus/ b/gus/ @@ -200,6 +200,7 @@ EXCLUDED_URL_PREFIXES = [ # list of ~30000 stations, crawling takes too long "gemini://", + "gemini://", # this page inexplicably breaks both build_index, as well as elpher # when I browse to it... I think it might have some weird encoding diff --git a/serve/templates/news.gmi b/serve/templates/news.gmi @@ -2,6 +2,10 @@ ## News +### 2021-05-25 is now aware of more than 1000 capsules. Unfortunately, this data is somewhat misleading: some of the capsules may already be gone, but GUS lacks a mechanism for invalidating old data. +I'll probably start with some manual cleanup over the next few days, so don't worry if the numbers go down. + ### 2021-05-12 We are back on track with crawl and index, everything is up-to-date again. I had to add another news and a wikipedia mirror to the exclude list. The current implementation can't handle such a huge amount of information well.