geminispace.info

Unnamed repository; edit this file 'description' to name the repository.
git clone git://code.clttr.info/geminispace.info.git
Log | Files | Refs | README | LICENSE

commit 73883455c273b5ccfbd16196ee8889593199ee74
parent 6e524d1be9a8b87de6156c1f47a8bdd453f1d1f0
Author: René Wagner <rwa@clttr.info>
Date:   Wed, 14 Jul 2021 08:36:25 +0200

minor code cleanup in db_model

Diffstat:
M gus/crawl.py      |  6 +++---
M gus/excludes.py   |  1 +
M gus/lib/gemini.py | 16 ----------------
3 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -325,8 +325,8 @@ def crawl_page(
     gr = gemini_resource
     url = gr.fetchable_url
     if gr.normalized_host in failure_count and failure_count[gr.normalized_host] > constants.MAXIMUM_FAILED_REQUEST_COUNT:
-        logging.warn(
-            "Too much failed requests for host, skipping: %s", gus.lib.logging.strip_control_chars(url)
+        logging.debug(
+            "Too many failed requests for host, skipping: %s", gus.lib.logging.strip_control_chars(url)
         )
         return
     if max_crawl_depth >= 0 and current_depth > max_crawl_depth:
@@ -341,7 +341,7 @@ def crawl_page(
         )
         return
     if should_skip(gr):
-        logging.info(
+        logging.debug(
             "URL is excluded, skipping: %s",
             gus.lib.logging.strip_control_chars(url),
         )
diff --git a/gus/excludes.py b/gus/excludes.py
@@ -17,6 +17,7 @@ EXCLUDED_URL_PREFIXES = [
     # serving big files and slooow capsule -> takes to long to crawl
     "gemini://kamalatta.ddnss.de/",
+    "gemini://tweek.zyxxyz.eu/valentina/",
 
     # Mastodon proxy
     "gemini://mastogem.picasoft.net",
diff --git a/gus/lib/gemini.py b/gus/lib/gemini.py
@@ -95,7 +95,6 @@ class GeminiResource:
         self._normalized_host = None
         self._normalized_host_like = None
         self._fetchable_url = None
-        self._indexable_url = None
         self._is_root_like = None
         self._is_log_root_like = None
         self._is_log_post_like = None
@@ -195,20 +194,6 @@ class GeminiResource:
             self._fetchable_url = url
         return self._fetchable_url
 
-    def _get_indexable_url(self):
-        if not self.is_valid:
-            return None
-        if self._indexable_url is None:
-            indexable_url = unquote(self.fetchable_url)
-            if self.urlsplit.port == 1965:
-                indexable_url = self.normalized_url.replace(
-                    self.urlsplit.hostname.lower() + ":1965",
-                    self.urlsplit.hostname.lower(),
-                    1,
-                )
-            self._indexable_url = indexable_url
-        return self._indexable_url
-
     def _get_is_root_like(self):
         if self._is_root_like is None:
             is_root_like = False
@@ -369,7 +354,6 @@ class GeminiResource:
     fetchable_url = property(_get_fetchable_url)
     # constructed from fetchable_url
     # should be unquoted.
-    indexable_url = property(_get_indexable_url)
     is_root_like = property(_get_is_root_like)
     is_log_root_like = property(_get_is_log_root_like)
     is_log_post_like = property(_get_is_log_post_like)