Source code for bushel.collector.remote
"""Remote CollecTor instance interaction.
This module provides tools for interacting with remote CollecTor instances,
such as those run by `Tor Metrics <https://metrics.torproject.org/>`_ or
3rd-party public or private CollecTor instances.
.. data:: DEFAULT_COLLECTOR_HOST
The default CollecTor host to use when none is specified, currently
`collector.torproject.org` although this is subject to change. It will be
set to the currently recommended public Tor Metrics instance.
.. data:: DEFAULT_INDEX_COMPRESSION
The default compression algorithm used with CollecTor indexes. This is
currently set to *xz*, although this is subject to change in line with any
recommendations from Tor Metrics.
"""
import typing
import requests
from bushel.collector.filesystem import collector_index_path
from bushel.collector.filesystem import CollecTorIndexCompression
from bushel.collector.index import CollecTorIndex
# Host that CollecTorRemote falls back to when no host is given.
DEFAULT_COLLECTOR_HOST = "collector.torproject.org"
# Index compression that CollecTorRemote.get_index falls back to when the
# caller does not choose one.
DEFAULT_INDEX_COMPRESSION = CollecTorIndexCompression.XZ
class CollecTorRemote:
    """
    A remote CollecTor instance. Methods are provided for querying the data
    available on the remote instance, as well as retrieving data from the
    remote instance.

    :param str host: The FQDN of the CollecTor instance. If None, then the
                     :data:`DEFAULT_COLLECTOR_HOST` is used.
    :param bool https: Whether HTTPS should be used. This defaults to *True*.
    :param float timeout: Timeout in seconds handed to ``requests`` for every
                          request made by this object. The default of *None*
                          applies no timeout.
    """

    host: str
    https: bool
    # Class-level default so the attribute exists even on instances that were
    # created without running __init__.
    timeout: typing.Optional[float] = None

    def __init__(self,
                 host: typing.Optional[str] = None,
                 *,
                 https: bool = True,
                 timeout: typing.Optional[float] = None) -> None:
        self.host = host or DEFAULT_COLLECTOR_HOST
        self.https = https
        self.timeout = timeout

    def get_raw_by_path(self, path: str) -> bytes:
        """
        Fetch the raw bytes of a file from a CollecTor instance.

        :param str path: CollecTor path with no leading slash (/).

        :rtype: bytes
        :returns: Raw bytes of the reply, which may be compressed depending on
                  the requested path.

        :raises TypeError: If *path* is not a string.
        :raises ValueError: If *path* starts with a slash.
        :raises requests.exceptions.HTTPError: If the instance answers with a
            4xx or 5xx status code.
        """
        if not isinstance(path, str):
            raise TypeError("CollecTor paths must be strings.")
        if path.startswith("/"):
            raise ValueError("CollecTor paths must not have leading slashes. "
                             "The path is always considered to be absolute.")
        scheme = "https" if self.https else "http"
        url = f"{scheme}://{self.host}/{path}"
        response = requests.get(url, timeout=self.timeout)
        # Fail loudly on 4xx/5xx: otherwise the body of an error page would be
        # returned to the caller as if it were the requested file.
        response.raise_for_status()
        return response.content

    def get_index(self,
                  compression: typing.Optional[CollecTorIndexCompression] = None
                  ) -> CollecTorIndex:
        """
        Fetch the index from the CollecTor instance, optionally specifying the
        compression algorithm to use. This function will return an object that
        contains the (decompressed if necessary) and parsed index.

        :param CollecTorIndexCompression compression: Compression algorithm to
            use. If *None* (the default), the algorithm specified in
            :data:`DEFAULT_INDEX_COMPRESSION` will be used.

        :rtype: CollecTorIndex

        :raises requests.exceptions.HTTPError: If the instance answers with a
            4xx or 5xx status code.
        """
        # Only None selects the default; any member passed explicitly is
        # honoured as given.
        if compression is None:
            compression = DEFAULT_INDEX_COMPRESSION
        raw_bytes = self.get_raw_by_path(collector_index_path(compression))
        decompressed_bytes = compression.decompress(raw_bytes)
        return CollecTorIndex(decompressed_bytes)
def get_index(host: typing.Optional[str] = None,
              compression: typing.Optional[CollecTorIndexCompression] = None,
              *,
              https: bool = True) -> CollecTorIndex:
    """
    Fetch the index of a CollecTor instance without first building a
    :class:`CollecTorRemote` by hand.

    Shorthand for
    ``CollecTorRemote(host, https=https).get_index(compression)``; all
    arguments are passed through unchanged.

    .. seealso:: :meth:`CollecTorRemote.get_index`
    """
    remote = CollecTorRemote(host, https=https)
    return remote.get_index(compression)