deluge/deluge/httpdownloader.py
Chase Sterling ece31cf3cf
[Tests] Transition tests to pure pytest
Convert all the twisted.trial tests to pytest_twisted. Also move off of unittest.TestCase as well. Seems there were several tests which weren't actually testing what they should, and also some code that wasn't doing what the broken test said it should.

Goals:

    Remove twisted.trial tests
    Move to pytest fixtures, rather than many classess and subclasses with setup and teardown functions
    Move away from self.assertX to assert style tests
    FIx broken tests

Going forward I think these should be the goals when adding/modifying tests:

* Don't use BaseTest or set_up tear_down methods any more. Fixtures should be used either in the test module/class, or make/improve the ones available in conftest.py
* For sure don't use unittest or twisted.trial, they mess up the pytest stuff.
* Prefer pytest_twisted.ensureDeferred with an async function over inlineCallbacks.
  - I think the async function syntax is nicer, and it helps catch silly mistakes, e.g. await None is invalid, but yield None isn't, so if some function returns an unexpected thing we try to await on, it will be caught earlier. (I struggled debugging a test for quite a while, then caught it immediately when switching to the new syntax)
  - Once the maybe_coroutine PR goes in, using the async syntax can also improve tracebacks when debugging tests.

Things that should probably be cleaned up going forward:

* Remove BaseTestCase
* Remove the subclasses like DaemonBase in favor of new fixtures.
  * I think there are some other utility subclasses that could be removed too
* Perhaps use parameterization in the ui_entry tests, rather that the weird combination of subclasses and the set_var fixture I mixed in.
* Convert some of the callback stuff to pytest_twisted.ensureDeferred tests, just for nicer readability

Details relating to pytest fixtures conftest.py in root dir:
 * https://github.com/pytest-dev/pytest/issues/5822#issuecomment-697331920
 * https://docs.pytest.org/en/latest/deprecations.html#pytest-plugins-in-non-top-level-conftest-files

Closes: https://github.com/deluge-torrent/deluge/pull/354
2022-02-03 22:29:32 +00:00

334 lines
11 KiB
Python

#
# Copyright (C) 2009 Andrew Resch <andrewresch@gmail.com>
#
# This file is part of Deluge and is licensed under GNU General Public License 3.0, or later, with
# the additional special exception to link portions of this program with the OpenSSL library.
# See LICENSE for more details.
#
import cgi
import logging
import os.path
import zlib
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.web import client, http
from twisted.web._newclient import HTTPClientParser
from twisted.web.error import Error, PageRedirect
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent
from zope.interface import implementer
from deluge.common import get_version
log = logging.getLogger(__name__)
class CompressionDecoder(client.GzipDecoder):
"""A compression decoder for gzip, x-gzip and deflate."""
def deliverBody(self, protocol): # NOQA: N802
self.original.deliverBody(CompressionDecoderProtocol(protocol, self.original))
class CompressionDecoderProtocol(client._GzipProtocol):
"""A compression decoder protocol for CompressionDecoder."""
def __init__(self, protocol, response):
super().__init__(protocol, response)
self._zlibDecompress = zlib.decompressobj(32 + zlib.MAX_WBITS)
class BodyHandler(HTTPClientParser):
"""An HTTP parser that saves the response to a file."""
def __init__(self, request, finished, length, agent, encoding=None):
"""BodyHandler init.
Args:
request (t.w.i.IClientRequest): The parser request.
finished (Deferred): A Deferred to handle the finished response.
length (int): The length of the response.
agent (t.w.i.IAgent): The agent from which the request was sent.
"""
super().__init__(request, finished)
self.agent = agent
self.finished = finished
self.total_length = length
self.current_length = 0
self.data = b''
self.encoding = encoding
def dataReceived(self, data): # NOQA: N802
self.current_length += len(data)
self.data += data
if self.agent.part_callback:
self.agent.part_callback(data, self.current_length, self.total_length)
def connectionLost(self, reason): # NOQA: N802
if self.encoding:
self.data = self.data.decode(self.encoding).encode('utf8')
with open(self.agent.filename, 'wb') as _file:
_file.write(self.data)
self.finished.callback(self.agent.filename)
self.state = 'DONE'
HTTPClientParser.connectionLost(self, reason)
@implementer(IAgent)
class HTTPDownloaderAgent:
"""A File Downloader Agent."""
def __init__(
self,
agent,
filename,
part_callback=None,
force_filename=False,
allow_compression=True,
handle_redirect=True,
):
"""HTTPDownloaderAgent init.
Args:
agent (t.w.c.Agent): The agent which will send the requests.
filename (str): The filename to save the file as.
force_filename (bool): Forces use of the supplied filename,
regardless of header content.
part_callback (func): A function to be called when a part of data
is received, it's signature should be:
func(data, current_length, total_length)
"""
self.handle_redirect = handle_redirect
self.agent = agent
self.filename = filename
self.part_callback = part_callback
self.force_filename = force_filename
self.allow_compression = allow_compression
self.decoder = None
def request_callback(self, response):
finished = Deferred()
if not self.handle_redirect and response.code in (
http.MOVED_PERMANENTLY,
http.FOUND,
http.SEE_OTHER,
http.TEMPORARY_REDIRECT,
):
location = response.headers.getRawHeaders(b'location')[0]
error = PageRedirect(response.code, location=location)
finished.errback(Failure(error))
elif response.code >= 400:
error = Error(response.code)
finished.errback(Failure(error))
else:
headers = response.headers
body_length = int(headers.getRawHeaders(b'content-length', default=[0])[0])
if headers.hasHeader(b'content-disposition') and not self.force_filename:
content_disp = headers.getRawHeaders(b'content-disposition')[0].decode(
'utf-8'
)
content_disp_params = cgi.parse_header(content_disp)[1]
if 'filename' in content_disp_params:
new_file_name = content_disp_params['filename']
new_file_name = sanitise_filename(new_file_name)
new_file_name = os.path.join(
os.path.split(self.filename)[0], new_file_name
)
count = 1
fileroot = os.path.splitext(new_file_name)[0]
fileext = os.path.splitext(new_file_name)[1]
while os.path.isfile(new_file_name):
# Increment filename if already exists
new_file_name = f'{fileroot}-{count}{fileext}'
count += 1
self.filename = new_file_name
cont_type_header = headers.getRawHeaders(b'content-type')[0].decode()
cont_type, params = cgi.parse_header(cont_type_header)
# Only re-ecode text content types.
encoding = None
if cont_type.startswith('text/'):
encoding = params.get('charset', None)
response.deliverBody(
BodyHandler(response.request, finished, body_length, self, encoding)
)
return finished
def request(self, method, uri, headers=None, body_producer=None):
"""Issue a new request to the wrapped agent.
Args:
method (bytes): The HTTP method to use.
uri (bytes): The url to download from.
headers (t.w.h.Headers, optional): Any extra headers to send.
body_producer (t.w.i.IBodyProducer, optional): Request body data.
Returns:
Deferred: The filename of the of the downloaded file.
"""
if headers is None:
headers = Headers()
if not headers.hasHeader(b'User-Agent'):
version = get_version()
user_agent = 'Deluge/%s (https://deluge-torrent.org)' % version
headers.addRawHeader('User-Agent', user_agent)
d = self.agent.request(
method=method, uri=uri, headers=headers, bodyProducer=body_producer
)
d.addCallback(self.request_callback)
return d
def sanitise_filename(filename):
"""Sanitises a filename to use as a download destination file.
Logs any filenames that could be considered malicious.
filename (str): The filename to sanitise.
Returns:
str: The sanitised filename.
"""
# Remove any quotes
filename = filename.strip('\'"')
if os.path.basename(filename) != filename:
# Dodgy server, log it
log.warning(
'Potentially malicious server: trying to write to file: %s', filename
)
# Only use the basename
filename = os.path.basename(filename)
filename = filename.strip()
if filename.startswith('.') or ';' in filename or '|' in filename:
# Dodgy server, log it
log.warning(
'Potentially malicious server: trying to write to file: %s', filename
)
return filename
def _download_file(
url,
filename,
callback=None,
headers=None,
force_filename=False,
allow_compression=True,
handle_redirects=True,
):
"""Downloads a file from a specific URL and returns a Deferred.
A callback function can be specified to be called as parts are received.
Args:
url (str): The url to download from.
filename (str): The filename to save the file as.
callback (func): A function to be called when partial data is received,
it's signature should be: func(data, current_length, total_length)
headers (dict): Any optional headers to send.
force_filename (bool): Force using the filename specified rather than
one the server may suggest.
allow_compression (bool): Allows gzip & deflate decoding.
Returns:
Deferred: The filename of the downloaded file.
Raises:
t.w.e.PageRedirect
t.w.e.Error: for all other HTTP response errors
"""
agent = client.Agent(reactor)
if allow_compression:
enc_accepted = ['gzip', 'x-gzip', 'deflate']
decoders = [(enc.encode(), CompressionDecoder) for enc in enc_accepted]
agent = client.ContentDecoderAgent(agent, decoders)
if handle_redirects:
agent = client.RedirectAgent(agent)
agent = HTTPDownloaderAgent(
agent, filename, callback, force_filename, allow_compression, handle_redirects
)
# The Headers init expects dict values to be a list.
if headers:
for name, value in list(headers.items()):
if not isinstance(value, list):
headers[name] = [value]
return agent.request(b'GET', url.encode(), Headers(headers))
def download_file(
url,
filename,
callback=None,
headers=None,
force_filename=False,
allow_compression=True,
handle_redirects=True,
):
"""Downloads a file from a specific URL and returns a Deferred.
A callback function can be specified to be called as parts are received.
Args:
url (str): The url to download from.
filename (str): The filename to save the file as.
callback (func): A function to be called when partial data is received,
it's signature should be: func(data, current_length, total_length).
headers (dict): Any optional headers to send.
force_filename (bool): Force the filename specified rather than one the
server may suggest.
allow_compression (bool): Allows gzip & deflate decoding.
handle_redirects (bool): HTTP redirects handled automatically or not.
Returns:
Deferred: The filename of the downloaded file.
Raises:
t.w.e.PageRedirect: If handle_redirects is False.
t.w.e.Error: For all other HTTP response errors.
"""
def on_download_success(result):
log.debug('Download success!')
return result
def on_download_fail(failure):
log.warning(
'Error occurred downloading file from "%s": %s',
url,
failure.getErrorMessage(),
)
result = failure
return result
d = _download_file(
url,
filename,
callback=callback,
headers=headers,
force_filename=force_filename,
allow_compression=allow_compression,
handle_redirects=handle_redirects,
)
d.addCallbacks(on_download_success, on_download_fail)
return d