deluge/deluge/httpdownloader.py
Calum Lind b1cdc32f73 [Lint] Use Black to auto-format code
Moving to an auto-formatter makes the code easier to read and submit, and
speeds up development. https://github.com/ambv/black/

Although I would prefer 79 chars, the default line length of 88 chars
used by black suffices. The flake8 line length remains at 120 chars
since black does not touch comments or docstrings and this will require
another round of fixes.

The only black setting that is not standard is the use of double-quotes
for strings so disabled any formatting of these. Note however that
flake8 will still flag usage of double-quotes. I may change my mind on
double vs single quotes but for now leave them.

A new pyproject.toml file has been created for black configuration.
2018-10-03 15:21:53 +01:00

331 lines
11 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (C) 2009 Andrew Resch <andrewresch@gmail.com>
#
# This file is part of Deluge and is licensed under GNU General Public License 3.0, or later, with
# the additional special exception to link portions of this program with the OpenSSL library.
# See LICENSE for more details.
#
from __future__ import unicode_literals
import cgi
import logging
import os.path
import zlib
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.web import client, http
from twisted.web._newclient import HTTPClientParser
from twisted.web.error import PageRedirect
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent
from zope.interface import implementer
from deluge.common import get_version
log = logging.getLogger(__name__)
class CompressionDecoder(client.GzipDecoder):
    """A compression decoder for gzip, x-gzip and deflate."""

    def deliverBody(self, protocol):  # NOQA: N802
        """Deliver the wrapped response's body through a decoding protocol.

        Args:
            protocol: The protocol that should receive the decoded body.
        """
        wrapped_response = self.original
        decoding_protocol = CompressionDecoderProtocol(protocol, wrapped_response)
        wrapped_response.deliverBody(decoding_protocol)
class CompressionDecoderProtocol(client._GzipProtocol):
    """A compression decoder protocol for CompressionDecoder."""

    def __init__(self, protocol, response):
        """CompressionDecoderProtocol init.

        Args:
            protocol: The protocol that receives the decompressed data.
            response: The response whose body is being decompressed.
        """
        super(CompressionDecoderProtocol, self).__init__(protocol, response)
        # Adding 32 to the window size makes zlib detect a gzip or a zlib
        # header automatically; this replaces the decompressor set by the
        # parent class.
        auto_detect_wbits = zlib.MAX_WBITS + 32
        self._zlibDecompress = zlib.decompressobj(auto_detect_wbits)
class BodyHandler(HTTPClientParser, object):
    """An HTTP parser that saves the response to a file.

    Body data is buffered in memory while it arrives and is written to the
    agent's filename in one go when the connection is lost.
    """

    def __init__(self, request, finished, length, agent, encoding=None):
        """BodyHandler init.

        Args:
            request (t.w.i.IClientRequest): The parser request.
            finished (Deferred): A Deferred to handle the finished response.
            length (int): The length of the response.
            agent (t.w.i.IAgent): The agent from which the request was sent.
            encoding (str, optional): Charset of the body. When set, the body
                is decoded with it and re-encoded as UTF-8 before being saved.
        """
        super(BodyHandler, self).__init__(request, finished)
        self.agent = agent
        self.finished = finished
        self.total_length = length
        self.current_length = 0
        # Chunks are collected in a list and joined once at the end; growing a
        # bytes object with ``+=`` copies the whole buffer for every chunk.
        self._body_chunks = []
        # Holds the complete body once connectionLost has run.
        self.data = b''
        self.encoding = encoding

    def dataReceived(self, data):  # NOQA: N802
        """Buffer a chunk of the body and report progress to the agent."""
        self.current_length += len(data)
        self._body_chunks.append(data)
        if self.agent.part_callback:
            self.agent.part_callback(data, self.current_length, self.total_length)

    def connectionLost(self, reason):  # NOQA: N802
        """Write the buffered body to disk and fire the finished Deferred.

        The Deferred is fired with the filename on success. If the body cannot
        be decoded or the file cannot be written, the Deferred is errbacked
        instead of being left unfired, so callers are never left waiting.
        """
        try:
            self.data = b''.join(self._body_chunks)
            del self._body_chunks[:]
            if self.encoding:
                self.data = self.data.decode(self.encoding).encode('utf8')
            with open(self.agent.filename, 'wb') as _file:
                _file.write(self.data)
        except (LookupError, UnicodeError, EnvironmentError):
            # LookupError: unknown charset name; UnicodeError: body does not
            # match the charset; EnvironmentError: the file could not be saved.
            self.finished.errback(Failure())
        else:
            self.finished.callback(self.agent.filename)
        # Set before delegating to the parent's connectionLost, as the
        # original code did.
        self.state = u'DONE'
        HTTPClientParser.connectionLost(self, reason)
@implementer(IAgent)
class HTTPDownloaderAgent(object):
    """A File Downloader Agent."""

    def __init__(
        self,
        agent,
        filename,
        part_callback=None,
        force_filename=False,
        allow_compression=True,
        handle_redirect=True,
    ):
        """HTTPDownloaderAgent init.

        Args:
            agent (t.w.c.Agent): The agent which will send the requests.
            filename (str): The filename to save the file as.
            part_callback (func): A function to be called when a part of data
                is received, its signature should be:
                    func(data, current_length, total_length)
            force_filename (bool): Forces use of the supplied filename,
                regardless of header content.
            allow_compression (bool): Stored on the agent; not otherwise used
                by this class.
            handle_redirect (bool): When False, a redirect response fails the
                request with PageRedirect instead of being saved.
        """
        self.handle_redirect = handle_redirect
        self.agent = agent
        self.filename = filename
        self.part_callback = part_callback
        self.force_filename = force_filename
        self.allow_compression = allow_compression
        self.decoder = None

    def request_callback(self, response):
        """Inspect the response headers and start saving the body.

        Args:
            response: The response returned by the wrapped agent.

        Returns:
            Deferred: Fired by BodyHandler with the filename the body was
                saved to, or errbacked with PageRedirect when redirects are
                not handled and the response is a redirect.
        """
        finished = Deferred()

        if not self.handle_redirect and response.code in (
            http.MOVED_PERMANENTLY,
            http.FOUND,
            http.SEE_OTHER,
            http.TEMPORARY_REDIRECT,
        ):
            location = response.headers.getRawHeaders(b'location')[0]
            error = PageRedirect(response.code, location=location)
            finished.errback(Failure(error))
        else:
            headers = response.headers
            body_length = int(headers.getRawHeaders(b'content-length', default=[0])[0])

            if headers.hasHeader(b'content-disposition') and not self.force_filename:
                content_disp = headers.getRawHeaders(b'content-disposition')[0].decode(
                    'utf-8'
                )
                content_disp_params = cgi.parse_header(content_disp)[1]
                if 'filename' in content_disp_params:
                    new_file_name = content_disp_params['filename']
                    new_file_name = sanitise_filename(new_file_name)
                    # Keep the download in the directory of the requested file.
                    new_file_name = os.path.join(
                        os.path.split(self.filename)[0], new_file_name
                    )

                    count = 1
                    fileroot, fileext = os.path.splitext(new_file_name)
                    while os.path.isfile(new_file_name):
                        # Increment filename if already exists
                        new_file_name = '%s-%s%s' % (fileroot, count, fileext)
                        count += 1

                    self.filename = new_file_name

            # Content-Type may be absent; getRawHeaders would then return
            # None, so default to an empty value, which yields no charset.
            cont_type = headers.getRawHeaders(b'content-type', default=[b''])[
                0
            ].decode()
            params = cgi.parse_header(cont_type)[1]
            encoding = params.get('charset', None)
            response.deliverBody(
                BodyHandler(response.request, finished, body_length, self, encoding)
            )

        return finished

    def request(self, method, uri, headers=None, body_producer=None):
        """Issue a new request to the wrapped agent.

        Args:
            method (bytes): The HTTP method to use.
            uri (bytes): The url to download from.
            headers (t.w.h.Headers, optional): Any extra headers to send.
            body_producer (t.w.i.IBodyProducer, optional): Request body data.

        Returns:
            Deferred: The filename of the downloaded file.
        """
        if headers is None:
            headers = Headers()

        # Identify as Deluge unless the caller supplied its own User-Agent.
        if not headers.hasHeader(b'User-Agent'):
            version = get_version()
            user_agent = 'Deluge/%s (https://deluge-torrent.org)' % version
            headers.addRawHeader('User-Agent', user_agent)

        d = self.agent.request(
            method=method, uri=uri, headers=headers, bodyProducer=body_producer
        )
        d.addCallback(self.request_callback)
        return d
def sanitise_filename(filename):
    """Sanitises a filename to use as a download destination file.

    Surrounding quotes, any directory components and surrounding whitespace
    are removed. Filenames that could be considered malicious are logged.

    Args:
        filename (str): The filename to sanitise.

    Returns:
        str: The sanitised filename.
    """
    warning_msg = 'Potentially malicious server: trying to write to file: %s'

    # Remove any quotes
    unquoted = filename.strip('\'"')

    # Only the basename is ever used; a differing basename means the server
    # sent a path, so log it.
    base_name = os.path.basename(unquoted)
    if base_name != unquoted:
        log.warning(warning_msg, unquoted)

    cleaned = base_name.strip()
    looks_dodgy = cleaned.startswith('.') or any(char in cleaned for char in ';|')
    if looks_dodgy:
        # Dodgy server, log it
        log.warning(warning_msg, cleaned)

    return cleaned
def _download_file(
    url,
    filename,
    callback=None,
    headers=None,
    force_filename=False,
    allow_compression=True,
    handle_redirects=True,
):
    """Downloads a file from a specific URL and returns a Deferred.

    A callback function can be specified to be called as parts are received.

    Args:
        url (str): The url to download from.
        filename (str): The filename to save the file as.
        callback (func): A function to be called when partial data is received,
            its signature should be: func(data, current_length, total_length)
        headers (dict): Any optional headers to send. The dict is not modified.
        force_filename (bool): Force using the filename specified rather than
            one the server may suggest.
        allow_compression (bool): Allows gzip & deflate decoding.
        handle_redirects (bool): HTTP redirects handled automatically or not.

    Returns:
        Deferred: The filename of the downloaded file.

    Raises:
        t.w.e.PageRedirect: Delivered through the Deferred when
            handle_redirects is False and the server redirects.
    """
    agent = client.Agent(reactor)

    if allow_compression:
        enc_accepted = ['gzip', 'x-gzip', 'deflate']
        decoders = [(enc.encode(), CompressionDecoder) for enc in enc_accepted]
        agent = client.ContentDecoderAgent(agent, decoders)
    if handle_redirects:
        agent = client.RedirectAgent(agent)

    agent = HTTPDownloaderAgent(
        agent, filename, callback, force_filename, allow_compression, handle_redirects
    )

    # The Headers init expects dict values to be a list. Build a new dict
    # rather than rewriting the caller's one in place.
    raw_headers = None
    if headers:
        raw_headers = {
            name: value if isinstance(value, list) else [value]
            for name, value in headers.items()
        }

    return agent.request(b'GET', url.encode(), Headers(raw_headers))
def download_file(
    url,
    filename,
    callback=None,
    headers=None,
    force_filename=False,
    allow_compression=True,
    handle_redirects=True,
):
    """Downloads a file from a specific URL and returns a Deferred.

    Delegates to _download_file and logs the outcome. The result or failure
    is passed on unchanged. A callback function can be specified to be called
    as parts are received.

    Args:
        url (str): The url to download from.
        filename (str): The filename to save the file as.
        callback (func): A function to be called when partial data is received,
            its signature should be: func(data, current_length, total_length).
        headers (dict): Any optional headers to send.
        force_filename (bool): Force the filename specified rather than one the
            server may suggest.
        allow_compression (bool): Allows gzip & deflate decoding.
        handle_redirects (bool): HTTP redirects handled automatically or not.

    Returns:
        Deferred: The filename of the downloaded file.

    Raises:
        t.w.e.PageRedirect: If handle_redirects is False.
        t.w.e.Error: For all other HTTP response errors.
            NOTE(review): carried over from the original docstring; this
            module does not show where it is raised - confirm.
    """

    def _log_success(downloaded):
        log.debug('Download success!')
        return downloaded

    def _log_failure(reason):
        log.warning(
            'Error occurred downloading file from "%s": %s',
            url,
            reason.getErrorMessage(),
        )
        # Hand the failure back so it carries on down the errback chain.
        return reason

    options = dict(
        callback=callback,
        headers=headers,
        force_filename=force_filename,
        allow_compression=allow_compression,
        handle_redirects=handle_redirects,
    )
    deferred = _download_file(url, filename, **options)
    deferred.addCallbacks(_log_success, _log_failure)
    return deferred