scrapy · Gallaecio · Apr 25, 2023 · May 4, 2023 · May 4, 2023 · May 4, 2023
diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
@@ -12,6 +12,7 @@
 from twisted.internet.endpoints import TCP4ClientEndpoint
 from twisted.internet.error import TimeoutError
 from twisted.python.failure import Failure
+from twisted.web._newclient import HTTPClientParser
 from twisted.web.client import (
  URI,
  Agent,
@@ -35,6 +36,11 @@
 logger = logging.getLogger(__name__)
 
 
+# Monkey-patch to raise the maximum length for (header) lines to at least
+# 2**16 (2**14 by default as of Twisted 22.10.0); max() keeps a larger limit.
+HTTPClientParser.MAX_LENGTH = max(2**16, HTTPClientParser.MAX_LENGTH)
+
+
 class HTTP11DownloadHandler:
  lazy = False
 

diff --git a/tests/test_downloader_handlers.py b/tests/test_downloader_handlers.py
@@ -215,6 +215,12 @@ def render(self, request):
  return b""
 
 
+class LongHeaderResource(resource.Resource):  # test resource: one very long response header, empty body
+ def render(self, request):
+ request.responseHeaders.setRawHeaders(b"a", [b"a" * 2**15])  # header "a" carries a 2**15-byte value
+ return b""
+
+
 class HttpTestCase(unittest.TestCase):
  scheme = "http"
  download_handler_cls: Type = HTTPDownloadHandler
@@ -241,6 +247,7 @@ def setUp(self):
  r.putChild(b"nocontenttype", EmptyContentTypeHeaderResource())
  r.putChild(b"largechunkedfile", LargeChunkedFileResource())
  r.putChild(b"duplicate-header", DuplicateHeaderResource())
+ r.putChild(b"long-header", LongHeaderResource())
  r.putChild(b"echo", Echo())
  self.site = server.Site(r, timeout=None)
  self.wrapper = WrappingFactory(self.site)
@@ -424,6 +431,13 @@ def _test(response):
  request = Request(self.getURL("duplicate-header"))
  return self.download_request(request, Spider("foo")).addCallback(_test)
 
+ def test_get_long_header(self):  # the 2**15-byte header value must arrive unmodified
+ def _test(response):
+ self.assertEqual(response.headers.get(b"a"), b"a" * 2**15)
+
+ request = Request(self.getURL("long-header"))  # child path registered for LongHeaderResource in setUp
+ return self.download_request(request, Spider("foo")).addCallback(_test)
+
 
 class Http10TestCase(HttpTestCase):
  """HTTP 1.0 test case"""
@@ -437,6 +451,9 @@ def test_protocol(self):
  d.addCallback(self.assertEqual, "HTTP/1.0")
  return d
 
+ def test_get_long_header(self):  # overrides the HttpTestCase test so it is skipped for HTTP/1.0
+ raise unittest.SkipTest("Scrapy does not support long headers on HTTP/1.0")
+
 
 class Https10TestCase(Http10TestCase):
  scheme = "https"