AssertionError on malformed URL through a proxy
Unsure whether this is the right place to handle this error – happy to report this elsewhere if you disagree.
When opening a request with a malformed URL through a proxy, an AssertionError from deep in the http library is reported.
Expected Result
ConnectionError, NameResolutionError or something more indicative of what the problem is
Actual Result
AssertionError: b''
Reproduction Steps
import requests
# Note: `client` here is any object whose `.proxies` attribute maps the
# "http" scheme to a proxy URL, e.g. proxies = {"http": "http://localhost:8080"}
requests.get("http://%C2%A0www.github.com/", proxies=client.proxies)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
Cell In[36], line 1
----> 1 requests.get("http://%C2%A0www.github.com/", proxies=client.proxies)
File /opt/venv/lib/python3.12/site-packages/requests/api.py:73, in get(url, params, **kwargs)
62 def get(url, params=None, **kwargs):
63 r"""Sends a GET request.
64
65 :param url: URL for the new :class:`Request` object.
(...)
70 :rtype: requests.Response
71 """
---> 73 return request("get", url, params=params, **kwargs)
File /opt/venv/lib/python3.12/site-packages/requests/api.py:59, in request(method, url, **kwargs)
55 # By using the 'with' statement we are sure the session is closed, thus we
56 # avoid leaving sockets open which can trigger a ResourceWarning in some
57 # cases, and look like a memory leak in others.
58 with sessions.Session() as session:
---> 59 return session.request(method=method, url=url, **kwargs)
File /opt/venv/lib/python3.12/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File /opt/venv/lib/python3.12/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File /opt/venv/lib/python3.12/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
664 timeout = TimeoutSauce(connect=timeout, read=timeout)
666 try:
--> 667 resp = conn.urlopen(
668 method=request.method,
669 url=url,
670 body=request.body,
671 headers=request.headers,
672 redirect=False,
673 assert_same_host=False,
674 preload_content=False,
675 decode_content=False,
676 retries=self.max_retries,
677 timeout=timeout,
678 chunked=chunked,
679 )
681 except (ProtocolError, OSError) as err:
682 raise ConnectionError(err, request=request)
File /opt/venv/lib/python3.12/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
786 response_conn = conn if not release_conn else None
788 # Make the request on the HTTPConnection object
--> 789 response = self._make_request(
790 conn,
791 method,
792 url,
793 timeout=timeout_obj,
794 body=body,
795 headers=headers,
796 chunked=chunked,
797 retries=retries,
798 response_conn=response_conn,
799 preload_content=preload_content,
800 decode_content=decode_content,
801 **response_kw,
802 )
804 # Everything went great!
805 clean_exit = True
File /opt/venv/lib/python3.12/site-packages/urllib3/connectionpool.py:495, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
492 # conn.request() calls http.client.*.request, not the method in
493 # urllib3.request. It also calls makefile (recv) on the socket.
494 try:
--> 495 conn.request(
496 method,
497 url,
498 body=body,
499 headers=headers,
500 chunked=chunked,
501 preload_content=preload_content,
502 decode_content=decode_content,
503 enforce_content_length=enforce_content_length,
504 )
506 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
507 # legitimately able to close the connection after sending a valid response.
508 # With this behaviour, the received response is still readable.
509 except BrokenPipeError:
File /opt/venv/lib/python3.12/site-packages/urllib3/connection.py:403, in HTTPConnection.request(self, method, url, body, headers, chunked, preload_content, decode_content, enforce_content_length)
401 skip_accept_encoding = "accept-encoding" in header_keys
402 skip_host = "host" in header_keys
--> 403 self.putrequest(
404 method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
405 )
407 # Transform the body into an iterable of sendall()-able chunks
408 # and detect if an explicit Content-Length is doable.
409 chunks_and_cl = body_to_chunks(body, method=method, blocksize=self.blocksize)
File /opt/venv/lib/python3.12/site-packages/urllib3/connection.py:347, in HTTPConnection.putrequest(self, method, url, skip_host, skip_accept_encoding)
342 if match:
343 raise ValueError(
344 f"Method cannot contain non-token characters {method!r} (found at least {match.group()!r})"
345 )
--> 347 return super().putrequest(
348 method, url, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding
349 )
File /usr/local/lib/python3.12/http/client.py:1214, in HTTPConnection.putrequest(self, method, url, skip_host, skip_accept_encoding)
1212 except UnicodeEncodeError:
1213 netloc_enc = netloc.encode("idna")
-> 1214 self.putheader('Host', _strip_ipv6_iface(netloc_enc))
1215 else:
1216 if self._tunnel_host:
File /usr/local/lib/python3.12/http/client.py:179, in _strip_ipv6_iface(enc_name)
177 enc_name, percent, _ = enc_name.partition(b"%")
178 if percent:
--> 179 assert enc_name.startswith(b'['), enc_name
180 enc_name += b']'
181 return enc_name
AssertionError: b''
System Information
$ python -m requests.help
{
"chardet": {
"version": null
},
"charset_normalizer": {
"version": "3.4.0"
},
"cryptography": {
"version": ""
},
"idna": {
"version": "3.10"
},
"implementation": {
"name": "CPython",
"version": "3.12.7"
},
"platform": {
"release": "6.8.0-36-generic",
"system": "Linux"
},
"pyOpenSSL": {
"openssl_version": "",
"version": null
},
"requests": {
"version": "2.32.3"
},
"system_ssl": {
"version": "1010117f"
},
"urllib3": {
"version": "2.2.3"
},
"using_charset_normalizer": true,
"using_pyopenssl": false
}
The issue you're encountering is related to how the requests library handles malformed URLs when making requests through a proxy. Specifically, the AssertionError is being raised because the URL contains invalid characters (in this case, %C2%A0, which is a non-breaking space encoded in UTF-8).
Here is a suggested workaround:

```python
import requests
from urllib.parse import quote

# Encode the URL properly
url = "http://%C2%A0www.github.com/"
encoded_url = quote(url, safe=':/')

# Make the request through the proxy
try:
    response = requests.get(encoded_url, proxies=client.proxies)
    print(response.status_code)
except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")
```

By properly encoding the URL and handling potential errors, you can avoid the AssertionError and make your code more robust when dealing with malformed URLs through a proxy.
This isn't the appropriate place to fix this
@sigmavirus24 Thanks for the feedback. I'd like to understand it better. Could you point me to where this fix should be handled instead? I see my commit addressed it differently. I want to learn the right approach for future contributions.