From d8850aac54c234201966c66e83225564302cd15c Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 16 Jan 2026 10:54:09 -0600 Subject: [PATCH 1/2] Add 'test.support' fixture for C0 control characters --- Lib/test/support/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 3a639497fa1272..9c113a6c137e52 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3303,3 +3303,10 @@ def linked_to_musl(): return _linked_to_musl _linked_to_musl = tuple(map(int, version.split('.'))) return _linked_to_musl + + +def control_characters_c0() -> list[str]: + """Returns a list of C0 control characters as strings. + C0 control characters defined as the byte range 0x00-0x1F, and 0x7F. + """ + return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"] From 5fcd6446f9b527901caaf2ead8dddf562cd25894 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 30 Dec 2025 08:50:45 -0600 Subject: [PATCH 2/2] gh-143925: Reject control characters in data: URL mediatypes --- Lib/test/test_urllib.py | 8 ++++++++ Lib/urllib/request.py | 5 +++++ .../2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst | 1 + 3 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index ae524c5ffba6b1..2dd739b77b8e4d 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -10,6 +10,7 @@ from test import support from test.support import os_helper from test.support import socket_helper +from test.support import control_characters_c0 import os import socket try: @@ -590,6 +591,13 @@ def test_invalid_base64_data(self): # missing padding character self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=') + def test_invalid_mediatype(self): + for c0 in control_characters_c0(): + self.assertRaises(ValueError,urllib.request.urlopen, + f'data:text/html;{c0},data') + for c0 in control_characters_c0(): + self.assertRaises(ValueError,urllib.request.urlopen, + f'data:text/html{c0};base64,ZGF0YQ==') class urlretrieve_FileTests(unittest.TestCase): """Test urllib.urlretrieve() on local files""" diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index f32de189b1353a..f5f17f223a4585 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1636,6 +1636,11 @@ def data_open(self, req): scheme, data = url.split(":",1) mediatype, data = data.split(",",1) + # Disallow control characters within mediatype. + if re.search(r"[\x00-\x1F\x7F]", mediatype): + raise ValueError( + "Control characters not allowed in data: mediatype") + # even base64 encoded data URLs might be quoted so unquote in any case: data = unquote_to_bytes(data) if mediatype.endswith(";base64"): diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst new file mode 100644 index 00000000000000..46109dfbef3ee7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst @@ -0,0 +1 @@ +Reject control characters in ``data:`` URL media types.