Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3303,3 +3303,10 @@ def linked_to_musl():
return _linked_to_musl
_linked_to_musl = tuple(map(int, version.split('.')))
return _linked_to_musl


def control_characters_c0() -> list[str]:
"""Returns a list of C0 control characters as strings.
C0 control characters defined as the byte range 0x00-0x1F, and 0x7F.
"""
return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"]
8 changes: 8 additions & 0 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from test import support
from test.support import os_helper
from test.support import socket_helper
from test.support import control_characters_c0
import os
import socket
try:
Expand Down Expand Up @@ -590,6 +591,13 @@ def test_invalid_base64_data(self):
# missing padding character
self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')

def test_invalid_mediatype(self):
for c0 in control_characters_c0():
self.assertRaises(ValueError,urllib.request.urlopen,
f'data:text/html;{c0},data')
for c0 in control_characters_c0():
self.assertRaises(ValueError,urllib.request.urlopen,
f'data:text/html{c0};base64,ZGF0YQ==')

class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files"""
Expand Down
5 changes: 5 additions & 0 deletions Lib/urllib/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -1636,6 +1636,11 @@ def data_open(self, req):
scheme, data = url.split(":",1)
mediatype, data = data.split(",",1)

# Disallow control characters within mediatype.
if re.search(r"[\x00-\x1F\x7F]", mediatype):
raise ValueError(
"Control characters not allowed in data: mediatype")

# even base64 encoded data URLs might be quoted so unquote in any case:
data = unquote_to_bytes(data)
if mediatype.endswith(";base64"):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reject control characters in ``data:`` URL media types.
Loading