gh-143925: Reject control characters in data: URL mediatypes

This commit is contained in:
Seth Michael Larson 2026-01-20 14:45:58 -06:00 committed by GitHub
parent 6262704b13
commit f25509e78e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import unittest
from test import support
from test.support import os_helper
from test.support import socket_helper
from test.support import control_characters_c0
import os
import socket
try:
@ -590,6 +591,13 @@ class urlopen_DataTests(unittest.TestCase):
# missing padding character
self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
def test_invalid_mediatype(self):
    """Control characters in a data: URL mediatype must raise ValueError."""
    open_url = urllib.request.urlopen
    # Control character in the parameter section, ahead of the comma.
    for ctrl in control_characters_c0():
        with self.assertRaises(ValueError):
            open_url(f'data:text/html;{ctrl},data')
    # Control character directly after the MIME type, with a base64 payload.
    for ctrl in control_characters_c0():
        with self.assertRaises(ValueError):
            open_url(f'data:text/html{ctrl};base64,ZGF0YQ==')
class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files"""

View File

@ -1636,6 +1636,11 @@ class DataHandler(BaseHandler):
scheme, data = url.split(":",1)
mediatype, data = data.split(",",1)
# Disallow control characters within mediatype.
if re.search(r"[\x00-\x1F\x7F]", mediatype):
raise ValueError(
"Control characters not allowed in data: mediatype")
# even base64 encoded data URLs might be quoted so unquote in any case:
data = unquote_to_bytes(data)
if mediatype.endswith(";base64"):

View File

@ -0,0 +1 @@
Reject control characters in ``data:`` URL media types:
:func:`urllib.request.urlopen` now raises :exc:`ValueError` for such URLs.