From a619176adb0fb0d63058e7f075faa1cba584e787 Mon Sep 17 00:00:00 2001 From: Himanshu Pathak Date: Fri, 9 Jan 2026 00:12:27 +0530 Subject: [PATCH 1/4] Sanitize XML output by stripping invalid characters in esc_xml function --- src/wp-includes/formatting.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index f59f877775b77..4fe0dba9a9bd4 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -4760,6 +4760,16 @@ function esc_textarea( $text ) { function esc_xml( $text ) { $safe_text = wp_check_invalid_utf8( $text ); + // Strip invalid XML characters. + $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ); + if ( $is_utf8 ) { + $safe_text = preg_replace( + '/[^\x{9}\x{A}\x{D}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', + '', + $safe_text + ); + } + $cdata_regex = '\<\!\[CDATA\[.*?\]\]\>'; $regex = << Date: Fri, 9 Jan 2026 00:14:28 +0530 Subject: [PATCH 2/4] Tests: Add unit tests to strip invalid XML control characters in esc_xml function --- tests/phpunit/tests/formatting/escXml.php | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/phpunit/tests/formatting/escXml.php b/tests/phpunit/tests/formatting/escXml.php index 6fdbd136a7236..3e3198653240a 100644 --- a/tests/phpunit/tests/formatting/escXml.php +++ b/tests/phpunit/tests/formatting/escXml.php @@ -139,4 +139,76 @@ public function data_ignores_cdata_sections() { ), ); } + + /** + * Test that invalid XML control characters are stripped. + * + * @dataProvider data_strips_invalid_xml_characters + * + * @param string $source The source string containing invalid XML characters. + * @param string $expected The expected string with invalid characters removed. 
+ */ + public function test_strips_invalid_xml_characters( $source, $expected ) { + update_option( 'blog_charset', 'UTF-8' ); + $actual = esc_xml( $source ); + $this->assertSame( $expected, $actual ); + } + + /** + * Data provider for `test_strips_invalid_xml_characters()`. + * + * @return array { + * @type string $source The source string containing invalid XML characters. + * @type string $expected The expected string with invalid characters removed. + * } + */ + public function data_strips_invalid_xml_characters() { + return array( + // Vertical tab (0x0B) - invalid in XML. + array( + "This contains a vertical tab\x0Bcharacter", + 'This contains a vertical tabcharacter', + ), + // File separator (0x1C) - invalid in XML. + array( + "File separator\x1Ctest", + 'File separatortest', + ), + // NULL byte (0x00) - invalid in XML. + array( + "Text with\x00null byte", + 'Text withnull byte', + ), + // Bell character (0x07) - invalid in XML. + array( + "Bell\x07character", + 'Bellcharacter', + ), + // Multiple invalid characters. + array( + "Multiple\x00invalid\x0B\x1Ccharacters\x07here", + 'Multipleinvalidcharactershere', + ), + // Valid control characters should be preserved: tab (0x09), LF (0x0A), CR (0x0D). + array( + "Tab\tlinefeed\ncarriage return\rtest", + "Tab\tlinefeed\ncarriage return\rtest", + ), + // Mix of valid and invalid. + array( + "Valid\ttab but\x0Binvalid vertical tab", + "Valid\ttab butinvalid vertical tab", + ), + // Text without invalid characters should remain unchanged. + array( + 'Normal text with spaces and punctuation!', + 'Normal text with spaces and punctuation!', + ), + // Unicode characters in valid range should be preserved. 
+ array( + 'Unicode: café, naïve, 日本語', + 'Unicode: café, naïve, 日本語', + ), + ); + } } From 7ed281654c1c8f995c07f51643c248b5899925b4 Mon Sep 17 00:00:00 2001 From: Himanshu Pathak Date: Fri, 9 Jan 2026 00:15:04 +0530 Subject: [PATCH 3/4] Tests: Add unit test to strip invalid XML characters within CDATA sections in esc_xml function --- tests/phpunit/tests/formatting/escXml.php | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/phpunit/tests/formatting/escXml.php b/tests/phpunit/tests/formatting/escXml.php index 3e3198653240a..c09b289a9a577 100644 --- a/tests/phpunit/tests/formatting/escXml.php +++ b/tests/phpunit/tests/formatting/escXml.php @@ -211,4 +211,15 @@ public function data_strips_invalid_xml_characters() { ), ); } + + /** + * Test that invalid XML characters are stripped from text that also contains a CDATA section. + */ + public function test_strips_invalid_xml_characters_outside_cdata() { + update_option( 'blog_charset', 'UTF-8' ); + $source = "Text\x0Bwith]]>and\x1Cmore\x00invalid"; + $expected = 'Textwith]]>andmoreinvalid'; + $actual = esc_xml( $source ); + $this->assertSame( $expected, $actual ); + } } From ac7136a7446de46e4c739658fe88150af6a179e3 Mon Sep 17 00:00:00 2001 From: Himanshu Pathak Date: Fri, 9 Jan 2026 00:16:08 +0530 Subject: [PATCH 4/4] Tests: Add unit test to verify esc_xml function behavior with non-UTF-8 charset --- tests/phpunit/tests/formatting/escXml.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/phpunit/tests/formatting/escXml.php b/tests/phpunit/tests/formatting/escXml.php index c09b289a9a577..78d735f692618 100644 --- a/tests/phpunit/tests/formatting/escXml.php +++ b/tests/phpunit/tests/formatting/escXml.php @@ -222,4 +222,14 @@ public function test_strips_invalid_xml_characters_outside_cdata() { $actual = esc_xml( $source ); $this->assertSame( $expected, $actual ); } + + /** + * Test that the function works correctly when charset is not UTF-8. 
+ */ + public function test_non_utf8_charset_skips_invalid_character_stripping() { + update_option( 'blog_charset', 'ISO-8859-1' ); + $source = "Test\x0Btext"; + $actual = esc_xml( $source ); + $this->assertSame( $source, $actual, 'Invalid XML characters must be left in place when blog_charset is not UTF-8.' ); + } }