From 66a1fcc69765bb704146fe7d084848302dd3c89e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 15 Jul 2023 17:33:52 +0200 Subject: [PATCH 2/3] Sanitize libxml2 globals before parsing Fixes GHSA-3qrf-m4j2-pcrr. To parse a document with libxml2, you first need to create a parsing context. The parsing context contains parsing options (e.g. XML_NOENT to substitute entities) that the application (in this case PHP) can set. Unfortunately, libxml2 also supports providing default set options. For example, if you call xmlSubstituteEntitiesDefault(1) then the XML_NOENT option will be added to the parsing options every time you create a parsing context **even if the application never requested XML_NOENT**. Third party extensions can override these globals, in particular the substitute entity global. This causes entity substitution to be unexpectedly active. Fix it by setting the parsing options to a sane known value. For API calls that depend on global state we introduce PHP_LIBXML_SANITIZE_GLOBALS() and PHP_LIBXML_RESTORE_GLOBALS(). For other APIs that work directly with a context we introduce php_libxml_sanitize_parse_ctxt_options(). (cherry picked from commit c283c3ab0ba45d21b2b8745c1f9c7cbfe771c975) (cherry picked from commit b3758bd21223b97c042cae7bd26a66cde081ea98) (cherry picked from commit 4fb61f06b1aff89a4d7e548c37ffa5bf573270c3) (cherry picked from commit d7de6908dfc8774e86a54100ad4e2ee810426001) --- ext/dom/document.c | 15 +++++++++++++++ ext/dom/documentfragment.c | 2 ++ ext/libxml/php_libxml.h | 36 +++++++++++++++++++++++++++++++++++ ext/simplexml/simplexml.c | 6 ++++++ ext/soap/php_xml.c | 2 ++ ext/xml/compat.c | 2 ++ ext/xmlreader/php_xmlreader.c | 9 +++++++++ ext/xsl/xsltprocessor.c | 9 ++++----- 8 files changed, 76 insertions(+), 5 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index c212faa695..b0eed5820b 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1438,6 +1438,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so options |= XML_PARSE_NOBLANKS; } + php_libxml_sanitize_parse_ctxt_options(ctxt); xmlCtxtUseOptions(ctxt, options); ctxt->recovery = recover; @@ -1735,7 +1736,9 @@ PHP_FUNCTION(dom_document_xinclude) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + PHP_LIBXML_SANITIZE_GLOBALS(xinclude); err = xmlXIncludeProcessFlags(docp, (int)flags); + PHP_LIBXML_RESTORE_GLOBALS(xinclude); /* XML_XINCLUDE_START and XML_XINCLUDE_END nodes need to be removed as these are added via xmlXIncludeProcess to mark beginning and ending of xincluded document @@ -1774,6 +1777,7 @@ PHP_FUNCTION(dom_document_validate) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + PHP_LIBXML_SANITIZE_GLOBALS(validate); cvp = xmlNewValidCtxt(); cvp->userData = NULL; @@ -1785,6 +1789,7 @@ PHP_FUNCTION(dom_document_validate) } else { RETVAL_FALSE; } + PHP_LIBXML_RESTORE_GLOBALS(validate); xmlFreeValidCtxt(cvp); @@ -1818,14 +1823,18 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + PHP_LIBXML_SANITIZE_GLOBALS(new_parser_ctxt); + switch (type) { case DOM_LOAD_FILE: if (CHECK_NULL_PATH(source, source_len)) { + PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt); php_error_docref(NULL, E_WARNING, "Invalid Schema file source"); RETURN_FALSE; } valid_file = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN); if (!valid_file) { + PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt); php_error_docref(NULL, E_WARNING, "Invalid Schema file source"); RETURN_FALSE; } @@ -1846,6 +1855,7 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type parser); sptr = xmlSchemaParse(parser); xmlSchemaFreeParserCtxt(parser); + PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt); if (!sptr) { php_error_docref(NULL, E_WARNING, "Invalid Schema"); RETURN_FALSE; @@ -1866,11 +1876,13 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type } #endif + PHP_LIBXML_SANITIZE_GLOBALS(validate); xmlSchemaSetValidOptions(vptr, valid_opts); xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); is_valid = xmlSchemaValidateDoc(vptr, docp); xmlSchemaFree(sptr); xmlSchemaFreeValidCtxt(vptr); + PHP_LIBXML_RESTORE_GLOBALS(validate); if (is_valid == 0) { RETURN_TRUE; @@ -1940,12 +1952,14 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ return; } + PHP_LIBXML_SANITIZE_GLOBALS(parse); xmlRelaxNGSetParserErrors(parser, (xmlRelaxNGValidityErrorFunc) php_libxml_error_handler, (xmlRelaxNGValidityWarningFunc) php_libxml_error_handler, parser); sptr = xmlRelaxNGParse(parser); xmlRelaxNGFreeParserCtxt(parser); + PHP_LIBXML_RESTORE_GLOBALS(parse); if (!sptr) { php_error_docref(NULL, E_WARNING, "Invalid RelaxNG"); RETURN_FALSE; @@ -2045,6 +2059,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ ctxt->sax->error = php_libxml_ctx_error; ctxt->sax->warning = php_libxml_ctx_warning; } + php_libxml_sanitize_parse_ctxt_options(ctxt); if (options) { htmlCtxtUseOptions(ctxt, (int)options); } diff --git a/ext/dom/documentfragment.c b/ext/dom/documentfragment.c index 87cb691501..b7ecfdc14b 100644 --- a/ext/dom/documentfragment.c +++ b/ext/dom/documentfragment.c @@ -134,7 +134,9 @@ PHP_METHOD(domdocumentfragment, appendXML) { } if (data) { + PHP_LIBXML_SANITIZE_GLOBALS(parse); err = xmlParseBalancedChunkMemory(nodep->doc, NULL, NULL, 0, (xmlChar *) data, &lst); + PHP_LIBXML_RESTORE_GLOBALS(parse); if (err != 0) { RETURN_FALSE; } diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index f7aa726d88..371b4eecc5 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -122,6 +122,42 @@ PHP_LIBXML_API void php_libxml_shutdown(void); ZEND_TSRMLS_CACHE_EXTERN() #endif +/* Other extension may override the global state options, these global options + * are copied initially to ctxt->options. Set the options to a known good value. + * See libxml2 globals.c and parserInternals.c. + * The unique_name argument allows multiple sanitizes and restores within the + * same function, even nested is necessary. */ +#define PHP_LIBXML_SANITIZE_GLOBALS(unique_name) \ + int xml_old_loadsubset_##unique_name = xmlLoadExtDtdDefaultValue; \ + xmlLoadExtDtdDefaultValue = 0; \ + int xml_old_validate_##unique_name = xmlDoValidityCheckingDefaultValue; \ + xmlDoValidityCheckingDefaultValue = 0; \ + int xml_old_pedantic_##unique_name = xmlPedanticParserDefault(0); \ + int xml_old_substitute_##unique_name = xmlSubstituteEntitiesDefault(0); \ + int xml_old_linenrs_##unique_name = xmlLineNumbersDefault(0); \ + int xml_old_blanks_##unique_name = xmlKeepBlanksDefault(1); + +#define PHP_LIBXML_RESTORE_GLOBALS(unique_name) \ + xmlLoadExtDtdDefaultValue = xml_old_loadsubset_##unique_name; \ + xmlDoValidityCheckingDefaultValue = xml_old_validate_##unique_name; \ + (void) xmlPedanticParserDefault(xml_old_pedantic_##unique_name); \ + (void) xmlSubstituteEntitiesDefault(xml_old_substitute_##unique_name); \ + (void) xmlLineNumbersDefault(xml_old_linenrs_##unique_name); \ + (void) xmlKeepBlanksDefault(xml_old_blanks_##unique_name); + +/* Alternative for above, working directly on the context and not setting globals. + * Generally faster because no locking is involved, and this has the advantage that it sets the options to a known good value. */ +static zend_always_inline void php_libxml_sanitize_parse_ctxt_options(xmlParserCtxtPtr ctxt) +{ + ctxt->loadsubset = 0; + ctxt->validate = 0; + ctxt->pedantic = 0; + ctxt->replaceEntities = 0; + ctxt->linenumbers = 0; + ctxt->keepBlanks = 1; + ctxt->options = 0; +} + #else /* HAVE_LIBXML */ #define libxml_module_ptr NULL #endif diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index ae067cdcdd..bfac015964 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -2229,7 +2229,9 @@ PHP_FUNCTION(simplexml_load_file) RETURN_FALSE; } + PHP_LIBXML_SANITIZE_GLOBALS(read_file); docp = xmlReadFile(filename, NULL, (int)options); + PHP_LIBXML_RESTORE_GLOBALS(read_file); if (!docp) { RETURN_FALSE; @@ -2283,7 +2285,9 @@ PHP_FUNCTION(simplexml_load_string) RETURN_FALSE; } + PHP_LIBXML_SANITIZE_GLOBALS(read_memory); docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options); + PHP_LIBXML_RESTORE_GLOBALS(read_memory); if (!docp) { RETURN_FALSE; @@ -2333,7 +2337,9 @@ SXE_METHOD(__construct) return; } + PHP_LIBXML_SANITIZE_GLOBALS(read_file_or_memory); docp = is_url ? xmlReadFile(data, NULL, (int)options) : xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options); + PHP_LIBXML_RESTORE_GLOBALS(read_file_or_memory); if (!docp) { ((php_libxml_node_object *)sxe)->document = NULL; diff --git a/ext/soap/php_xml.c b/ext/soap/php_xml.c index 1ac684eb81..053960c559 100644 --- a/ext/soap/php_xml.c +++ b/ext/soap/php_xml.c @@ -94,6 +94,7 @@ xmlDocPtr soap_xmlParseFile(const char *filename) if (ctxt) { zend_bool old; + php_libxml_sanitize_parse_ctxt_options(ctxt); ctxt->keepBlanks = 0; ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace; ctxt->sax->comment = soap_Comment; @@ -144,6 +145,7 @@ xmlDocPtr soap_xmlParseMemory(const void *buf, size_t buf_size) if (ctxt) { zend_bool old; + php_libxml_sanitize_parse_ctxt_options(ctxt); ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace; ctxt->sax->comment = soap_Comment; ctxt->sax->warning = NULL; diff --git a/ext/xml/compat.c b/ext/xml/compat.c index ef83485722..c55047215b 100644 --- a/ext/xml/compat.c +++ b/ext/xml/compat.c @@ -19,6 +19,7 @@ #include "php.h" #if defined(HAVE_LIBXML) && (defined(HAVE_XML) || defined(HAVE_XMLRPC)) && !defined(HAVE_LIBEXPAT) #include "expat_compat.h" +#include "ext/libxml/php_libxml.h" typedef struct _php_xml_ns { xmlNsPtr nsptr; @@ -473,6 +474,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *m parser->parser->charset = XML_CHAR_ENCODING_NONE; #endif + php_libxml_sanitize_parse_ctxt_options(parser->parser); #if LIBXML_VERSION >= 20703 xmlCtxtUseOptions(parser->parser, XML_PARSE_OLDSAX); #endif diff --git a/ext/xmlreader/php_xmlreader.c b/ext/xmlreader/php_xmlreader.c index 0f2b62ae20..f0fd013ff9 100644 --- a/ext/xmlreader/php_xmlreader.c +++ b/ext/xmlreader/php_xmlreader.c @@ -301,6 +301,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz return NULL; } + PHP_LIBXML_SANITIZE_GLOBALS(parse); if (error_func || warn_func) { xmlRelaxNGSetParserErrors(parser, (xmlRelaxNGValidityErrorFunc) error_func, @@ -309,6 +310,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz } sptr = xmlRelaxNGParse(parser); xmlRelaxNGFreeParserCtxt(parser); + PHP_LIBXML_RESTORE_GLOBALS(parse); return sptr; } @@ -881,7 +883,9 @@ PHP_METHOD(xmlreader, open) valid_file = _xmlreader_get_valid_file_path(source, resolved_path, MAXPATHLEN ); if (valid_file) { + PHP_LIBXML_SANITIZE_GLOBALS(reader_for_file); reader = xmlReaderForFile(valid_file, encoding, options); + PHP_LIBXML_RESTORE_GLOBALS(reader_for_file); } if (reader == NULL) { @@ -959,7 +963,9 @@ PHP_METHOD(xmlreader, setSchema) intern = Z_XMLREADER_P(id); if (intern && intern->ptr) { + PHP_LIBXML_SANITIZE_GLOBALS(schema); retval = xmlTextReaderSchemaValidate(intern->ptr, source); + PHP_LIBXML_RESTORE_GLOBALS(schema); if (retval == 0) { RETURN_TRUE; @@ -1079,6 +1085,7 @@ PHP_METHOD(xmlreader, XML) } uri = (char *) xmlCanonicPath((const xmlChar *) resolved_path); } + PHP_LIBXML_SANITIZE_GLOBALS(text_reader); reader = xmlNewTextReader(inputbfr, uri); if (reader != NULL) { @@ -1099,9 +1106,11 @@ PHP_METHOD(xmlreader, XML) xmlFree(uri); } + PHP_LIBXML_RESTORE_GLOBALS(text_reader); return; } } + PHP_LIBXML_RESTORE_GLOBALS(text_reader); } if (uri) { diff --git a/ext/xsl/xsltprocessor.c b/ext/xsl/xsltprocessor.c index 9948d6f0b3..7bbe640a5c 100644 --- a/ext/xsl/xsltprocessor.c +++ b/ext/xsl/xsltprocessor.c @@ -396,7 +396,7 @@ PHP_FUNCTION(xsl_xsltprocessor_import_stylesheet) xmlDoc *doc = NULL, *newdoc = NULL; xsltStylesheetPtr sheetp, oldsheetp; xsl_object *intern; - int prevSubstValue, prevExtDtdValue, clone_docu = 0; + int clone_docu = 0; xmlNode *nodep = NULL; zend_object_handlers *std_hnd; zval *cloneDocu, member, rv; @@ -419,13 +419,12 @@ PHP_FUNCTION(xsl_xsltprocessor_import_stylesheet) stylesheet document otherwise the node proxies will be a mess */ newdoc = xmlCopyDoc(doc, 1); xmlNodeSetBase((xmlNodePtr) newdoc, (xmlChar *)doc->URL); - prevSubstValue = xmlSubstituteEntitiesDefault(1); - prevExtDtdValue = xmlLoadExtDtdDefaultValue; + PHP_LIBXML_SANITIZE_GLOBALS(parse); + xmlSubstituteEntitiesDefault(1); xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS; sheetp = xsltParseStylesheetDoc(newdoc); - xmlSubstituteEntitiesDefault(prevSubstValue); - xmlLoadExtDtdDefaultValue = prevExtDtdValue; + PHP_LIBXML_RESTORE_GLOBALS(parse); if (!sheetp) { xmlFreeDoc(newdoc); -- 2.41.0 From 3ac0ce8a462cb31815330d1410e1a8a615c395eb Mon Sep 17 00:00:00 2001 From: Remi Collet Date: Tue, 1 Aug 2023 07:22:33 +0200 Subject: [PATCH 3/3] NEWS (cherry picked from commit ef1d507acf7be23d7624dc3c891683b2218feb51) (cherry picked from commit 3cf7c2b10e577136b267f2d90bfdff6743271c5c) (cherry picked from commit 79c0bf87711036b83f8ee1723c034ccc839d847b) --- NEWS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/NEWS b/NEWS index e7cdaa7698..99387fbfd7 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,16 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Backported from 8.0.30 + +- Libxml: + . Fixed bug GHSA-3qrf-m4j2-pcrr (Security issue with external entity loading + in XML without enabling it). (CVE-2023-3823) (nielsdos, ilutov) + +- Phar: + . Fixed bug GHSA-jqcx-ccgc-xwhv (Buffer mismanagement in phar_dir_read()). + (CVE-2023-3824) (nielsdos) + Backported from 8.0.29 - Soap: -- 2.41.0