head	1.2;
access;
symbols;
locks; strict;
comment	@ * @;


1.2
date	2026.05.11.17.39.13;	author wiz;	state Exp;
branches;
next	1.1;
commitid	wbz2PibKu7isdqFG;

1.1
date	2026.05.11.06.24.02;	author wiz;	state Exp;
branches;
next	;
commitid	WS2MxtDpLjmLtmFG;


desc
@@


1.2
log
@p5-XML-LibXML: add another upstream pull request

with a possible security fix

Bump PKGREVISION.
@
text
@$NetBSD: patch-dom.c,v 1.1 2026/05/11 06:24:02 wiz Exp $

fix: validate UTF-8 continuation bytes in domParseChar
https://github.com/cpan-authors/XML-LibXML/pull/149

From 15652bd905a6c9dda59a81b14d4766adbbae2ea8 Mon Sep 17 00:00:00 2001
From: Toddr Bot <toddbot@@rinaldo.us>
Date: Fri, 8 May 2026 12:26:36 +0000
Subject: [PATCH] fix: validate UTF-8 continuation bytes in domParseChar to
 prevent OOB read

domParseChar() read continuation bytes for multi-byte UTF-8 sequences
without verifying they actually exist or are valid. A truncated sequence
like "a\xF0" caused reads past the NUL terminator into uninitialized
heap memory. The caller LibXML_test_node_name() then advanced its
pointer by the (wrong) reported length, continuing to read from
uncontrolled heap until hitting a zero byte or unmapped memory.

Add validation that each continuation byte has the 10xxxxxx form
before reading it, matching libxml2's own xmlCurrentChar() behavior.
Invalid sequences now return 0 with *len = -1.

Fixes #146

Co-Authored-By: Claude Opus 4.6 <noreply@@anthropic.com>
---
 dom.c          | 18 ++++++++++++++++++
 t/06elements.t | 10 +++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

--- dom.c.orig	2017-10-23 08:52:55.000000000 +0000
+++ dom.c
@@@@ -239,7 +239,7 @@@@ domReconcileNs(xmlNodePtr tree)
  * NAME domParseChar
  * TYPE function
  * SYNOPSIS
- *   int utf8char = domParseChar( curchar, &len );
+ *   int utf8char = domParseChar( curchar, &len, remaining );
  *
  * The current char value, if using UTF-8 this may actually span
  * multiple bytes in the given string. This function parses an utf8
@@@@ -260,12 +260,14 @@@@ domReconcileNs(xmlNodePtr tree)
  *
  * Returns the current char value and its length
  *
- * NOTE: If the character passed to this function is not a UTF
- * character, the return value will be 0 and the length of the
- * character is -1!
+ * NOTE: If the character passed to this function is not a valid UTF-8
+ * character (truncated sequence, invalid continuation byte, or
+ * codepoint not allowed by IS_CHAR), the return value will be 0 and
+ * the length will be set to -1; callers must check for this before
+ * advancing by the returned length.
  */
 int
-domParseChar( xmlChar *cur, int *len )
+domParseChar( xmlChar *cur, int *len, int remaining )
 {
     unsigned char c;
         unsigned int val;
@@@@ -292,6 +294,13 @@@@ domParseChar( xmlChar *cur, int *len )
         if ((c & 0xe0) == 0xe0) {
             if ((c & 0xf0) == 0xf0) {
                 /* 4-byte code */
+                if ((cur[1] & 0xC0) != 0x80 ||
+                    (cur[2] & 0xC0) != 0x80 ||
+                    (cur[3] & 0xC0) != 0x80)
+                {
+                    *len = -1;
+                    return(0);
+                }
                 *len = 4;
                 val = (cur[0] & 0x7) << 18;
                 val |= (cur[1] & 0x3f) << 12;
@@@@ -299,6 +308,12 @@@@ domParseChar( xmlChar *cur, int *len )
                 val |= cur[3] & 0x3f;
             } else {
                 /* 3-byte code */
+                if ((cur[1] & 0xC0) != 0x80 ||
+                    (cur[2] & 0xC0) != 0x80)
+                {
+                    *len = -1;
+                    return(0);
+                }
                 *len = 3;
                 val = (cur[0] & 0xf) << 12;
                 val |= (cur[1] & 0x3f) << 6;
@@@@ -306,6 +321,11 @@@@ domParseChar( xmlChar *cur, int *len )
             }
             } else {
             /* 2-byte code */
+            if ((cur[1] & 0xC0) != 0x80)
+            {
+                *len = -1;
+                return(0);
+            }
             *len = 2;
             val = (cur[0] & 0x1f) << 6;
             val |= cur[1] & 0x3f;
@


1.1
log
@p5-XML-LibXML: Add upstream patch to fix CVE-2026-8177

Bump PKGREVISION.
@
text
@d1 4
a4 1
$NetBSD$
d33 29
a61 1
@@@@ -292,6 +292,13 @@@@ domParseChar( xmlChar *cur, int *len )
d75 1
a75 1
@@@@ -299,6 +306,12 @@@@ domParseChar( xmlChar *cur, int *len )
d88 1
a88 1
@@@@ -306,6 +319,11 @@@@ domParseChar( xmlChar *cur, int *len )
@