From 880bac98fcd850f826526320e7e3570fe8b779e7 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sun, 9 Feb 2025 20:47:13 -0700
Subject: [PATCH] utf8.h: Split a macro into components

This creates an internal macro that skips some error checking, for use
when we don't care whether the input is completely well-formed.  It also
wraps the expansion of UTF8_IS_SUPER in parentheses.
---
 utf8.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/utf8.h b/utf8.h
index 34e055e06892..ef09557642b3 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1065,13 +1065,18 @@ this macro matches
 #define UTF_START_BYTE_110000_  UTF_START_BYTE(PERL_UNICODE_MAX + 1, 21)
 #define UTF_FIRST_CONT_BYTE_110000_                                         \
                           UTF_FIRST_CONT_BYTE(PERL_UNICODE_MAX + 1, 21)
+
+/* Internal: true if the first two bytes of s are those of a code point above
+ * PERL_UNICODE_MAX.  No well-formedness check; s[0] and s[1] must be readable */
+#define UTF8_IS_SUPER_NO_CHECK_(s)                                          \
+     (       NATIVE_UTF8_TO_I8((s)[0]) >= UTF_START_BYTE_110000_            \
+      && (   NATIVE_UTF8_TO_I8((s)[0]) >  UTF_START_BYTE_110000_            \
+          || NATIVE_UTF8_TO_I8((s)[1]) >= UTF_FIRST_CONT_BYTE_110000_))
+
 #define UTF8_IS_SUPER(s, e)                                                 \
-    (   ((e) - (s)) >= UNISKIP_BY_MSB_(20)                                  \
-     && (       NATIVE_UTF8_TO_I8(s[0]) >= UTF_START_BYTE_110000_           \
-         && (   NATIVE_UTF8_TO_I8(s[0]) >  UTF_START_BYTE_110000_           \
-             || NATIVE_UTF8_TO_I8(s[1]) >= UTF_FIRST_CONT_BYTE_110000_)))   \
+    ((((e) - (s)) >= UNISKIP_BY_MSB_(20) && UTF8_IS_SUPER_NO_CHECK_(s))     \
      ? isUTF8_CHAR(s, e)                                                    \
-     : 0
+     : 0)
 
 /*
 =for apidoc Am|bool|UNICODE_IS_NONCHAR|const UV uv