From 880bac98fcd850f826526320e7e3570fe8b779e7 Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Sun, 9 Feb 2025 20:47:13 -0700
Subject: [PATCH] utf8.h: Split a macro into components

This creates an internal macro that skips some error checking for use
when we don't care if it is completely well-formed or not.
---
 utf8.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/utf8.h b/utf8.h
index 34e055e06892..ef09557642b3 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1065,13 +1065,18 @@ this macro matches
 #define UTF_START_BYTE_110000_ UTF_START_BYTE(PERL_UNICODE_MAX + 1, 21)
 #define UTF_FIRST_CONT_BYTE_110000_ \
     UTF_FIRST_CONT_BYTE(PERL_UNICODE_MAX + 1, 21)
+
+/* Internal macro for use when we don't care whether 's' is well-formed, and
+ * know at least two bytes are readable.  Evaluates 's' more than once. */
+#define UTF8_IS_SUPER_NO_CHECK_(s) \
+    (   NATIVE_UTF8_TO_I8((s)[0]) >= UTF_START_BYTE_110000_ \
+     && (   NATIVE_UTF8_TO_I8((s)[0]) > UTF_START_BYTE_110000_ \
+         || NATIVE_UTF8_TO_I8((s)[1]) >= UTF_FIRST_CONT_BYTE_110000_))
+
 #define UTF8_IS_SUPER(s, e) \
-    ( ((e) - (s)) >= UNISKIP_BY_MSB_(20) \
-     && ( NATIVE_UTF8_TO_I8(s[0]) >= UTF_START_BYTE_110000_ \
-     && ( NATIVE_UTF8_TO_I8(s[0]) > UTF_START_BYTE_110000_ \
-     || NATIVE_UTF8_TO_I8(s[1]) >= UTF_FIRST_CONT_BYTE_110000_))) \
+    ((((e) - (s)) >= UNISKIP_BY_MSB_(20) && UTF8_IS_SUPER_NO_CHECK_(s)) \
      ? isUTF8_CHAR(s, e) \
-     : 0
+     : 0)
 
 /*
 =for apidoc Am|bool|UNICODE_IS_NONCHAR|const UV uv