Skip to content

Commit

Permalink
utf8.h: Split a macro into components
Browse files Browse the repository at this point in the history
This creates an internal macro that skips some of the error checking, for
use when we don't care whether the input is completely well-formed.
  • Loading branch information
khwilliamson committed Feb 20, 2025
1 parent cea23d3 commit 880bac9
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -1065,13 +1065,18 @@ this macro matches
/* The values UTF_START_BYTE() and UTF_FIRST_CONT_BYTE() yield for
 * PERL_UNICODE_MAX + 1.  The second argument, 21, is presumably the number
 * of bits needed to represent that value -- confirm against the definitions
 * of those two macros. */
#define UTF_START_BYTE_110000_                                              \
                        UTF_START_BYTE(PERL_UNICODE_MAX + 1, 21)
#define UTF_FIRST_CONT_BYTE_110000_                                         \
                        UTF_FIRST_CONT_BYTE(PERL_UNICODE_MAX + 1, 21)

/* Internal macro for use when the caller doesn't care whether the sequence
 * is well-formed, and knows there are two bytes available to read.
 *
 * True when the first byte of 's' is above UTF_START_BYTE_110000_, or equals
 * it and the second byte is at least UTF_FIRST_CONT_BYTE_110000_ (both bytes
 * compared after NATIVE_UTF8_TO_I8).  Only s[0] and s[1] are examined, and no
 * bounds checking is done.
 *
 * 's' is parenthesized before indexing so that an expression argument such
 * as 'p + 1' is indexed as a whole.  It may be evaluated up to three times,
 * so it must be free of side effects. */
#define UTF8_IS_SUPER_NO_CHECK_(s)                                          \
    (   NATIVE_UTF8_TO_I8((s)[0]) >= UTF_START_BYTE_110000_                 \
     && (   NATIVE_UTF8_TO_I8((s)[0]) >  UTF_START_BYTE_110000_             \
         || NATIVE_UTF8_TO_I8((s)[1]) >= UTF_FIRST_CONT_BYTE_110000_))

/* Evaluates to isUTF8_CHAR(s, e) when at least UNISKIP_BY_MSB_(20) bytes lie
 * between 's' and 'e' and UTF8_IS_SUPER_NO_CHECK_ accepts the leading bytes
 * of 's'; otherwise evaluates to 0.  The length test comes first so that the
 * helper, which reads s[0] and s[1] unconditionally, is only reached when
 * those bytes are available.
 *
 * 's' is handed to the helper parenthesized so that an expression argument
 * is indexed as a whole.  Both arguments may be evaluated more than once, so
 * they must be free of side effects.  The whole expansion is parenthesized
 * so the conditional cannot be split by operators at the call site. */
#define UTF8_IS_SUPER(s, e)                                                 \
    ((   ((e) - (s)) >= UNISKIP_BY_MSB_(20)                                 \
      && UTF8_IS_SUPER_NO_CHECK_((s)))                                      \
     ? isUTF8_CHAR(s, e)                                                    \
     : 0)

/*
=for apidoc Am|bool|UNICODE_IS_NONCHAR|const UV uv
Expand Down

0 comments on commit 880bac9

Please sign in to comment.