Skip to content

Commit

Permalink
Update GB-18030 encoding table
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=257770
rdar://110353061

Reviewed by Myles C. Maxfield.

This was already done internally in ICU in rdar://107702106.
It reflects the changes published as GB-18030-2022,
which were proposed as a change to the standard at whatwg/encoding#312.
This fixes an assertion failure when running encoding tests on macOS Sonoma and iOS 17.
I also added test coverage specific to the 18 changed code points.

* LayoutTests/imported/w3c/web-platform-tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder-expected.txt:
* LayoutTests/imported/w3c/web-platform-tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html:
* Source/WTF/wtf/PlatformHave.h:
* Source/WebCore/PAL/pal/text/EncodingTables.cpp:
(PAL::gb18030):

Canonical link: https://commits.webkit.org/264918@main
  • Loading branch information
achristensen07 committed Jun 7, 2023
1 parent a0d7223 commit b7d4f07
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,42 @@ PASS gb18030 encoder: Ranges pointer special case
PASS gb18030 encoder: legacy ICU special case 1
PASS gb18030 encoder: legacy ICU special case 2
PASS gb18030 encoder: legacy ICU special case 3
PASS gb18030 encoder: GB18030-2022 1
PASS gb18030 encoder: GB18030-2022 2
PASS gb18030 encoder: GB18030-2022 3
PASS gb18030 encoder: GB18030-2022 4
PASS gb18030 encoder: GB18030-2022 5
PASS gb18030 encoder: GB18030-2022 6
PASS gb18030 encoder: GB18030-2022 7
PASS gb18030 encoder: GB18030-2022 8
PASS gb18030 encoder: GB18030-2022 9
PASS gb18030 encoder: GB18030-2022 10
PASS gb18030 encoder: GB18030-2022 11
PASS gb18030 encoder: GB18030-2022 12
PASS gb18030 encoder: GB18030-2022 13
PASS gb18030 encoder: GB18030-2022 14
PASS gb18030 encoder: GB18030-2022 15
PASS gb18030 encoder: GB18030-2022 16
PASS gb18030 encoder: GB18030-2022 17
PASS gb18030 encoder: GB18030-2022 18
PASS gb18030 encoder: GB18030-2005 1
PASS gb18030 encoder: GB18030-2005 2
PASS gb18030 encoder: GB18030-2005 3
PASS gb18030 encoder: GB18030-2005 4
PASS gb18030 encoder: GB18030-2005 5
PASS gb18030 encoder: GB18030-2005 6
PASS gb18030 encoder: GB18030-2005 7
PASS gb18030 encoder: GB18030-2005 8
PASS gb18030 encoder: GB18030-2005 9
PASS gb18030 encoder: GB18030-2005 10
PASS gb18030 encoder: GB18030-2005 11
PASS gb18030 encoder: GB18030-2005 12
PASS gb18030 encoder: GB18030-2005 13
PASS gb18030 encoder: GB18030-2005 14
PASS gb18030 encoder: GB18030-2005 15
PASS gb18030 encoder: GB18030-2005 16
PASS gb18030 encoder: GB18030-2005 17
PASS gb18030 encoder: GB18030-2005 18
PASS gb18030 encoder: range 0
PASS gb18030 encoder: range 1
PASS gb18030 encoder: range 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,44 @@
encode("\u2026", "%A1%AD", "legacy ICU special case 2");
encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");

encode("\uFE10", "%A6%D9", "GB18030-2022 1");
encode("\uFE12", "%A6%DA", "GB18030-2022 2");
encode("\uFE11", "%A6%DB", "GB18030-2022 3");
encode("\uFE13", "%A6%DC", "GB18030-2022 4");
encode("\uFE14", "%A6%DD", "GB18030-2022 5");
encode("\uFE15", "%A6%DE", "GB18030-2022 6");
encode("\uFE16", "%A6%DF", "GB18030-2022 7");
encode("\uFE17", "%A6%EC", "GB18030-2022 8");
encode("\uFE18", "%A6%ED", "GB18030-2022 9");
encode("\uFE19", "%A6%F3", "GB18030-2022 10");
encode("\u9FB4", "%FEY", "GB18030-2022 11");
encode("\u9FB5", "%FEa", "GB18030-2022 12");
encode("\u9FB6", "%FEf", "GB18030-2022 13");
encode("\u9FB7", "%FEg", "GB18030-2022 14");
encode("\u9FB8", "%FEm", "GB18030-2022 15");
encode("\u9FB9", "%FE~", "GB18030-2022 16");
encode("\u9FBA", "%FE%90", "GB18030-2022 17");
encode("\u9FBB", "%FE%A0", "GB18030-2022 18");

encode("\uE78D", "%836%CB2", "GB18030-2005 1");
encode("\uE78E", "%836%CB3", "GB18030-2005 2");
encode("\uE78F", "%836%CB4", "GB18030-2005 3");
encode("\uE790", "%836%CB5", "GB18030-2005 4");
encode("\uE791", "%836%CB6", "GB18030-2005 5");
encode("\uE792", "%836%CB7", "GB18030-2005 6");
encode("\uE793", "%836%CB8", "GB18030-2005 7");
encode("\uE794", "%836%CB9", "GB18030-2005 8");
encode("\uE795", "%836%CC0", "GB18030-2005 9");
encode("\uE796", "%836%CC1", "GB18030-2005 10");
encode("\uE81E", "%836%CA0", "GB18030-2005 11");
encode("\uE826", "%836%CA7", "GB18030-2005 12");
encode("\uE82B", "%836%CB1", "GB18030-2005 13");
encode("\uE82C", "%836%CB2", "GB18030-2005 14");
encode("\uE832", "%836%CB6", "GB18030-2005 15");
encode("\uE843", "%836%CD0", "GB18030-2005 16");
encode("\uE854", "%836%CE6", "GB18030-2005 17");
encode("\uE864", "%836%D00", "GB18030-2005 18");

// Rounds the argument down to an integer and returns it as an uppercase
// hexadecimal string. No masking is applied, so values of 16 or more
// produce more than one hex digit.
const upperCaseNibble = value => Math.floor(value).toString(16).toUpperCase();
Expand Down
1 change: 1 addition & 0 deletions Source/WTF/wtf/PlatformHave.h
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,7 @@
#define HAVE_STRICT_DECODABLE_CNCONTACT 1
#define HAVE_STRICT_DECODABLE_PKCONTACT 1
#define HAVE_STRICT_DECODABLE_NSTEXTTABLE 1
#define HAVE_GB_18030_2022 1
#endif

#if ((PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 140000) \
Expand Down
45 changes: 35 additions & 10 deletions Source/WebCore/PAL/pal/text/EncodingTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7551,9 +7551,9 @@ const std::array<UChar, 23940> gb18030Reference { {
0xE700, 0xE701, 0xE702, 0xE703, 0xE704, 0xE705, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A,
0x039B, 0x039C, 0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0xE785, 0xE786,
0xE787, 0xE788, 0xE789, 0xE78A, 0xE78B, 0xE78C, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA,
0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0xE78D, 0xE78E,
0xE78F, 0xE790, 0xE791, 0xE792, 0xE793, 0xFE35, 0xFE36, 0xFE39, 0xFE3A, 0xFE3F, 0xFE40, 0xFE3D, 0xFE3E, 0xFE41, 0xFE42, 0xFE43,
0xFE44, 0xE794, 0xE795, 0xFE3B, 0xFE3C, 0xFE37, 0xFE38, 0xFE31, 0xE796, 0xFE33, 0xFE34, 0xE797, 0xE798, 0xE799, 0xE79A, 0xE79B,
0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0xFE10, 0xFE12,
0xFE11, 0xFE13, 0xFE14, 0xFE15, 0xFE16, 0xFE35, 0xFE36, 0xFE39, 0xFE3A, 0xFE3F, 0xFE40, 0xFE3D, 0xFE3E, 0xFE41, 0xFE42, 0xFE43,
0xFE44, 0xFE17, 0xFE18, 0xFE3B, 0xFE3C, 0xFE37, 0xFE38, 0xFE31, 0xFE19, 0xFE33, 0xFE34, 0xE797, 0xE798, 0xE799, 0xE79A, 0xE79B,
0xE79C, 0xE79D, 0xE79E, 0xE79F, 0xE706, 0xE707, 0xE708, 0xE709, 0xE70A, 0xE70B, 0xE70C, 0xE70D, 0xE70E, 0xE70F, 0xE710, 0xE711,
0xE712, 0xE713, 0xE714, 0xE715, 0xE716, 0xE717, 0xE718, 0xE719, 0xE71A, 0xE71B, 0xE71C, 0xE71D, 0xE71E, 0xE71F, 0xE720, 0xE721,
0xE722, 0xE723, 0xE724, 0xE725, 0xE726, 0xE727, 0xE728, 0xE729, 0xE72A, 0xE72B, 0xE72C, 0xE72D, 0xE72E, 0xE72F, 0xE730, 0xE731,
Expand Down Expand Up @@ -8588,12 +8588,12 @@ const std::array<UChar, 23940> gb18030Reference { {
0xE442, 0xE443, 0xE444, 0xE445, 0xE446, 0xE447, 0xE448, 0xE449, 0xE44A, 0xE44B, 0xE44C, 0xE44D, 0xE44E, 0xE44F, 0xE450, 0xE451,
0xE452, 0xE453, 0xE454, 0xE455, 0xE456, 0xE457, 0xE458, 0xE459, 0xE45A, 0xE45B, 0xE45C, 0xE45D, 0xE45E, 0xE45F, 0xE460, 0xE461,
0xE462, 0xE463, 0xE464, 0xE465, 0xE466, 0xE467, 0xFA0C, 0xFA0D, 0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14, 0xFA18, 0xFA1F, 0xFA20,
0xFA21, 0xFA23, 0xFA24, 0xFA27, 0xFA28, 0xFA29, 0x2E81, 0xE816, 0xE817, 0xE818, 0x2E84, 0x3473, 0x3447, 0x2E88, 0x2E8B, 0xE81E,
0x359E, 0x361A, 0x360E, 0x2E8C, 0x2E97, 0x396E, 0x3918, 0xE826, 0x39CF, 0x39DF, 0x3A73, 0x39D0, 0xE82B, 0xE82C, 0x3B4E, 0x3C6E,
0x3CE0, 0x2EA7, 0xE831, 0xE832, 0x2EAA, 0x4056, 0x415F, 0x2EAE, 0x4337, 0x2EB3, 0x2EB6, 0x2EB7, 0xE83B, 0x43B1, 0x43AC, 0x2EBB,
0x43DD, 0x44D6, 0x4661, 0x464C, 0xE843, 0x4723, 0x4729, 0x477C, 0x478D, 0x2ECA, 0x4947, 0x497A, 0x497D, 0x4982, 0x4983, 0x4985,
0x4986, 0x499F, 0x499B, 0x49B7, 0x49B6, 0xE854, 0xE855, 0x4CA3, 0x4C9F, 0x4CA0, 0x4CA1, 0x4C77, 0x4CA2, 0x4D13, 0x4D14, 0x4D15,
0x4D16, 0x4D17, 0x4D18, 0x4D19, 0x4DAE, 0xE864, 0xE468, 0xE469, 0xE46A, 0xE46B, 0xE46C, 0xE46D, 0xE46E, 0xE46F, 0xE470, 0xE471,
0xFA21, 0xFA23, 0xFA24, 0xFA27, 0xFA28, 0xFA29, 0x2E81, 0xE816, 0xE817, 0xE818, 0x2E84, 0x3473, 0x3447, 0x2E88, 0x2E8B, 0x9FB4,
0x359E, 0x361A, 0x360E, 0x2E8C, 0x2E97, 0x396E, 0x3918, 0x9FB5, 0x39CF, 0x39DF, 0x3A73, 0x39D0, 0x9FB6, 0x9FB7, 0x3B4E, 0x3C6E,
0x3CE0, 0x2EA7, 0xE831, 0x9FB8, 0x2EAA, 0x4056, 0x415F, 0x2EAE, 0x4337, 0x2EB3, 0x2EB6, 0x2EB7, 0xE83B, 0x43B1, 0x43AC, 0x2EBB,
0x43DD, 0x44D6, 0x4661, 0x464C, 0x9FB9, 0x4723, 0x4729, 0x477C, 0x478D, 0x2ECA, 0x4947, 0x497A, 0x497D, 0x4982, 0x4983, 0x4985,
0x4986, 0x499F, 0x499B, 0x49B7, 0x49B6, 0x9FBA, 0xE855, 0x4CA3, 0x4C9F, 0x4CA0, 0x4CA1, 0x4C77, 0x4CA2, 0x4D13, 0x4D14, 0x4D15,
0x4D16, 0x4D17, 0x4D18, 0x4D19, 0x4DAE, 0x9FBB, 0xE468, 0xE469, 0xE46A, 0xE46B, 0xE46C, 0xE46D, 0xE46E, 0xE46F, 0xE470, 0xE471,
0xE472, 0xE473, 0xE474, 0xE475, 0xE476, 0xE477, 0xE478, 0xE479, 0xE47A, 0xE47B, 0xE47C, 0xE47D, 0xE47E, 0xE47F, 0xE480, 0xE481,
0xE482, 0xE483, 0xE484, 0xE485, 0xE486, 0xE487, 0xE488, 0xE489, 0xE48A, 0xE48B, 0xE48C, 0xE48D, 0xE48E, 0xE48F, 0xE490, 0xE491,
0xE492, 0xE493, 0xE494, 0xE495, 0xE496, 0xE497, 0xE498, 0xE499, 0xE49A, 0xE49B, 0xE49C, 0xE49D, 0xE49E, 0xE49F, 0xE4A0, 0xE4A1,
Expand Down Expand Up @@ -8629,7 +8629,32 @@ const std::array<UChar, 23940>& gb18030()
// This is a difference between ICU and the encoding specification.
ASSERT((*array)[6555] == 0xe5e5);
(*array)[6555] = 0x3000;


// Compatibility patch, compiled only when HAVE(GB_18030_2022) is not set:
// overwrite the 18 table slots listed below so that the table agrees with
// gb18030Reference, which the ASSERT following this block checks.
// NOTE(review): presumably `array` was filled from the platform's ICU
// converter earlier in this function and still carries the pre-2022
// mappings on these platforms — confirm against the part of gb18030()
// that precedes this block.
#if !HAVE(GB_18030_2022)
// Each entry is { index into the gb18030 table, replacement code unit }.
static std::array<std::pair<size_t, UChar>, 18> gb18030_2022Differences { {
// Ten slots that now hold U+FE10..U+FE19 (note U+FE12 precedes U+FE11).
{ 7182, 0xfe10 },
{ 7183, 0xfe12 },
{ 7184, 0xfe11 },
{ 7185, 0xfe13 },
{ 7186, 0xfe14 },
{ 7187, 0xfe15 },
{ 7188, 0xfe16 },
{ 7201, 0xfe17 },
{ 7202, 0xfe18 },
{ 7208, 0xfe19 },
// Eight slots that now hold U+9FB4..U+9FBB.
{ 23775, 0x9fb4 },
{ 23783, 0x9fb5 },
{ 23788, 0x9fb6 },
{ 23789, 0x9fb7 },
{ 23795, 0x9fb8 },
{ 23812, 0x9fb9 },
{ 23829, 0x9fba },
{ 23845, 0x9fbb }
} };
// Apply every override in place; pair.first is the slot, pair.second the value.
for (auto& pair : gb18030_2022Differences)
(*array)[pair.first] = pair.second;
#endif // !HAVE(GB_18030_2022)

ASSERT(*array == gb18030Reference);
});
return *array;
Expand Down

0 comments on commit b7d4f07

Please sign in to comment.