diff --git a/src/FontLib/Table/DirectoryEntry.php b/src/FontLib/Table/DirectoryEntry.php index 86500e1..b41fdd0 100644 --- a/src/FontLib/Table/DirectoryEntry.php +++ b/src/FontLib/Table/DirectoryEntry.php @@ -49,20 +49,8 @@ static function computeChecksum($data) { $data = str_pad($data, $len + (4 - $mod), "\0"); } - $len = mb_strlen($data, '8bit'); - - $hi = 0x0000; - $lo = 0x0000; - - for ($i = 0; $i < $len; $i += 4) { - $hi += (ord($data[$i]) << 8) + ord($data[$i + 1]); - $lo += (ord($data[$i + 2]) << 8) + ord($data[$i + 3]); - $hi += $lo >> 16; - $lo = $lo & 0xFFFF; - $hi = $hi & 0xFFFF; - } - - return ($hi << 8) + $lo; + $table = unpack("N*", $data); + return array_sum($table); } function __construct(File $font) { @@ -93,6 +81,14 @@ function encode($entry_offset) { $this->offset = $table_offset; $table_length = $data->encode(); + $font->seek($table_offset + $table_length); + $pad = 0; + $mod = $table_length % 4; + if ($mod != 0) { + $pad = 4 - $mod; + $font->write(str_pad("", $pad, "\0"), $pad); + } + $font->seek($table_offset); $table_data = $font->read($table_length); @@ -105,7 +101,7 @@ function encode($entry_offset) { Font::d("Bytes written = $table_length"); - $font->seek($table_offset + $table_length); + $font->seek($table_offset + $table_length + $pad); } /** diff --git a/src/FontLib/Table/Type/cmap.php b/src/FontLib/Table/Type/cmap.php index a48b60b..a77df88 100644 --- a/src/FontLib/Table/Type/cmap.php +++ b/src/FontLib/Table/Type/cmap.php @@ -26,6 +26,18 @@ class cmap extends Table { "offset" => self::uint32, ); + private static $subtable_v2_format = array( + "length" => self::uint16, + "language" => self::uint16 + ); + + private static $subtable_v2_format_subheader = array( + "firstCode" => self::uint16, + "entryCount" => self::uint16, + "idDelta" => self::int16, + "idRangeOffset" => self::uint16 + ); + private static $subtable_v4_format = array( "length" => self::uint16, "language" => self::uint16, @@ -38,7 +50,7 @@ class cmap extends Table { private 
static $subtable_v12_format = array( "length" => self::uint32, "language" => self::uint32, - "ngroups" => self::uint32 + "ngroups" => self::uint32 ); protected function _parse() { @@ -60,105 +72,170 @@ protected function _parse() { $subtable["format"] = $font->readUInt16(); - // @todo Only CMAP version 4 and 12 - if (($subtable["format"] != 4) && ($subtable["format"] != 12)) { - unset($data["subtables"][$i]); - $data["numberSubtables"]--; - continue; - } - - if ($subtable["format"] == 12) { - - $font->readUInt16(); - - $subtable += $font->unpack(self::$subtable_v12_format); - - $glyphIndexArray = array(); - $endCodes = array(); - $startCodes = array(); - - for ($p = 0; $p < $subtable['ngroups']; $p++) { - - $startCode = $startCodes[] = $font->readUInt32(); - $endCode = $endCodes[] = $font->readUInt32(); - $startGlyphCode = $font->readUInt32(); - - for ($c = $startCode; $c <= $endCode; $c++) { - $glyphIndexArray[$c] = $startGlyphCode; - $startGlyphCode++; - } - } - - $subtable += array( - "startCode" => $startCodes, - "endCode" => $endCodes, - "glyphIndexArray" => $glyphIndexArray, - ); - - } - else if ($subtable["format"] == 4) { - - $subtable += $font->unpack(self::$subtable_v4_format); - - $segCount = $subtable["segCountX2"] / 2; - $subtable["segCount"] = $segCount; - - $endCode = $font->readUInt16Many($segCount); - - $font->readUInt16(); // reservedPad - - $startCode = $font->readUInt16Many($segCount); - $idDelta = $font->readInt16Many($segCount); - - $ro_start = $font->pos(); - $idRangeOffset = $font->readUInt16Many($segCount); - - $glyphIndexArray = array(); - for ($i = 0; $i < $segCount; $i++) { - $c1 = $startCode[$i]; - $c2 = $endCode[$i]; - $d = $idDelta[$i]; - $ro = $idRangeOffset[$i]; - - if ($ro > 0) { - $font->seek($subtable["offset"] + 2 * $i + $ro); + switch ($subtable["format"]) { + case 0: + case 6: + case 8: + case 10: + case 13: + case 14: + unset($data["subtables"][$i]); + $data["numberSubtables"]--; + continue 2; + + case 2: + $subtable += 
$font->unpack(self::$subtable_v2_format); + + $subHeaderKeys = array_map(function($val) { return $val / 8; }, $font->readUInt16Many(256)); + $subHeaders = array(); + + $glyphIdArray = array(); + $maxSubHeaderIndex = max($subHeaderKeys); + for ($i = 0; $i <= $maxSubHeaderIndex; $i++) { + $subHeader = $font->unpack(self::$subtable_v2_format_subheader); + $offset = $font->pos(); + $subHeader["glyphIdArrayOffset"] = $offset + $subHeader["idRangeOffset"] - 2; + $subHeaders[$i] = $subHeader; + + if (!\array_key_exists($subHeader["glyphIdArrayOffset"], $glyphIdArray) || count($glyphIdArray[$subHeader["glyphIdArrayOffset"]]) < $subHeader["entryCount"]) { + $font->seek($subHeader["glyphIdArrayOffset"]); + $glyphIdArray[$subHeader["glyphIdArrayOffset"]] = $font->readUInt16Many($subHeader["entryCount"]); + $font->seek($offset); + } } - for ($c = $c1; $c <= $c2; $c++) { - if ($c === 0xFFFF) { - continue; + $glyphIndexArray = array(); + foreach ($subHeaderKeys as $highByte => $subHeaderKey) { + $subHeader = $subHeaders[$subHeaderKey]; + if ($subHeaderKey === 0) { + $c = $highByte; + if ($c < $subHeader["firstCode"] || $c >= ($subHeader["firstCode"] + $subHeader["entryCount"])) { + $glyphIndexArray[$c] = 0; + continue; + } + $c = $highByte; + $index = $c - $subHeader["firstCode"]; + $glyphId = $glyphIdArray[$subHeader["glyphIdArrayOffset"]][$index]; + if ($glyphId === 0) { + $glyphIndexArray[$c] = 0; + } else { + $glyphIndexArray[$c] = ($glyphId + $subHeader["idDelta"]) & 0xFFFF; + } + } else { + for ($index = 0; $index < $subHeader["entryCount"]; $index++) { + $c = null; + $lowByte = $subHeader["firstCode"] + $index; + $c = (($highByte & 0xFF) << 8) | ($lowByte & 0xFF); + $glyphId = $glyphIdArray[$subHeader["glyphIdArrayOffset"]][$index]; + if ($glyphId === 0) { + $glyphIndexArray[$c] = 0; + } else { + $glyphIndexArray[$c] = ($glyphId + $subHeader["idDelta"]) & 0xFFFF; + } + } } + } - if ($ro == 0) { - $gid = ($c + $d) & 0xFFFF; + $subtable += array( + "subHeaderKeys" => 
$subHeaderKeys, + "subHeaders" => $subHeaders, + "glyphIdArray" => $glyphIdArray, + "glyphIndexArray" => $glyphIndexArray + ); + + break; + + case 4: + $subtable += $font->unpack(self::$subtable_v4_format); + + $segCount = $subtable["segCountX2"] / 2; + $subtable["segCount"] = $segCount; + + $endCode = $font->readUInt16Many($segCount); + + $font->readUInt16(); // reservedPad + + $startCode = $font->readUInt16Many($segCount); + $idDelta = $font->readInt16Many($segCount); + + $ro_start = $font->pos(); + $idRangeOffset = $font->readUInt16Many($segCount); + + $glyphIndexArray = array(); + for ($i = 0; $i < $segCount; $i++) { + $c1 = $startCode[$i]; + $c2 = $endCode[$i]; + $d = $idDelta[$i]; + $ro = $idRangeOffset[$i]; + + if ($ro > 0) { + $font->seek($subtable["offset"] + 2 * $i + $ro); } - else { - $offset = ($c - $c1) * 2 + $ro; - $offset = $ro_start + 2 * $i + $offset; - - $gid = 0; - if ($font->seek($offset) === true) { - $gid = $font->readUInt16(); + + for ($c = $c1; $c <= $c2; $c++) { + if ($c === 0xFFFF) { + continue; } - - if ($gid != 0) { - $gid = ($gid + $d) & 0xFFFF; + + if ($ro == 0) { + $gid = ($c + $d) & 0xFFFF; + } + else { + $offset = ($c - $c1) * 2 + $ro; + $offset = $ro_start + 2 * $i + $offset; + + $gid = 0; + if ($font->seek($offset) === true) { + $gid = $font->readUInt16(); + } + + if ($gid != 0) { + $gid = ($gid + $d) & 0xFFFF; + } + } + + if ($gid >= 0) { + $glyphIndexArray[$c] = $gid; } } - - if ($gid >= 0) { - $glyphIndexArray[$c] = $gid; + } + + $subtable += array( + "endCode" => $endCode, + "startCode" => $startCode, + "idDelta" => $idDelta, + "idRangeOffset" => $idRangeOffset, + "glyphIndexArray" => $glyphIndexArray + ); + break; + + case 12: + $font->readUInt16(); + + $subtable += $font->unpack(self::$subtable_v12_format); + + $glyphIndexArray = array(); + $endCodes = array(); + $startCodes = array(); + + for ($p = 0; $p < $subtable['ngroups']; $p++) { + + $startCode = $startCodes[] = $font->readUInt32(); + $endCode = $endCodes[] = 
$font->readUInt32(); + $startGlyphCode = $font->readUInt32(); + + for ($c = $startCode; $c <= $endCode; $c++) { + $glyphIndexArray[$c] = $startGlyphCode; + $startGlyphCode++; } } - } - - $subtable += array( - "endCode" => $endCode, - "startCode" => $startCode, - "idDelta" => $idDelta, - "idRangeOffset" => $idRangeOffset, - "glyphIndexArray" => $glyphIndexArray, - ); + + $subtable += array( + "startCode" => $startCodes, + "endCode" => $endCodes, + "glyphIndexArray" => $glyphIndexArray, + ); + break; } } @@ -202,7 +279,7 @@ function _encode() { $prevGid = $gid; } - $segments[][] = array(0xFFFF, 0xFFFF); + $segments[][] = array(0xFFFF, null); $startCode = array(); $endCode = array(); diff --git a/src/FontLib/Table/Type/glyf.php b/src/FontLib/Table/Type/glyf.php index b8f7c93..ad4a858 100644 --- a/src/FontLib/Table/Type/glyf.php +++ b/src/FontLib/Table/Type/glyf.php @@ -143,7 +143,16 @@ protected function _encode() { $length = 0; foreach ($subset as $gid) { $loca[] = $length; - $length += $data[$gid]->encode(); + + $bytes = $data[$gid]->encode(); + + $pad = 0; + $mod = $bytes % 4; + if ($mod != 0) { + $pad = 4 - $mod; + $font->write(str_pad("", $pad, "\0"), $pad); + } + $length += $bytes + $pad; } $loca[] = $length; // dummy loca diff --git a/src/FontLib/Table/Type/head.php b/src/FontLib/Table/Type/head.php index 3d15f37..0686508 100644 --- a/src/FontLib/Table/Type/head.php +++ b/src/FontLib/Table/Type/head.php @@ -43,4 +43,9 @@ protected function _parse() { throw new Exception("Incorrect magic number (" . dechex($this->data["magicNumber"]) . 
")"); } } + + function _encode() { + $this->data["checkSumAdjustment"] = 0; + return parent::_encode(); + } } \ No newline at end of file diff --git a/src/FontLib/Table/Type/name.php b/src/FontLib/Table/Type/name.php index 9e77042..acdda7a 100644 --- a/src/FontLib/Table/Type/name.php +++ b/src/FontLib/Table/Type/name.php @@ -150,11 +150,49 @@ protected function _parse() { $records[] = $record; } + $system_encodings = mb_list_encodings(); + $system_encodings = array_change_key_case(array_fill_keys($system_encodings, true), CASE_UPPER); + $names = array(); foreach ($records as $record) { $font->seek($tableOffset + $data["stringOffset"] + $record->offset); - $s = $font->read($record->length); - $record->string = Font::UTF16ToUTF8($s); + $record->stringRaw = $font->read($record->length); + + $encoding = null; + switch ($record->platformID) { + case 3: + switch ($record->platformSpecificID) { + case 2: + if (\array_key_exists("SJIS", $system_encodings)) { + $encoding = "SJIS"; + } + break; + case 3: + if (\array_key_exists("GB18030", $system_encodings)) { + $encoding = "GB18030"; + } + break; + case 4: + if (\array_key_exists("BIG-5", $system_encodings)) { + $encoding = "BIG-5"; + } + break; + case 5: + if (\array_key_exists("UHC", $system_encodings)) { + $encoding = "UHC"; + } + break; + } + break; + } + if ($encoding === null) { + $encoding = "UTF-16"; + } + + $record->string = mb_convert_encoding($record->stringRaw, "UTF-8", $encoding); + if (strpos($record->string, "\0") !== false) { + $record->string = str_replace("\0", "", $record->string); + } $names[$record->nameID] = $record; } @@ -168,22 +206,33 @@ protected function _encode() { /** @var nameRecord[] $records */ $records = $this->data["records"]; - $count_records = count($records); + $count_records = \count($records); $this->data["count"] = $count_records; - $this->data["stringOffset"] = 6 + $count_records * 12; // 6 => uint16 * 3, 12 => sizeof self::$record_format + $this->data["stringOffset"] = 6 + 
($count_records * 12); // 6 => uint16 * 3, 12 => sizeof self::$record_format $length = $font->pack(self::$header_format, $this->data); $offset = 0; + + /** @var nameRecord[] $records_to_encode */ + $records_to_encode = array(); foreach ($records as $record) { - $record->length = mb_strlen($record->getUTF16(), "8bit"); - $record->offset = $offset; - $offset += $record->length; - $length += $font->pack(nameRecord::$format, (array)$record); + $encoded_record = new nameRecord(); + $encoded_record->platformID = 3; + $encoded_record->platformSpecificID = 1; + $encoded_record->languageID = $record->languageID; + $encoded_record->nameID = $record->nameID; + $encoded_record->offset = $offset; + $encoded_record->string = $record->string; + $encoded_record->length = mb_strlen($encoded_record->getUTF16(), "8bit"); + $records_to_encode[] = $encoded_record; + + $offset += $encoded_record->length; + $length += $font->pack(nameRecord::$format, (array)$encoded_record); } - foreach ($records as $record) { + foreach ($records_to_encode as $record) { $str = $record->getUTF16(); $length += $font->write($str, mb_strlen($str, "8bit")); } diff --git a/src/FontLib/Table/Type/nameRecord.php b/src/FontLib/Table/Type/nameRecord.php index 38cd114..fe22ba3 100644 --- a/src/FontLib/Table/Type/nameRecord.php +++ b/src/FontLib/Table/Type/nameRecord.php @@ -23,6 +23,7 @@ class nameRecord extends BinaryStream { public $length; public $offset; public $string; + public $stringRaw; public static $format = array( "platformID" => self::uint16, diff --git a/src/FontLib/TrueType/File.php b/src/FontLib/TrueType/File.php index 446eac8..f32d067 100644 --- a/src/FontLib/TrueType/File.php +++ b/src/FontLib/TrueType/File.php @@ -101,6 +101,72 @@ class File extends BinaryStream { "Ccaron", "ccaron", "dmacron" ); + private function uniord (string $c, ?string $encoding = null) { // nullable is explicit: implicit nullability via "= null" is deprecated as of PHP 8.4 + if (function_exists("mb_ord")) { + if (PHP_VERSION_ID < 80000 && $encoding === null) { + // in PHP < 8 the encoding argument, if 
supplied, must be a valid encoding + $encoding = "UTF-8"; + } + return mb_ord($c, $encoding); + } + + if ($encoding != "UTF-8" && $encoding !== null) { + $c = mb_convert_encoding($c, "UTF-8", $encoding); + } + + $length = mb_strlen(mb_substr($c, 0, 1), '8bit'); + $ord = false; + $bytes = []; + $numbytes = 1; + for ($i = 0; $i < $length; $i++) { + $o = \ord($c[$i]); // read one byte at a time + if (\count($bytes) === 0) { // get starting octet + if ($o <= 0x7F) { + $ord = $o; + $numbytes = 1; + } elseif (($o >> 0x05) === 0x06) { // 2 bytes character (0x06 = 110 BIN) + $bytes[] = ($o - 0xC0) << 0x06; + $numbytes = 2; + } elseif (($o >> 0x04) === 0x0E) { // 3 bytes character (0x0E = 1110 BIN) + $bytes[] = ($o - 0xE0) << 0x0C; + $numbytes = 3; + } elseif (($o >> 0x03) === 0x1E) { // 4 bytes character (0x1E = 11110 BIN) + $bytes[] = ($o - 0xF0) << 0x12; + $numbytes = 4; + } else { + $ord = false; + break; + } + } elseif (($o >> 0x06) === 0x02) { // bytes 2, 3 and 4 must start with 0x02 = 10 BIN + $bytes[] = $o - 0x80; + if (\count($bytes) === $numbytes) { + // compose UTF-8 bytes to a single unicode value + $o = $bytes[0]; + for ($j = 1; $j < $numbytes; $j++) { + $o += ($bytes[$j] << (($numbytes - $j - 1) * 0x06)); + } + if ((($o >= 0xD800) and ($o <= 0xDFFF)) or ($o > 0x10FFFF)) { // U+10FFFF is the last valid code point, so only values above it are rejected + // The definition of UTF-8 prohibits encoding character numbers between + // U+D800 and U+DFFF, which are reserved for use with the UTF-16 + // encoding form (as surrogate pairs) and do not directly represent + // characters. 
+ return false; + } else { + $ord = $o; // keep the decoded code point + } + // reset data for next char + $bytes = []; + $numbytes = 1; + } + } else { + $ord = false; + break; + } + } + + return $ord; + } + function getTable() { $this->parseTableEntries(); @@ -157,7 +223,7 @@ function utf8toUnicode($str) { function getUnicodeCharMap() { $subtable = null; foreach ($this->getData("cmap", "subtables") as $_subtable) { - if ($_subtable["platformID"] == 0 || $_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1) { + if ($_subtable["platformID"] == 0 || ($_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1)) { $subtable = $_subtable; break; } @@ -167,6 +233,51 @@ function getUnicodeCharMap() { return $subtable["glyphIndexArray"]; } + $system_encodings = mb_list_encodings(); + $system_encodings = array_change_key_case(array_fill_keys($system_encodings, true), CASE_UPPER); + foreach ($this->getData("cmap", "subtables") as $_subtable) { + $encoding = null; + switch ($_subtable["platformID"]) { + case 3: + switch ($_subtable["platformSpecificID"]) { + case 2: + if (\array_key_exists("SJIS", $system_encodings)) { + $encoding = "SJIS"; + } + break; + case 3: + if (\array_key_exists("GB18030", $system_encodings)) { + $encoding = "GB18030"; + } + break; + case 4: + if (\array_key_exists("BIG-5", $system_encodings)) { + $encoding = "BIG-5"; + } + break; + case 5: + if (\array_key_exists("UHC", $system_encodings)) { + $encoding = "UHC"; + } + break; + } + break; + } + if ($encoding) { + $glyphIndexArray = array(); + foreach ($_subtable["glyphIndexArray"] as $c => $gid) { + $str = ltrim(pack("N", $c), "\0"); // strip only the leading NUL padding; trim() also removed whitespace bytes, dropping codes such as 0x20 (space) + if (\strlen($str) > 0) { + $ord = $this->uniord($str, $encoding); + if ($ord > 0) { + $glyphIndexArray[$ord] = $gid; + } + } + } + return $glyphIndexArray; + } + } + return null; } @@ -239,8 +350,13 @@ function encode($tags = array()) { } $num_tables = count($entries); + $exponent = floor(log($num_tables, 2)); + $power_of_two = pow(2, $exponent); 
$this->header->data["numTables"] = $num_tables; + $this->header->data["searchRange"] = $power_of_two * 16; + $this->header->data["entrySelector"] = log($power_of_two, 2); + $this->header->data["rangeShift"] = $num_tables * 16 - $this->header->data["searchRange"]; $this->header->encode(); $directory_offset = $this->pos();