Skip to content
This repository has been archived by the owner on Jul 26, 2024. It is now read-only.

Commit

Permalink
Made multibyte handling manual for performance
Browse files Browse the repository at this point in the history
Application of mb_ functions in the last commit would cause massive
performance degradation as experienced before. This instead
manually buffers up multibyte characters when parsing to retain
performance.
  • Loading branch information
ssddanbrown committed Jan 24, 2022
1 parent 40d1a8a commit 58f8185
Showing 1 changed file with 22 additions and 3 deletions.
25 changes: 22 additions & 3 deletions src/WordSplitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,29 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
$isGrouping = false;
$groupingUntil = -1;

$length = mb_strlen($text);
$length = strlen($text);
$mbCharLength = 0;
$character = "";
for ($index = 0; $index < $length; $index++)
{
$character = mb_substr($text, $index, 1);
$currentCharacter = substr($text, $index, 1);

// Join multibyte characters together if we're in one
if ($mbCharLength > 1) {
$mbCharLength--;
$character .= $currentCharacter;
if ($mbCharLength !== 1) {
continue;
}
} else {
// Check if we're in a multibyte character
$currentCharVal = ord($currentCharacter);
$character = $currentCharacter;
if ($currentCharVal >= 192) {
$mbCharLength = ($currentCharVal >= 240) ? 4 : (($currentCharVal >= 224) ? 3 : 2);
continue;
}
}

// Don't bother executing block checks if we don't have any blocks to check for!
if ($isBlockCheckRequired) {
Expand Down Expand Up @@ -72,7 +91,7 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
$currentWord = $character;
$mode = Mode::WHITESPACE;
} else if (Utils::isWord($character) &&
(strlen($currentWord) === 0) || Utils::isWord(mb_substr($currentWord, -1))) {
(strlen($currentWord) === 0) || Utils::isWord(substr($currentWord, -1))) {
$currentWord .= $character;
} else {
if (strlen($currentWord) !== 0) {
Expand Down

0 comments on commit 58f8185

Please sign in to comment.