Skip to content
This repository has been archived by the owner on Jul 26, 2024. It is now read-only.

Commit

Permalink
Removed unnecessary use of mb_ functions
Browse files Browse the repository at this point in the history
These were slowing down content parsing by at least 20x.
Most were checks for non-empty content; the others should not be
affected by not being multi-byte managed.
  • Loading branch information
ssddanbrown committed Jan 24, 2021
1 parent d1978c7 commit 73f41bd
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 15 deletions.
2 changes: 2 additions & 0 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
backupStaticAttributes="false"
colors="true"
verbose="true"
enforceTimeLimit="false"
defaultTimeLimit="5"
convertErrorsToExceptions="true"
convertNoticesToExceptions="true"
convertWarningsToExceptions="true"
Expand Down
31 changes: 16 additions & 15 deletions src/WordSplitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
$isGrouping = false;
$groupingUntil = -1;

for ($index = 0; $index < mb_strlen($text); $index++)
$length = strlen($text);
for ($index = 0; $index < $length; $index++)
{
$character = mb_substr($text, $index, 1);
$character = substr($text, $index, 1);

// Don't bother executing block checks if we don't have any blocks to check for!
if ($isBlockCheckRequired) {
Expand Down Expand Up @@ -51,28 +52,28 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
switch ($mode) {
case Mode::CHARACTER:
if (Utils::isStartOfTag($character)) {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = "<";
$mode = Mode::TAG;
} else if (Utils::isStartOfEntity($character)) {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::ENTITY;
} else if (Utils::isWhiteSpace($character)) {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::WHITESPACE;
} else if (Utils::isWord($character) &&
(mb_strlen($currentWord) === 0) || Utils::isWord(substr($currentWord, -1))) {
(strlen($currentWord) === 0) || Utils::isWord(substr($currentWord, -1))) {
$currentWord .= $character;
} else {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
Expand All @@ -96,21 +97,21 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres

if (Utils::isStartOfTag($character))
{
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::TAG;
} else if (Utils::isStartOfEntity($character)) {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::ENTITY;
} else if (Utils::isWhiteSpace($character)) {
$currentWord .= $character;
} else {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
Expand All @@ -122,20 +123,20 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres

if (Utils::isStartOfTag($character))
{
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::TAG;
} else if (Utils::isWhiteSpace($character)) {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
$mode = Mode::WHITESPACE;
} else if (Utils::isEndOfEntity($character)) {
$switchToNextMode = true;
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$currentWord .= $character;
$words[] = $currentWord;

Expand All @@ -158,7 +159,7 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
} else if (Utils::isWord($character)) {
$currentWord .= $character;
} else {
if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}
$currentWord = $character;
Expand All @@ -168,7 +169,7 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
}
}

if (mb_strlen($currentWord) !== 0) {
if (strlen($currentWord) !== 0) {
$words[] = $currentWord;
}

Expand Down
24 changes: 24 additions & 0 deletions tests/HeavyContentTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php namespace Ssddanbrown\HtmlDiff\Tests;

use PHPUnit\Framework\TestCase;
use Ssddanbrown\HtmlDiff\Diff;

/**
 * Performance regression tests that feed unusually large content through the diff.
 */
class HeavyContentTest extends TestCase
{

    /**
     * Diffs two paragraphs whose `data-test` attribute holds a very large
     * base64 payload and asserts that the whole run (fixture construction
     * plus diff) finishes in under three seconds.
     */
    public function test_large_attribute_content()
    {
        // microtime(true) returns fractional seconds. The previous time() based
        // measurement only had one-second resolution, so the same real duration
        // could pass or fail depending on where the second boundaries fell.
        $start = microtime(true);

        // 10000 repetitions of a 16-byte string: 160,000 bytes before encoding.
        // Encoded once and reused, since both inputs share the same payload.
        $encoded = base64_encode(str_repeat('cattestingstring', 10000));
        $a = '<p data-test="' . $encoded . '">contnent</p>';
        $b = '<p data-test="' . $encoded . 'cat">contnent2</p>';

        // NOTE(review): `excecute` is the method name as spelled at this call
        // site in the original test; presumably it matches the Diff class API.
        $output = Diff::excecute($a, $b);

        // Capture the elapsed time before asserting so the measurement does not
        // include assertion overhead.
        $elapsed = microtime(true) - $start;

        $this->assertNotEmpty($output);
        $this->assertLessThan(3, $elapsed);
    }

}

0 comments on commit 73f41bd

Please sign in to comment.