
sync master with local
rviscomi committed May 28, 2019
1 parent 58a1d11 commit 717dbd4
Showing 10 changed files with 56 additions and 61 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,6 +1,9 @@
bulktest/wptkey.inc.php
bulktest/batch.log*
bulktest/httparchive_batch_lock*
node_modules/
tmp/
downloads/
harviewer/
*.log
*nohup.out
23 changes: 16 additions & 7 deletions bulktest/batch_lib.inc
@@ -27,7 +27,7 @@ define("DONE", 4);
// The status table saves $gErrBase + i to indicate that a permanent error occurred while the test was in status i.
$gErrBase = 900;
// The list of tasks for a batch run.
$gNumParse = 5; // the number of parse tasks to fork
$gNumParse = 10; // the number of parse tasks to fork
$gaTasks = array("submit", "status", "obtain");
for ( $i = 1; $i <= $gNumParse; $i++ ) {
array_push($gaTasks, "parse" . $i); // dynamically create the desired number of parse tasks
@@ -91,12 +91,10 @@ function submitTest(&$record, $status) {

$wptServer = wptServer();
$location = $record['location'];
/*
if ($location == 'California:Chrome.3G') {
$location = 'California:Chrome.4G';
}
*/
$request = $wptServer . 'runtest.php?f=xml&priority=6&timeline=1&url=' . urlencode($record['url']) .
$request = $wptServer . 'runtest.php?f=xml&debug=1&priority=6&timeline=1&url=' . urlencode($record['url']) .
"&location=$location&runs=$runs" .
( $private ? "&private=1" : "" ) .
( $video ? "&video=1" : "" ) .
@@ -138,10 +136,18 @@ function submitTest(&$record, $status) {

// Submit the batch test to WPT server.
function submitBatch() {
global $gMaxQueueLength;
$submittedTests = countTestsWithCode(SUBMITTED);
$unsubmitTests = obtainTestsWithCode(NOT_STARTED);
if ( !isEmptyQuery($unsubmitTests) ) {
while ($row = mysqli_fetch_assoc($unsubmitTests)) {
submitTest($row, 0);
// Limit the number of in-flight tests
if ($gMaxQueueLength) {
$submittedTests++;
if ($submittedTests >= $gMaxQueueLength)
break;
}
}
}
}
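
For context, the new cap works together with the $gMaxQueueLength setting added to settings.inc later in this commit (60000). A minimal standalone sketch of the logic, with a hypothetical starting count and URL list (submitTest() is stubbed as an echo):

<?php
// Sketch of the in-flight cap added to submitBatch(): stop submitting
// once the number of SUBMITTED tests reaches $gMaxQueueLength.
$gMaxQueueLength = 60000;  // value set in settings.inc by this commit
$submittedTests = 59998;   // hypothetical: tests already in flight
$pending = array("http://a.example/", "http://b.example/", "http://c.example/");

foreach ($pending as $url) {
    echo "submit: $url\n"; // stands in for submitTest($row, 0)
    if ($gMaxQueueLength) {
        $submittedTests++;
        if ($submittedTests >= $gMaxQueueLength) {
            break; // cap reached; remaining URLs wait for the next batch run
        }
    }
}

Here only the first two URLs are submitted; the third is left for a later run.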
@@ -180,13 +186,13 @@ function checkWPTStatus() {
$nNoResult = 0; // reset
setStatus($row['statusid'], SUBMIT_DONE);
}
elseif ( 400 <= $code ) {
elseif ( 400 <= $code || 100 > $code ) {
$nNoResult = 0; // reset
setStatus($row['statusid'], SUBMITTED + $gErrBase);
}
else {
$nNoResult++;
if ( $nNoResult > 200 ) {
if ( $nNoResult > 2000 ) {
// Quick bail:
// If we've exhausted all the completed results we do NOT want
// to continue checking EVERY remaining test. So instead we bail
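
The added "100 > $code" guard matters because a failed or malformed status request can yield a code of 0, which previously fell into the "no result yet" branch and ate into the bail-out counter. A sketch of the resulting classification (the 200/success branch is assumed from the context just above this hunk):

<?php
// Sketch: classify a WebPageTest status code the way checkWPTStatus()
// does after this change. Branch order assumed from the surrounding code.
function classify($code) {
    if (200 == $code) {
        return "done";    // test finished; mark SUBMIT_DONE
    } elseif (400 <= $code || 100 > $code) {
        return "error";   // 4xx/5xx, or e.g. 0 from a failed request
    }
    return "pending";     // 1xx: still queued or running
}

var_dump(classify(200), classify(101), classify(404), classify(0));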
@@ -966,7 +972,10 @@ function prettyType($mimeType, $ext) {
else if ( "xml" === $ext ) {
return "xml";
}
else if ( false !== strpos($mimeType, "flash") || "mp4" === $ext || "swf" === $ext || "f4v" === $ext || "flv" === $ext ) {
// Video extensions: mp4, webm, ts, m4v, m4s, mov, ogv
else if ( false !== strpos($mimeType, "flash") || false !== strpos($mimeType, "webm") || false !== strpos($mimeType, "mp4") || false !== strpos($mimeType, "flv")
|| "mp4" === $ext || "webm" === $ext || "ts" == $ext || "m4v" === $ext || "m4s" === $ext || "mov" === $ext || "ogv" === $ext
|| "swf" === $ext || "f4v" === $ext || "flv" === $ext ) {
return "video";
}
else if ( false !== strpos($mimeType, "html") || "html" === $ext || "htm" === $ext ) {
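
A compact, equivalent form of the widened video check above, with hypothetical sample inputs (isVideo() is not a function in this repo, just an illustration):

<?php
// Sketch: same predicate as the new "video" branch of prettyType(),
// expressed with in_array() for readability.
function isVideo($mimeType, $ext) {
    foreach (array("flash", "webm", "mp4", "flv") as $fragment) {
        if (false !== strpos($mimeType, $fragment)) {
            return true;
        }
    }
    return in_array($ext, array("mp4", "webm", "ts", "m4v", "m4s", "mov", "ogv", "swf", "f4v", "flv"), true);
}

var_dump(isVideo("video/webm", ""));   // true, new in this commit
var_dump(isVideo("", "ts"));           // true, new in this commit
var_dump(isVideo("text/html", "htm")); // false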
4 changes: 4 additions & 0 deletions bulktest/batch_process.php
@@ -72,6 +72,10 @@

updateCrawl($labelFromRun, $gArchive, $locations[0], array( "finishedDateTime" => time() ));

// Clean up the requests table after the dump completes (it is only used during a crawl to calculate aggregate stats)
doSimpleCommand("TRUNCATE TABLE $gRequestsTable;");
doSimpleCommand("optimize table $gRequestsTable;");

cprint(date("G:i") . ": DONE with crawl!");
exit(0);
}
4 changes: 2 additions & 2 deletions bulktest/batch_start.php
@@ -110,10 +110,10 @@
loadUrlsFromDB($crawlid, $label, $gNumUrls);
}
else if ( $gbMobile ) {
loadUrlsFromDB($crawlid, $label, 1500000);
loadUrlsFromDB($crawlid, $label, 10000000);
}
else if ( $gbDev ) {
loadUrlsFromDB($crawlid, $label, 1500000);
loadUrlsFromDB($crawlid, $label, 10000000);
}

$numUrls = doSimpleQuery("select count(*) from $gStatusTable where crawlid=$crawlid;");
2 changes: 1 addition & 1 deletion bulktest/bootstrap.inc
@@ -28,7 +28,7 @@ $private = false;
$docComplete = false; // &web10 WPT param
$gbMobileEmul = ( $gbAndroid || $gbMobile ? 1 : 0 ); // WPT param for whether we should use mobile emulation
$fvonly = true;
$runs = 3;
$runs = 1;
$mv = 1; // only save video for Median run
$gbNoScript = false;
$wptApiKey = '';
68 changes: 20 additions & 48 deletions bulktest/cleanup-requests.php
@@ -17,66 +17,38 @@
// The purpose is to free up disk space taken up by rows in the
// "requests" and "requestsdev" tables. We do NOT need these rows for
// any part of the UI, and all of the requests are archived in a dump
// file for each crawl. I'm too nervous to delete the rows automatically
// as part of the crawl process. Instead, once everything looks okay,
// I run this script manually.
// file for each crawl.

require_once("bootstrap.inc");
require_once("../utils.inc");

$now = time();

$gbActuallyDoit = false;
if ( array_key_exists(1, $argv) ) {
if ( "DOIT" == $argv[1] ) {
$gbActuallyDoit = true;
}
else {
cprint("Do 'php cleanup-requests.php DOIT' to actually delete the rows.");
}
}

$gSkipRuns = 1; // how many runs we want to skip and leave their requests intact
echo exec("df -h .") . "\n";

cleanupRequests("California:Chrome", "requestsdev");
cleanupRequests("California:Chrome", "requests");
cleanupRequests("California2:Chrome.3G", "requestsmobiledev");
cleanupRequests("California2:Chrome.3G", "requestsmobile");

echo "DONE\n\n";

function cleanupRequests($location, $table) {
global $gSkipRuns, $gbActuallyDoit;
$nUnfinished = doSimpleQuery("select count(*) from crawls where finishedDateTime is null;");
if ( 0 < $nUnfinished ) {
cprint("SORRY! There is an unfinished crawl. Skipping the cleanup while the crawl is running.");
exit(1);
}

$query = "select * from crawls where location = '$location' and finishedDateTime is not null order by crawlid desc limit " . ($gSkipRuns+1) . ";";
$results = doQuery($query);
mysqli_data_seek($results, $gSkipRuns);
$row = mysqli_fetch_assoc($results);
cleanupRequests("requestsdev");
cleanupRequests("requests");
cleanupRequests("requestsmobiledev");
cleanupRequests("requestsmobile");

if ( $gbActuallyDoit ) {
$nUnfinished = doSimpleQuery("select count(*) from crawls where location = '$location' and finishedDateTime is null;");
if ( 0 < $nUnfinished ) {
cprint("SORRY! There is an unfinished crawl for location '$location'. Skipping the cleanup while the crawl is running.");
return;
}
echo "DONE\n\n";

// Actually delete rows and optimize the table.
cprint("Delete requests from \"$table\" table starting with crawl \"{$row['label']}\" crawlid={$row['crawlid']} minPageid={$row['minPageid']} maxPageid={$row['maxPageid']} and earlier...");
$cmd = "delete from $table where crawlid <= {$row['crawlid']};";
cprint("$cmd");
doSimpleCommand($cmd);
cprint("Optimize table \"$table\"...");
doSimpleCommand("optimize table $table;");
cprint("Done with table \"$table\".");
}
else {
// How many rows would be deleted?
$numRows = doSimpleQuery("select count(*) from $table where crawlid <= {$row['crawlid']};");
cprint("$numRows rows to be deleted for $location in $table.");
function cleanupRequests($table) {
global $lastCrawl;

cprint("WOULD delete requests from \"$table\" table starting with crawl \"{$row['label']}\" crawlid={$row['crawlid']} minPageid={$row['minPageid']} maxPageid={$row['maxPageid']} and earlier...");
}
// Actually delete rows and optimize the table.
$cmd = "TRUNCATE TABLE $table;";
cprint("$cmd");
doSimpleCommand($cmd);
cprint("Optimize table \"$table\"...");
doSimpleCommand("optimize table $table;");
cprint("Done with table \"$table\".");

echo exec("df -h .") . "\n";
}
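
After this rewrite the script reduces to one TRUNCATE plus OPTIMIZE per table. A standalone sketch of the end-to-end flow, with the repo's cprint()/doSimpleCommand() helpers stubbed so it runs on its own:

<?php
// Sketch: the post-refactor cleanup, one TRUNCATE + OPTIMIZE per table.
function cprint($msg) { echo $msg . "\n"; }
function doSimpleCommand($sql) { cprint("SQL: $sql"); /* would run against MySQL */ }

foreach (array("requestsdev", "requests", "requestsmobiledev", "requestsmobile") as $table) {
    doSimpleCommand("TRUNCATE TABLE $table;"); // drop every row, fast
    doSimpleCommand("optimize table $table;"); // reclaim the freed disk space
}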
4 changes: 4 additions & 0 deletions bulktest/importurls.php
@@ -50,6 +50,10 @@
if ( "alexa" === $gFileType ) {
doSimpleCommand("update $gUrlsTable set ranktmp=null;");
}
// Clear out existing CrUX URLs.
else if ( "other" === $gFileType ) {
doSimpleCommand("truncate table $gUrlsTable;");
}


$handle = @fopen($gUrlsFile, "r");
4 changes: 2 additions & 2 deletions crawls.inc
@@ -271,7 +271,7 @@ function dumpCrawl2($label, $archive=null, $location=null, $bMysql=true, $bCsv=t
$dumpfile = dumpfileName2($tablename, "csv") . ".gz";
tprint("Creating dump file $dumpfile...");
$dumpfileSql = dumpfileName2($tablename, "sql");
$tmpdir = "/tmp/$tablename." . time(); // Unique dir for this dump cuz mysqldump writes files that aren't writable by this process, and mysqldump -T can NOT overwrite existing files.
$tmpdir = "/var/tmp/$tablename." . time(); // Unique dir for this dump cuz mysqldump writes files that aren't writable by this process, and mysqldump -T can NOT overwrite existing files.
exec("mkdir $tmpdir; chmod 777 $tmpdir;");
$cmd = "mysqldump --opt --complete-insert --skip-add-drop-table -u $gMysqlUsername -p$gMysqlPassword -h $gMysqlServer -T $tmpdir $gMysqlDb $tablename; " .
"gzip -f -c $tmpdir/$tablename.txt > $dumpfile ; cp $tmpdir/$tablename.sql $dumpfileSql";
@@ -306,7 +306,7 @@ function dumpCrawl($label, $archive=null, $location=null) {
// pages csv
// Unique dir for this dump cuz mysqldump writes files that aren't writable by this process, and mysqldump -T can NOT overwrite existing files.
$labelUnderscore = str_replace(" ", "_", $label);
$tmpdir = "/tmp/$labelUnderscore." . time();
$tmpdir = "/var/tmp/$labelUnderscore." . time();
$cmd = "mkdir $tmpdir; chmod 777 $tmpdir;";
exec($cmd);
$dumpfile = dumpfileName($label, "pages", "csv");
3 changes: 2 additions & 1 deletion dbapi.inc
@@ -39,7 +39,7 @@ $gUrlsChangeTableDesktop = $gUrlsChangeTable;
// Mobile tables
$gPagesTableMobile = $gPagesTable . "mobile";
$gRequestsTableMobile = $gRequestsTable . "mobile";
$gUrlsTableMobile = "urls";
$gUrlsTableMobile = "urlsmobile";
$gStatusTableMobile = $gStatusTable . "mobile";
$gStatsTableMobile = $gStatsTable; // share the data table - a first step toward a single DB

@@ -98,6 +98,7 @@ else if ( $gbMobile ) {
$gRequestsTable = $gRequestsTableMobile;
$gStatusTable = $gStatusTableMobile;
$gStatsTable = $gStatsTableMobile;
$gUrlsTable = $gUrlsTableMobile;
}
else if ( $gbChrome ) {
// Use a chrome version of the database tables if "chrome" is in the path.
2 changes: 2 additions & 0 deletions settings.inc
@@ -36,6 +36,8 @@ $gHAUrl = "http://httparchive.org/";
$gHAMUrl = "http://mobile.httparchive.org/";
$gWPTUrl = "//httparchive.webpagetest.org/";

$gMaxQueueLength = 60000;

$gbPrivateInstance = false;

?>
