Skip to content

Commit

Permalink
test: add introduction guide e2e test (#1976)
Browse files Browse the repository at this point in the history
  • Loading branch information
B4nan authored Jul 13, 2023
1 parent cc3950f commit 6030a2c
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ crawlee_storage
storage
.turbo
.npmrc
test/e2e/**/packages

# we use corepack, no need to commit yarn binary
.yarn
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"actorSpecification": 1,
"name": "test-playwright-introduction-guide",
"version": "0.0",
"buildTag": "latest",
"env": null
}
7 changes: 7 additions & 0 deletions test/e2e/playwright-introduction-guide/actor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.idea
.DS_Store
node_modules
package-lock.json
apify_storage
crawlee_storage
storage
23 changes: 23 additions & 0 deletions test/e2e/playwright-introduction-guide/actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Stage 1 ("builder"): install the actor's production dependencies on node:16.
FROM node:16 AS builder

# Bring in the local ./packages directory and the package manifest(s) from the
# build context. No WORKDIR is set in this stage, so the relative destinations
# resolve against the image's default working directory
# (NOTE(review): presumably "/", which matches the absolute paths copied
# out of this stage below — confirm).
COPY /packages ./packages
COPY /package*.json ./
# Disable the npm progress bar, install without dev and optional dependencies,
# then run "npm update" on the result.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& npm update

# Stage 2: the image the actor actually runs in.
FROM apify/actor-node-playwright-chrome:16-beta

# Drop the node_modules directory found in the working directory
# (presumably shipped with the base image — confirm) and replace it with the
# dependencies, packages and manifests produced by the builder stage.
RUN rm -r node_modules
COPY --from=builder /node_modules ./node_modules
COPY --from=builder /packages ./packages
COPY --from=builder /package*.json ./
# Actor metadata and the entry script come straight from the build context.
COPY /.actor ./.actor
COPY /main.js ./

# Print the installed packages and the Node.js/npm versions into the build log.
# "|| true" keeps the build going even if "npm list" exits with a non-zero code.
RUN echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
72 changes: 72 additions & 0 deletions test/e2e/playwright-introduction-guide/actor/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { Actor } from 'apify';
import { Dataset, createPlaywrightRouter, PlaywrightCrawler } from '@crawlee/playwright';

// Pick the storage passed to Actor.init(): an ApifyStorageLocal instance when
// STORAGE_IMPLEMENTATION is exactly 'LOCAL', otherwise `undefined`.
// The package is imported lazily so it is only loaded when it is needed.
let storage;
if (process.env.STORAGE_IMPLEMENTATION === 'LOCAL') {
    const { ApifyStorageLocal } = await import('@apify/storage-local');
    storage = new ApifyStorageLocal();
}

await Actor.init({ storage });

// Router.create() would work as well; createPlaywrightRouter() is
// just a helper that gives better intellisense and typings.
export const router = createPlaywrightRouter();

// Route for requests labeled DETAIL; it takes over from the
// request.label === DETAIL branch of the if clause.
router.addHandler('DETAIL', async (context) => {
    const { request, page, log } = context;
    log.debug(`Extracting data: ${request.url}`);

    // Last two '/'-separated pieces of the URL; the first one is stored as the owner.
    const lastTwoSegments = request.url.split('/').slice(-2);
    const [owner] = lastTwoSegments;

    // Raw value of the `datetime` attribute of the <time> element.
    const timeElement = page.locator('time[datetime]');
    const datetimeAttr = await timeElement.getAttribute('datetime');

    // Text of the medallion list item that contains "Runs".
    const runsItem = page
        .locator('ul.ActorHeader-userMedallion > li')
        .filter({ hasText: 'Runs' });
    const runsText = await runsItem.textContent();

    const title = await page.locator('.ActorHeader-identificator h1').textContent();
    const description = await page.locator('p.ActorHeader-description').textContent();

    const record = {
        url: request.url,
        uniqueIdentifier: lastTwoSegments.join('/'),
        owner,
        title,
        description,
        // The attribute value is converted with Number() before the Date is built.
        modifiedDate: new Date(Number(datetimeAttr)),
        // Strip the "Runs " prefix and keep the remainder as a string.
        runCount: runsText.replace('Runs ', ''),
    };

    log.info(`Saving data: ${request.url}`);
    await Dataset.pushData(record);
});

// Fallback route: it handles the start URL as well as
// every URL labeled LIST.
router.addDefaultHandler(async (context) => {
    const { request, page, enqueueLinks, log } = context;

    const paginationLinks = '.ActorStorePagination-buttons a';
    const actorCardLinks = 'div[data-test="actorCard"] a';

    // Wait for the pagination links to appear, then enqueue them as LIST pages.
    log.debug(`Enqueueing pagination: ${request.url}`);
    await page.waitForSelector(paginationLinks);
    await enqueueLinks({ selector: paginationLinks, label: 'LIST' });

    // Same for the actor cards, but with a different label (DETAIL)
    // so they are routed to the DETAIL handler.
    log.debug(`Enqueueing actor details: ${request.url}`);
    await page.waitForSelector(actorCardLinks);
    await enqueueLinks({ selector: actorCardLinks, label: 'DETAIL' });
});

// Rather than one long requestHandler full of if clauses,
// the router instance itself is used as the request handler.
const crawlerOptions = {
    maxRequestsPerCrawl: 10, // so the test runs faster
    requestHandler: router,
};
const crawler = new PlaywrightCrawler(crawlerOptions);

const startUrls = ['https://apify.com/store'];
await crawler.run(startUrls);

// The `exit` option is set from Actor.isAtHome(), evaluated after the crawl finished.
const exit = Actor.isAtHome();
await Actor.exit({ exit });
29 changes: 29 additions & 0 deletions test/e2e/playwright-introduction-guide/actor/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"name": "test-playwright-introduction-guide",
"version": "0.0.1",
"description": "Crawlee Introduction Guide (playwright + chrome)",
"dependencies": {
"apify": "next",
"@apify/storage-local": "^2.1.0",
"@crawlee/basic": "file:./packages/basic-crawler",
"@crawlee/browser": "file:./packages/browser-crawler",
"@crawlee/browser-pool": "file:./packages/browser-pool",
"@crawlee/core": "file:./packages/core",
"@crawlee/memory-storage": "file:./packages/memory-storage",
"@crawlee/playwright": "file:./packages/playwright-crawler",
"@crawlee/types": "file:./packages/types",
"@crawlee/utils": "file:./packages/utils",
"playwright": "*"
},
"overrides": {
"apify": {
"@crawlee/core": "file:./packages/core",
"@crawlee/utils": "file:./packages/utils"
}
},
"scripts": {
"start": "node main.js"
},
"type": "module",
"license": "ISC"
}
8 changes: 8 additions & 0 deletions test/e2e/playwright-introduction-guide/test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { initialize, getActorTestDir, runActor, expect } from '../tools.mjs';

// Resolve this test's actor directory from the module URL and initialize it.
const actorDir = getActorTestDir(import.meta.url);
await initialize(actorDir);

// Run the actor. NOTE(review): the meaning of 16384 is defined by runActor()
// in ../tools.mjs, which is not visible here — confirm before changing it.
const runResult = await runActor(actorDir, 16384);
const { requestsFinished } = runResult.stats;

// The run passes when at least 10 requests finished.
await expect(requestsFinished >= 10, 'All requests finished');

0 comments on commit 6030a2c

Please sign in to comment.