
Commit 72b1bd7

chore: fix typos (#2591)
Found a few misspellings.

https://github.com/crate-ci/typos

Please review.
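
For context, these fixes come from the typos checker linked above. A typical invocation looks something like this (a minimal sketch; flags taken from the typos README, so double-check against your installed version):

```sh
# Report likely misspellings across the repository (dry run by default).
typos

# Apply the suggested corrections in place.
typos --write-changes
```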
szepeviktor authored Jul 31, 2024
1 parent d79db76 commit 72b1bd7
Showing 41 changed files with 62 additions and 62 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -287,7 +287,7 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline
### Bug Fixes

* `retryOnBlocked` doesn't override the blocked HTTP codes ([#2243](https://github.com/apify/crawlee/issues/2243)) ([81672c3](https://github.com/apify/crawlee/commit/81672c3d1db1dcdcffb868de5740addff82cf112))
- * **browser-pool:** respect user options before assigning fingerpints ([#2190](https://github.com/apify/crawlee/issues/2190)) ([f050776](https://github.com/apify/crawlee/commit/f050776a916a0530aca6727a447a49252e643417)), closes [#2164](https://github.com/apify/crawlee/issues/2164)
+ * **browser-pool:** respect user options before assigning fingerprints ([#2190](https://github.com/apify/crawlee/issues/2190)) ([f050776](https://github.com/apify/crawlee/commit/f050776a916a0530aca6727a447a49252e643417)), closes [#2164](https://github.com/apify/crawlee/issues/2164)
* filter out empty globs ([#2205](https://github.com/apify/crawlee/issues/2205)) ([41322ab](https://github.com/apify/crawlee/commit/41322ab32d7db7baf61638d00fd7eaec9e5330f1)), closes [#2200](https://github.com/apify/crawlee/issues/2200)
* make CLI work on Windows too with `--no-purge` ([#2244](https://github.com/apify/crawlee/issues/2244)) ([83f3179](https://github.com/apify/crawlee/commit/83f3179d6963dbaca4232d866356cc766db11dd8))
* make SessionPool queue up getSession calls to prevent overruns ([#2239](https://github.com/apify/crawlee/issues/2239)) ([0f5665c](https://github.com/apify/crawlee/commit/0f5665c473371bff5a5d3abee3c3a9d23f2aeb23)), closes [#1667](https://github.com/apify/crawlee/issues/1667)
4 changes: 2 additions & 2 deletions MIGRATIONS.md
@@ -123,7 +123,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -133,7 +133,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
4 changes: 2 additions & 2 deletions docs/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
2 changes: 1 addition & 1 deletion packages/browser-pool/CHANGELOG.md
@@ -162,7 +162,7 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline

### Bug Fixes

- * **browser-pool:** respect user options before assigning fingerpints ([#2190](https://github.com/apify/crawlee/issues/2190)) ([f050776](https://github.com/apify/crawlee/commit/f050776a916a0530aca6727a447a49252e643417)), closes [#2164](https://github.com/apify/crawlee/issues/2164)
+ * **browser-pool:** respect user options before assigning fingerprints ([#2190](https://github.com/apify/crawlee/issues/2190)) ([f050776](https://github.com/apify/crawlee/commit/f050776a916a0530aca6727a447a49252e643417)), closes [#2164](https://github.com/apify/crawlee/issues/2164)



6 changes: 3 additions & 3 deletions packages/browser-pool/src/playwright/playwright-controller.ts
@@ -94,13 +94,13 @@ export class PlaywrightController extends BrowserController<
const session = await page.context().newCDPSession(page);
await session.send('Network.enable');

- session.on('Network.responseReceived', (responseRecevied) => {
+ session.on('Network.responseReceived', (responseReceived) => {
const logOnly = ['Document', 'XHR', 'Fetch', 'EventSource', 'WebSocket', 'Other'];
- if (!logOnly.includes(responseRecevied.type)) {
+ if (!logOnly.includes(responseReceived.type)) {
return;
}

- const { response } = responseRecevied;
+ const { response } = responseReceived;
if (response.fromDiskCache || response.fromPrefetchCache || response.fromServiceWorker) {
return;
}
4 changes: 2 additions & 2 deletions packages/browser-pool/tab-as-a-container/background.js
@@ -44,7 +44,7 @@ const getCookieURL = (cookie) => {
};

// Rewrite cookies that were programmatically set to tabId instead of openerId.
- // This is requried because we cannot reliably get openerId inside Playwright.
+ // This is required because we cannot reliably get openerId inside Playwright.
chrome.cookies.onChanged.addListener(async (changeInfo) => {
if (!changeInfo.removed) {
const { cookie } = changeInfo;
@@ -105,7 +105,7 @@ chrome.webRequest.onBeforeSendHeaders.addListener(

// Sometimes Chrome makes a request on a ghost tab.
// We don't want these in order to prevent cluttering cookies.
- // Yes, `webNavigation.onComitted` is emitted and `webNavigation.onCreatedNavigationTarget` is not.
+ // Yes, `webNavigation.onCommitted` is emitted and `webNavigation.onCreatedNavigationTarget` is not.
if (header.name.toLowerCase() === 'purpose' && header.value === 'prefetch' && !counter.has(details.tabId)) {
// eslint-disable-next-line no-console
console.log(details);
2 changes: 1 addition & 1 deletion packages/http-crawler/src/internals/file-download.ts
@@ -61,7 +61,7 @@ export type FileDownloadRequestHandler<
/**
* Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
*
- * Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwith-efficient.
+ * Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.
* However, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,
* you might need to use {@apilink CheerioCrawler}, {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead.
*
2 changes: 1 addition & 1 deletion packages/jsdom-crawler/src/internals/jsdom-crawler.ts
@@ -42,7 +42,7 @@ export interface JSDOMCrawlerOptions<
*/
runScripts?: boolean;
/**
- * Supress the logs from JSDOM internal console.
+ * Suppress the logs from JSDOM internal console.
*/
hideInternalConsole?: boolean;
}
@@ -220,8 +220,8 @@ export class PlaywrightCrawler extends BrowserCrawler<
);
}

- // `browserPlugins` is working when it's not overriden by `launchContext`,
- // which for crawlers it is always overriden. Hence the error to use the other option.
+ // `browserPlugins` is working when it's not overridden by `launchContext`,
+ // which for crawlers it is always overridden. Hence the error to use the other option.
if (browserPoolOptions.browserPlugins) {
throw new Error('browserPoolOptions.browserPlugins is disallowed. Use launchContext.launcher instead.');
}
@@ -394,7 +394,7 @@ export async function cacheResponses(
};
}
} catch (e) {
- // ignore errors, usualy means that buffer is empty or broken connection
+ // ignore errors, usually means that buffer is empty or broken connection
}
});
}
2 changes: 1 addition & 1 deletion packages/utils/src/internals/cheerio.ts
@@ -57,7 +57,7 @@ export function htmlToText(htmlOrCheerioElement: string | CheerioRoot): string {
let compr;
if (elem.parent && elem.parent.tagName === 'pre') compr = elem.data;
else compr = elem.data.replace(/\s+/g, ' ');
- // If text is empty or ends with a whitespace, don't add the leading whitepsace
+ // If text is empty or ends with a whitespace, don't add the leading whitespace
if (compr.startsWith(' ') && /(^|\s)$/.test(text)) compr = compr.substring(1);
text += compr;
} else if (elem.type === 'comment' || SKIP_TAGS_REGEX.test(elem.tagName)) {
2 changes: 1 addition & 1 deletion packages/utils/src/internals/sitemap.ts
@@ -168,7 +168,7 @@ class SitemapXmlParser extends Transform {

interface ParseSitemapOptions {
/**
- * If set to `true`, elements reffering to other sitemaps will be emitted as special objects with a `bouba` property.
+ * If set to `true`, elements referring to other sitemaps will be emitted as special objects with a `bouba` property.
*/
emitNestedSitemaps?: true | false;
/**
2 changes: 1 addition & 1 deletion test/browser-pool/browser-pool.test.ts
@@ -592,7 +592,7 @@ describe.each([
fingerprintCacheSize: 1,
},
});
- // cast to any type in order to acces the maxSize property for testing purposes.
+ // cast to any type in order to access the maxSize property for testing purposes.
const cache: any = browserPoolCache!.fingerprintCache!;
expect(cache.maxSize).toBe(1);
});
2 changes: 1 addition & 1 deletion test/core/crawlers/basic_crawler.test.ts
@@ -1397,7 +1397,7 @@ describe('BasicCrawler', () => {
});
});

- describe('Dataset helpers, crawler paralellism', () => {
+ describe('Dataset helpers, crawler parallelism', () => {
const payload: Dictionary[] = [{ foo: 'bar', baz: 123 }];
const getPayload: (id: string) => Dictionary[] = (id) => [{ foo: id }];

4 changes: 2 additions & 2 deletions test/core/crawlers/cheerio_crawler.test.ts
@@ -146,7 +146,7 @@ describe('CheerioCrawler', () => {
});
});

- test('should work with explcit router', async () => {
+ test('should work with explicit router', async () => {
const requestList = await getRequestListForMirror();
const processed: Request[] = [];
const failed: Request[] = [];
@@ -463,7 +463,7 @@ describe('CheerioCrawler', () => {
headers: {
'content-type': 'text/html',
},
- body: 'DATABASE ERRROR',
+ body: 'DATABASE ERROR',
}),
maxRequestRetries: 1,
requestHandler: () => {
2 changes: 1 addition & 1 deletion test/core/proxy_configuration.test.ts
@@ -126,7 +126,7 @@ describe('ProxyConfiguration', () => {
});

test('should rotate custom URLs with sessions correctly', async () => {
- const sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'];
+ const sessions = ['session_01', 'session_02', 'session_03', 'session_04', 'session_05', 'session_06'];
const proxyConfiguration = new ProxyConfiguration({
proxyUrls: ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'],
});
2 changes: 1 addition & 1 deletion test/core/puppeteer_request_interception.test.ts
@@ -162,7 +162,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl
}
});

- describe('internal handleRequest function should return correctly formated headers', () => {
+ describe('internal handleRequest function should return correctly formatted headers', () => {
test('should correctly capitalize headers', async () => {
const browser = await launchPuppeteer({ launchOptions: { headless: true } });

2 changes: 1 addition & 1 deletion website/blog/2024/04-23-scrapy-vs-crawlee/index.md
@@ -82,7 +82,7 @@ One of the drawbacks of this plugin is its [lack of native support for windows](
In Crawlee, you can scrape JavaScript rendered websites using the built-in headless [Puppeteer](https://github.com/puppeteer/puppeteer/) and [Playwright](https://github.com/microsoft/playwright) browsers. It is important to note that, by default, Crawlee scrapes in headless mode. If you don't want headless, then just set `headless: false`.

<Tabs>
- <TabItem value="javscript" label="Playwright">
+ <TabItem value="javascript" label="Playwright">

```js title="crawler.js"
import { PlaywrightCrawler } from 'crawlee';
2 changes: 1 addition & 1 deletion website/blog/2024/07-05-launching-crawlee-python/index.md
@@ -1,7 +1,7 @@
---
slug: launching-crawlee-python
title: 'Announcing Crawlee for Python: Now you can use Python to build reliable web crawlers'
- description: 'Launching Crawlee for Python, a web scraping and automation libray to build reliable scrapers in Python fastly.'
+ description: 'Launching Crawlee for Python, a web scraping and automation library to build reliable scrapers in Python fastly.'
image: ./img/crawlee-python.webp
author: Saurav Jain
authorTitle: Developer Community Manager
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.0/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.1/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.10/api-typedoc.json
@@ -187287,7 +187287,7 @@
},
{
"kind": "text",
- "text": " uses raw HTTP requests to download the files, it is very fast and bandwith-efficient.\nHowever, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,\nyou might need to use "
+ "text": " uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.\nHowever, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,\nyou might need to use "
},
{
"kind": "inline-tag",
@@ -203631,7 +203631,7 @@
"summary": [
{
"kind": "text",
- "text": "Supress the logs from JSDOM internal console."
+ "text": "Suppress the logs from JSDOM internal console."
}
]
},
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.10/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.11/api-typedoc.json
@@ -192976,7 +192976,7 @@
},
{
"kind": "text",
- "text": " uses raw HTTP requests to download the files, it is very fast and bandwith-efficient.\nHowever, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,\nyou might need to use "
+ "text": " uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.\nHowever, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,\nyou might need to use "
},
{
"kind": "inline-tag",
@@ -210136,7 +210136,7 @@
"summary": [
{
"kind": "text",
- "text": "Supress the logs from JSDOM internal console."
+ "text": "Suppress the logs from JSDOM internal console."
}
]
},
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.11/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
2 changes: 1 addition & 1 deletion website/versioned_docs/version-3.2/api-typedoc.json
@@ -155632,7 +155632,7 @@
"summary": [
{
"kind": "text",
- "text": "Supress the logs from JSDOM internal console."
+ "text": "Suppress the logs from JSDOM internal console."
}
]
},
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.2/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
2 changes: 1 addition & 1 deletion website/versioned_docs/version-3.3/api-typedoc.json
@@ -155459,7 +155459,7 @@
"summary": [
{
"kind": "text",
- "text": "Supress the logs from JSDOM internal console."
+ "text": "Suppress the logs from JSDOM internal console."
}
]
},
4 changes: 2 additions & 2 deletions website/versioned_docs/version-3.3/upgrading/upgrading_v1.md
@@ -133,7 +133,7 @@ To prevent bloat and to make access to certain key objects easier, we exposed a
property on the handle page arguments.

```js
- const handePageFunction = async ({ request, page, crawler }) => {
+ const handlePageFunction = async ({ request, page, crawler }) => {
await crawler.requestQueue.addRequest({ url: 'https://example.com' });
await crawler.autoscaledPool.pause();
}
@@ -143,7 +143,7 @@ This also means that some shorthands like `puppeteerPool` or `autoscaledPool` we
no longer necessary.

```js
- const handePageFunction = async (crawlingContext) => {
+ const handlePageFunction = async (crawlingContext) => {
crawlingContext.autoscaledPool // does NOT exist anymore
crawlingContext.crawler.autoscaledPool // <= this is correct usage
}
2 changes: 1 addition & 1 deletion website/versioned_docs/version-3.4/api-typedoc.json
@@ -158534,7 +158534,7 @@
"summary": [
{
"kind": "text",
- "text": "Supress the logs from JSDOM internal console."
+ "text": "Suppress the logs from JSDOM internal console."
}
]
},
