From 4c7007d0b6ac76a24d9bf7b2fc8dd5fb31cf0906 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sun, 25 Feb 2024 17:02:07 +0100 Subject: [PATCH 1/3] fix(logo-favicon): favicon.ico with valid content-type --- .../metascraper-logo-favicon/package.json | 1 + .../metascraper-logo-favicon/src/index.js | 14 ++++-- .../metascraper-logo-favicon/test/favicon.js | 44 ++++++++++++++++++- .../metascraper-logo-favicon/test/helpers.js | 24 ++++++++++ 4 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 packages/metascraper-logo-favicon/test/helpers.js diff --git a/packages/metascraper-logo-favicon/package.json b/packages/metascraper-logo-favicon/package.json index ff3b7594b..5d5924027 100644 --- a/packages/metascraper-logo-favicon/package.json +++ b/packages/metascraper-logo-favicon/package.json @@ -30,6 +30,7 @@ "reachable-url": "~1.8.0" }, "devDependencies": { + "async-listen": "latest", "ava": "5" }, "engines": { diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js index 2df6804d0..5c92f837d 100644 --- a/packages/metascraper-logo-favicon/src/index.js +++ b/packages/metascraper-logo-favicon/src/index.js @@ -110,10 +110,18 @@ pickBiggerSize.sortBySize = collection => const favicon = async (url, { gotOpts } = {}) => { const faviconUrl = logo('/favicon.ico', { url }) if (!faviconUrl) return undefined + const response = await reachableUrl(faviconUrl, gotOpts) - return reachableUrl.isReachable(response) && - response.headers['content-type']?.startsWith('image') - ? faviconUrl + const contentType = response.headers['content-type'] + + const isValidContenType = + contentType && + ['image/vnd.microsoft.icon', 'image/x-icon'].some(ct => + contentType.includes(ct) + ) + + return isValidContenType && reachableUrl.isReachable(response) + ? response.url : undefined } diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js index e426fc7aa..9f067d0ca 100644 --- a/packages/metascraper-logo-favicon/test/favicon.js +++ b/packages/metascraper-logo-favicon/test/favicon.js @@ -4,12 +4,54 @@ const test = require('ava') const { favicon } = require('..') +const { runServer } = require('./helpers') + test('return undefined if favicon is not reachable', async t => { const url = 'https://idontexist.lol' t.is(await favicon(url), undefined) }) -test("with { contentType: 'image/vnd.microsoft.icon' }", async t => { +test("don't resolve favicon.ico with no content-type", async t => { + const server = await runServer( + t, + async ({ res }) => { + res.end('') + }, + { host: '0.0.0.0', port: 0 } + ) + t.is(await favicon(server), undefined) +}) + +test("don't resolve favicon.ico with no valid content-type", async t => { + const server = await runServer( + t, + async ({ res }) => { + res.setHeader('content-type', 'image/svg+xml; charset=utf-8') + res.end('') + }, + { host: '0.0.0.0', port: 0 } + ) + t.is(await favicon(server), undefined) +}) + +test("favicon.ico with 'image/vnd.microsoft.icon' content-type", async t => { const url = 'https://microlink.io/' t.is(await favicon(url), 'https://microlink.io/favicon.ico') }) + +test("favicon.ico with 'image/x-icon' content-type", async t => { + const url = 'https://2miners.com/' + t.is(await favicon(url), 'https://2miners.com/favicon.ico') +}) + +test('handle redirects', async t => { + const server = await runServer( + t, + async ({ res }) => { + res.writeHead(301, { Location: 'https://microlink.io/favicon.ico' }) + res.end() + }, + { host: '0.0.0.0', port: 0 } + ) + t.is(await favicon(server), 'https://microlink.io/favicon.ico') +}) diff --git a/packages/metascraper-logo-favicon/test/helpers.js b/packages/metascraper-logo-favicon/test/helpers.js new file mode 100644 index 000000000..9bf664f28 --- /dev/null +++ b/packages/metascraper-logo-favicon/test/helpers.js @@ -0,0 +1,24 @@ +'use strict' + +const { default: listen } = require('async-listen') +const { createServer } = require('http') + +const closeServer = server => + require('util').promisify(server.close.bind(server))() + +const runServer = async (t, handler, opts) => { + const server = createServer(async (req, res) => { + try { + await handler({ req, res }) + } catch (error) { + console.error(error) + res.statusCode = 500 + res.end() + } + }) + const url = await listen(server, opts) + t.teardown(() => closeServer(server)) + return url +} + +module.exports = { runServer } From 611948f42f313a904bff256cef9d4bdaee7fc367 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sun, 25 Feb 2024 17:14:53 +0100 Subject: [PATCH 2/3] refactor: unify utils --- packages/metascraper-audio/package.json | 8 +++- packages/metascraper-audio/test/helpers.js | 24 +++++++++++ packages/metascraper-audio/test/iframe.js | 20 ++-------- .../metascraper-logo-favicon/package.json | 8 +++- .../metascraper-logo-favicon/test/favicon.js | 40 +++++++------------ .../metascraper-logo-favicon/test/helpers.js | 4 +- .../src/get-media/util.js | 8 +--- .../test/helpers.js | 2 +- packages/metascraper-video/package.json | 8 +++- packages/metascraper-video/test/helpers.js | 24 +++++++++++ packages/metascraper-video/test/iframe.js | 20 ++-------- 11 files changed, 95 insertions(+), 71 deletions(-) create mode 100644 packages/metascraper-audio/test/helpers.js create mode 100644 packages/metascraper-video/test/helpers.js diff --git a/packages/metascraper-audio/package.json b/packages/metascraper-audio/package.json index 903936e6b..7488e36a1 100644 --- a/packages/metascraper-audio/package.json +++ b/packages/metascraper-audio/package.json @@ -40,5 +40,11 @@ "scripts": { "test": "NODE_PATH=.. TZ=UTC ava --timeout 15s" }, - "license": "MIT" + "license": "MIT", + "ava": { + "files": [ + "test/**/*.js", + "!test/helpers.js" + ] + } } diff --git a/packages/metascraper-audio/test/helpers.js b/packages/metascraper-audio/test/helpers.js new file mode 100644 index 000000000..f8e39b93b --- /dev/null +++ b/packages/metascraper-audio/test/helpers.js @@ -0,0 +1,24 @@ +'use strict' + +const { default: listen } = require('async-listen') +const { createServer } = require('http') + +const closeServer = server => + require('util').promisify(server.close.bind(server))() + +const runServer = async (t, handler, opts) => { + const server = createServer(async (req, res) => { + try { + await handler({ req, res }) + } catch (error) { + console.error(error) + res.statusCode = 500 + res.end() + } + }) + const url = await listen(server, { port: 0, host: '0.0.0.0', ...opts }) + t.teardown(() => closeServer(server)) + return url.toString() +} + +module.exports = { runServer } diff --git a/packages/metascraper-audio/test/iframe.js b/packages/metascraper-audio/test/iframe.js index 88678a4d8..9418d8259 100644 --- a/packages/metascraper-audio/test/iframe.js +++ b/packages/metascraper-audio/test/iframe.js @@ -1,25 +1,19 @@ 'use strict' -const { default: listen } = require('async-listen') -const { createServer } = require('http') const test = require('ava') -const closeServer = server => - require('util').promisify(server.close.bind(server))() +const { runServer } = require('./helpers') const createMetascraper = (...args) => require('metascraper')([require('../src')(...args)]) test('absolute http', async t => { - const server = createServer((_, res) => { + const url = await runServer(t, ({ res }) => { res.setHeader('Content-Type', 'text/html') res.end( '' ) }) - - t.teardown(() => closeServer(server)) - const url = (await listen(server, { port: 0, host: '0.0.0.0' })).toString() const html = ` - - - -
Recommended
-
- - - - - - - - - - - - - - - -
- - - - - - - - - - -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/packages/metascraper/test/integration/geek-time/snapshots/index.js.md b/packages/metascraper/test/integration/geek-time/snapshots/index.js.md deleted file mode 100644 index 03273c9bc..000000000 --- a/packages/metascraper/test/integration/geek-time/snapshots/index.js.md +++ /dev/null @@ -1,23 +0,0 @@ -# Snapshot report for `test/integration/geek-time/index.js` - -The actual snapshot is saved in `index.js.snap`. - -Generated by [AVA](https://avajs.dev). - -## geek-time - -> Snapshot 1 - - { - audio: null, - author: 'Guest Contributor', - date: '2023-01-12T12:07:56.000Z', - description: 'The global markets are experiencing close to a 1-year long turmoil that involves high inflation rates, increasing interest rates, a bear stock market, and growing expectations for a recession, as demonstrated by inverse US-bond yield curves.', - image: 'https://images.unsplash.com/photo-1542751371-adc38448a05e?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDJ8fGdhbWluZ3xlbnwwfHx8fDE2NzMyNzE0NDE&ixlib=rb-4.0.3&q=80&w=2000content/images/size/w1200', - lang: 'en', - logo: 'https://www.geektime.com/content/images/size/w256h256/2022/04/icon-andro.png', - publisher: 'Geektime', - title: 'Have the bells tolled for the game market or is it a false alarm?', - url: 'https://www.geektime.com/gaming-market-2023/', - video: null, - } diff --git a/packages/metascraper/test/integration/geek-time/snapshots/index.js.snap b/packages/metascraper/test/integration/geek-time/snapshots/index.js.snap deleted file mode 100644 index e3a2dcafd1bfdffd886a8b6bb3a3b40dcf51c3aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 786 zcmV+t1MU1lRzVY1G-hU)7 zy&{Q492{*(oDx-7s zwJnWQ=nL*q>XlI|a#+Z`K&dmu+eB!&CwGie3rpO!Y*Kr&ZGXEE$E2ln-iX^>4adlJ zin%pS`=G5w_${5o%vj`TNsx2W=wsv#Q!0&izU50X*|fEgb%ekC941DmxRIn%6t!KE z8~49OmOSrfIrsw#?<;o}MO(KU*VNnH1N_5j?FV>6f{35=-yV_jeU9ju(i;-L_nJjG*&cOB0HIc%jD^~+qz QlscjR1LVxEQ;!4y0Mn<7A^-pY diff --git a/packages/metascraper/test/integration/postplanner/snapshots/index.js.md b/packages/metascraper/test/integration/postplanner/snapshots/index.js.md index 544ad810a..f4924d8d9 100644 --- a/packages/metascraper/test/integration/postplanner/snapshots/index.js.md +++ b/packages/metascraper/test/integration/postplanner/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Want to get more Likes on Facebook in 2021? No problem. To increase Likes on your FB page, you need to boost Likes on your FB posts. Here’s how to do that.', image: 'https://www.postplanner.com/hubfs/blog/11_Ways_to_Get_Lots_of_Likes_on_Facebook_(the_right_way)/11%20Ways%20to%20Get%20Lots%20of%20Likes%20on%20Facebook%20(the%20right%20way!!)%20hero.png#keepProtocol', lang: null, - logo: 'https://www.postplanner.com/favicon.ico', + logo: 'https://cdn2.hubspot.net/hubfs/513577/v2/global/favicon.ico', publisher: 'Post Planner', title: 'Here’s How to Get More Likes on Facebook in 2021 (and Reach Millions)', url: 'https://www.postplanner.com/get-more-likes-fans-facebook-page/', diff --git a/packages/metascraper/test/integration/postplanner/snapshots/index.js.snap b/packages/metascraper/test/integration/postplanner/snapshots/index.js.snap index b11d77998e11c3e4fd5ae221024016e754cf5b53..a28312b7fc613be349286d56d8854835dbbe752b 100644 GIT binary patch literal 685 zcmV;e0#f}!RzVkZjn|ODHcTt zlVmm-H#;w6CSCVt51#x59u(`vzv7SZB={dZ>TH`*LU9h^eVO;ZH*dag&W3S;$;h03 zlUCV6j!HDPEQHpwQbvo?+5S(*wfUd@p%+yJ-8O|tY70Nwz=I{^6pFa8MtUkJd5 z4#3wAz@HAl{Vu@AF2L_Dz#3UtB@55vw=%JWT!t$vD?pZkECRU=Yov3Bbb-vTl6g_5 z3TLRh+}c|Zwau|=;L1}pc`fKbR3oJg>Q?$(Dy(e4UCub`g}k@Lju_t!w{|)A8DpD8EwxB9Y3Ud%Ne|RW8j70kiA2VTBdRpzjE9fuGo)pOaUsV( zJwmEk!UYv%6!fZW>B>;}PPGV!nI>?b%5@;V-xATBTq!Z11(QxjQ?`qD-RXibE}Xm*$g zb<_+RZP;8!mCAA(O~iCF2t$uESL$(Q(c`Qyt;d-o@HoSviyesr+T*Mt@;Gx;9%nN$ zkF$xGuCH%;oaM4Yztq{ik(A}2LW>C&ZFLKwv+I2_S73$>c-YLJq?-GA9h(xZucf^h zemmUS-q{IGc#stsiy|0`6P2L$Re~+vTV)*=%H;CWf=dG@*x>v|+d9miR)hMN>g}Ja z*O!(a{?|%$L+F&gkRr+Hp(+Z6+HAI(bnB{U;z$2eI+M2NoZBm0yWUV}_c?Fab50D} T$3Ic2Y#!oYi;=qFF$4er9@9rG literal 656 zcmV;B0&o36RzV+E>hGr8!`{trD zqL)xFLm!I>00000000AhQ^9W3Fc6(=+5&1Twv+=$S16*D*u-&fdjN?;w?ZwfP(=ko zik!ron9X{k*lE}sJ@646K$Hu=!j(HG#Fc*lZ%I`}Regx&#WQa_e$Qu3WCjfkQ06z2p zzVrZo_W)@<{a~n3D-d)E9XL-X}=5;D@ zf!y`Z-U_cxj#Uf$Pf_PJCu3esrP8Q6=}Rd$q6PODWpog-!7e?bY#8ni8S^QnuRBGn zsnAI!OCwSJ|JVBj$6Og=kYvJ;8CHTE$f?i-71`s7h!LkmD#9oWACqTD$_nE`%zSc$ zM5?3`Twf}gV_lKGC#2+=cxVw(LZp^HM%3ntMASz6WM5R`*ZYr}cm{JdeplKTalFojDvmmeIi4NidvmgwkV?NiB!Du8*bbv-jIEfaAiBLuD zpwYI;MO4WwH&MgqJ3$zFjM`F(Y ze7?1{<1w0x3jI=L_oqUX;|dKXSaj7bxXP~f$x?wCw&1~a^H2DROi=kU!4Ai*vW^R> zb8%s}wXqdvd|utX^Tji2(da^{{d1*8!jQvjZ9}%XO34eslbjsNqL8TcPN#`mS4A5? qx^AjWn1OZaps?)*6Rzy%yy3vQF6bWrM5dyBbbkRTAm92O1ONb63^p+U diff --git a/packages/metascraper/test/integration/segment/index.js b/packages/metascraper/test/integration/segment/index.js index fd6e52a5f..453482418 100644 --- a/packages/metascraper/test/integration/segment/index.js +++ b/packages/metascraper/test/integration/segment/index.js @@ -26,5 +26,6 @@ const url = 'https://segment.com/blog/scaling-nsq' test('segment', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) + metadata.logo.replace('t3.gstatic.com', 't1.gstatic.com') t.snapshot(metadata) }) diff --git a/packages/metascraper/test/integration/segment/snapshots/index.js.md b/packages/metascraper/test/integration/segment/snapshots/index.js.md index 4a2fcdd4b..430450f8f 100644 --- a/packages/metascraper/test/integration/segment/snapshots/index.js.md +++ b/packages/metascraper/test/integration/segment/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Segment is the analytics API you’ve always wanted. It’s the easiest way to install all of your favorite analytics tools at once!', image: 'https://c19f7be2e84987e7904e-bf41efcb49679c193a4ec0f3210da86f.ssl.cf1.rackcdn.com/photos/40528-1-1.jpg', lang: null, - logo: 'https://segment.com/favicon.ico', + logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://segment.com/blog/scaling-nsq&size=128', publisher: 'Segment Blog', title: 'Scaling NSQ to 750 Billion Messages', url: 'https://segment.com/blog/scaling-nsq', diff --git a/packages/metascraper/test/integration/segment/snapshots/index.js.snap b/packages/metascraper/test/integration/segment/snapshots/index.js.snap index 5482935a0e3e124f1eb72983024d9051e16dd91d..41945f37732509caf30a869b2710bc99aca637ee 100644 GIT binary patch literal 664 zcmV;J0%!d}RzVvs4A{wEAPO7?7Uw`#g)m0P-#ZL9b zcQo7xj5Wq*C=J(=raDnZUtEPDHT2N_X2U%gFYCTFcK`tI0N?`v`~ZMo0Pw{Em@NU6 zO8`Gh0KZEBFKmEMHo%fKvuMq9gA-)NvhK`*5&CtF{RF=DsHmMkJNwzbTEOEL+l!bt)kQ&ULCsog{JKn0}*Y`bdZL(d@^NtEZ zvmt7h@zii7Z|_Q3G?{&yX^{>w_2S}(G(vc=ZDh-A%>6q*D z%bNhCnxi&kNJqpd!lgD;2r@n@8Uth`q63w2^EbFrN@zk2QIg@)qE!5h#&_vOi7}~O zt5lfZiW&h{u~}<18`x-hHFScg=3~TyTC3h@F~7A!YskE4rRsYjZPp`KYvHoUcQeXP zStwnm&MIl5jM9~ww_0sFzT>+mXH4F0b{5P{n+wC$o}!&w4lb yWViQvT-C;^M>e<+TuJf@wWcxZ0)XAmL~cL-AKpa*10G^NAAbNH0O;q$0{{SJe>$rG literal 581 zcmV-L0=oS{RzVKPhr9aXhy&Xi4{ zQHnc@Wgm+O00000000ARQcZ6YF%TVZ+5$=|nwA4Mj6mFWvzuz5GQ^Iz_pq zd_>wwt8r`#inxex`4Zi{RV6oVJw*3(Rz5u{C0Qe06e*xeJ188~xFFb(61Ni9y z{P6%<4S;>N)nZ#G$r%a9*t*=^-37J-tO@KP>@)8XYXIA9u}z$$(kxM9r}nmm6IYnB zf_tw}O{M0qOVXn7PiCZRgpCv(Rp4n5X(&5r_R}e>yYVM@4{N=l2J~xw_i>oPdR5P4gK11y&4fwIUxwu*p2(2XR_zdUV z87{STs1#oulPv^Fp5fG#()};)j8T@O<3pfBhX(O* z(5L<|jLA>3IHF7>@vzq)ifDL%F^Moc=tN}eiWRI^LoMxb$)lZHk8Rt1)fyyZl`RipW##rqXQ{Sjr(9cNtCn&t((1FHQ-2@ zx{1x@09WYQsX!%l&flKCUqs#ShWtn>C5`59$Xd+F)&v@pQdRZyyPPJ<