From e3668875f56dd6264615fe084c4df73acdd679d3 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Mon, 17 Jun 2024 18:11:14 +0200 Subject: [PATCH] fix: ensure favicon detect in markup is expected content-type --- .../metascraper-logo-favicon/src/index.js | 76 ++++++++++-------- .../metascraper-logo-favicon/test/favicon.js | 12 +-- .../metascraper-logo-favicon/test/index.js | 67 +++++++++++++++ .../test/integration/fast-company/index.js | 4 +- .../fast-company/snapshots/index.js.md | 1 - .../fast-company/snapshots/index.js.snap | Bin 610 -> 583 bytes .../los-angeles-times/snapshots/index.js.md | 2 +- .../los-angeles-times/snapshots/index.js.snap | Bin 704 -> 649 bytes .../integration/stuff/snapshots/index.js.md | 2 +- .../integration/stuff/snapshots/index.js.snap | Bin 632 -> 633 bytes .../substack/snapshots/index.js.md | 2 +- .../substack/snapshots/index.js.snap | Bin 880 -> 907 bytes .../test/integration/therams/index.js | 1 - .../integration/wsj/snapshots/index.js.md | 2 +- .../integration/wsj/snapshots/index.js.snap | Bin 509 -> 508 bytes 15 files changed, 124 insertions(+), 45 deletions(-) diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js index 4ec24630f..98c6f6ad3 100644 --- a/packages/metascraper-logo-favicon/src/index.js +++ b/packages/metascraper-logo-favicon/src/index.js @@ -1,14 +1,30 @@ 'use strict' -const { logo, parseUrl, normalizeUrl, toRule } = require('@metascraper/helpers') const { isEmpty, first, toNumber, chain, orderBy } = require('lodash') const reachableUrl = require('reachable-url') const memoize = require('@keyvhq/memoize') +const { + logo, + parseUrl, + normalizeUrl, + toRule, + extension +} = require('@metascraper/helpers') + +const ALLOWED_EXTENSION_CONTENT_TYPES = [ + ['ico', ['image/vnd.microsoft.icon', 'image/x-icon']], + ['png', ['image/png']] +] + const SIZE_REGEX_BY_X = /\d+x\d+/ const toLogo = toRule(logo) +const isValidContenType = (contentType, contentTypes) => { + return contentType && contentTypes.some(ct => contentType.includes(ct)) +} + const toSize = (input, url) => { if (isEmpty(input)) return @@ -85,9 +101,19 @@ const sizeSelectors = [ const firstReachable = async (domNodeSizes, gotOpts) => { for (const { url } of domNodeSizes) { const response = await reachableUrl(url, gotOpts) - if (reachableUrl.isReachable(response)) { - return response.url + if (!reachableUrl.isReachable(response)) continue + const contentType = response.headers['content-type'] + + const urlExtension = extension(url) + const contentTypes = ALLOWED_EXTENSION_CONTENT_TYPES.find( + ([ext]) => ext === urlExtension + ) + + if (contentTypes && !isValidContenType(contentType, contentTypes[1])) { + continue } + + return response.url } } @@ -109,22 +135,16 @@ const pickBiggerSize = async (sizes, { gotOpts } = {}) => { pickBiggerSize.sortBySize = collection => orderBy(collection, ['size.priority'], ['desc']) -const createFavicon = - ({ ext, contentTypes }) => - async (url, { gotOpts } = {}) => { - const faviconUrl = logo(`/favicon.${ext}`, { url }) - if (!faviconUrl) return undefined - - const response = await reachableUrl(faviconUrl, gotOpts) - const contentType = response.headers['content-type'] - - const isValidContenType = - contentType && contentTypes.some(ct => contentType.includes(ct)) - - return isValidContenType && reachableUrl.isReachable(response) - ? response.url - : undefined - } +const createFavicon = ([ext, contentTypes]) => { + return async (url, { gotOpts } = {}) => { + const faviconUrl = logo(`/favicon.${ext}`, { url }) + if (!faviconUrl) return undefined + const response = await reachableUrl(faviconUrl, gotOpts) + if (!reachableUrl.isReachable(response)) return undefined + const contentType = response.headers['content-type'] + return isValidContenType(contentType, contentTypes) && response.url + } +} const google = async (url, { gotOpts } = {}) => { const response = await reachableUrl(google.url(url), gotOpts) @@ -136,19 +156,11 @@ google.url = (url, size = 128) => const createGetLogo = ({ withGoogle, withFavicon, gotOpts, keyvOpts }) => { const getLogo = async url => { - const providers = [ - withFavicon && - createFavicon({ - ext: 'png', - contentTypes: ['image/png'] - }), - withFavicon && - createFavicon({ - ext: 'ico', - contentTypes: ['image/vnd.microsoft.icon', 'image/x-icon'] - }), - withGoogle && google - ].filter(Boolean) + const providers = ALLOWED_EXTENSION_CONTENT_TYPES.map( + ext => withFavicon && createFavicon(ext) + ) + .concat(withGoogle && google) + .filter(Boolean) for (const provider of providers) { const logoUrl = await provider(url, { gotOpts }) diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js index 46cdc7019..c77270433 100644 --- a/packages/metascraper-logo-favicon/test/favicon.js +++ b/packages/metascraper-logo-favicon/test/favicon.js @@ -6,11 +6,11 @@ const { createFavicon } = require('..') const { runServer } = require('./helpers') -const faviconPNG = createFavicon({ ext: 'png', contentTypes: ['image/png'] }) -const faviconICO = createFavicon({ - ext: 'ico', - contentTypes: ['image/vnd.microsoft.icon', 'image/x-icon'] -}) +const faviconPNG = createFavicon(['png', ['image/png']]) +const faviconICO = createFavicon([ + 'ico', + ['image/vnd.microsoft.icon', 'image/x-icon'] +]) test('return undefined if favicon is not reachable', async t => { const url = 'https://idontexist.lol' @@ -36,7 +36,7 @@ test("don't resolve favicon.ico with no valid content-type", async t => { res.setHeader('content-type', 'image/svg+xml; charset=utf-8') res.end('') }) - t.is(await faviconICO(url), undefined) + t.is(await faviconICO(url), false) }) test("favicon.png with 'image/png' content-type", async t => { diff --git a/packages/metascraper-logo-favicon/test/index.js b/packages/metascraper-logo-favicon/test/index.js index c71a8da90..2725ad47b 100644 --- a/packages/metascraper-logo-favicon/test/index.js +++ b/packages/metascraper-logo-favicon/test/index.js @@ -4,6 +4,8 @@ const { readFile } = require('fs/promises') const { resolve } = require('path') const test = require('ava') +const { runServer } = require('./helpers') + const createMetascraper = opts => require('metascraper')([require('..')(opts)]) const createHtml = meta => @@ -251,3 +253,68 @@ test('avoid wrong data URI', async t => { const metadata = await metascraper({ url, html }) t.is(metadata.logo, 'https://www.adobe.com/favicon.ico') }) + +test("favicon.ico detected in HTML markup can't be random content-type", async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/svg+xml') + res.end('') + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, null) +}) + +test('favicon.ico detected in HTML markup can be `image/x-icon` content-type', async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/x-icon') + res.end() + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, `${url}favicon.ico`) +}) + +test('favicon.ico detected in HTML markup can be `image/vnd.microsoft.icon` content-type', async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/vnd.microsoft.icon') + res.end() + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, `${url}favicon.ico`) +}) + +test("favicon.png detected in HTML markup can't be random content-type", async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/svg+xml') + res.end('') + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, null) +}) + +test('favicon.png detected in HTML markup can be `image/png` content-type', async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/png') + res.end() + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, `${url}favicon.png`) +}) diff --git a/packages/metascraper/test/integration/fast-company/index.js b/packages/metascraper/test/integration/fast-company/index.js index 549f77e80..b1186f066 100644 --- a/packages/metascraper/test/integration/fast-company/index.js +++ b/packages/metascraper/test/integration/fast-company/index.js @@ -26,6 +26,8 @@ const url = test('fast-company', async t => { const html = await readFile(resolve(__dirname, 'input.html')) - const metadata = await metascraper({ html, url }) + const { logo, ...metadata } = await metascraper({ html, url }) t.snapshot(metadata) + t.is(typeof logo, 'string') + t.true(new URL(logo).hostname.endsWith('.gstatic.com'), logo) }) diff --git a/packages/metascraper/test/integration/fast-company/snapshots/index.js.md b/packages/metascraper/test/integration/fast-company/snapshots/index.js.md index fad51184b..b8566c446 100644 --- a/packages/metascraper/test/integration/fast-company/snapshots/index.js.md +++ b/packages/metascraper/test/integration/fast-company/snapshots/index.js.md @@ -15,7 +15,6 @@ Generated by [AVA](https://avajs.dev). description: 'Lack of access to capital is a big challenge, but so is the lack of access to networks and advisors.', image: 'http://b.fastcompany.net/multisite_files/fastcompany/imagecache/620x350/poster/2016/05/3060169-poster-p-1-one-of-the-biggest-challenges-of-getting-funding-for-minority-owned-business.jpg', lang: 'en', - logo: 'https://www.fastcompany.com/favicon.ico', publisher: 'Fast Company', title: 'One Of The Biggest Challenges Of Getting Funding For Minority-Owned Business', url: 'http://www.fastcompany.com/3060169/one-of-the-biggest-challenges-of-getting-funding-for-minority-owned-business', diff --git a/packages/metascraper/test/integration/fast-company/snapshots/index.js.snap b/packages/metascraper/test/integration/fast-company/snapshots/index.js.snap index 13f39a185dc556f235268a2d10adfd0ac5b14c44..161ce77b46f391b2f5fb582a105c0cf4a5367f3a 100644 GIT binary patch literal 583 zcmV-N0=WG_RzVB9Z=e)f^ds(?BnB~^POYgB~fnGmAn0pq$$Tdk&cA2 zMJdf%J0nY%S#q~`aV$xG@W1+SADYG{jI%6wo4N9;h>v9=rJ_d8rq)|A<*h5f}k z){?){E-R$zti2aY!WKLmhoh+oCu02KVmz9SPiNs|7>41;&Zff{ovL(6+M0GQUPyJt zZNjBe=o}}@l`J*MoNLD=k95jaCi5IkiqCme6L+?uC_|qAv)3T4Y<1$RskZG&l`vDnIISYbl*o3spXlRgvqRvS>P53#{IlABvKG>Y0q{=Xuj>DUiNf?yY z5mrItMi5SdlW-c2rZ2^IT9jfWtU+NDK^cljrztjXqAkufOo~L>q#~(JyyN-j;>0uQ^PJuK@e*Dk|-#*gRH5kKFS)OBg|a9gxk8sy7_ zUu4M7w$l0eUV6j4-b&~5t#m%O6@Sx8UpCVD*-m=rCcm!oc5HWHtX8YP;!w6|4GsRM Vi`}BHb&T!*_ze@EkR-(e003?&9!3BF literal 610 zcmV-o0-gOqRzV*d_gi~(dH_U z(0*PicWr`GRVY1$o7*f?q|V}BF+7C!VUO7MdjR+d09OF;6##w$fMNjC2;eLN_#6TJ zhycbh!1ownpKb23%~^Vmni#tq?(Xga+X1!(>|;IJZ*>v-&#~6xA$rt(!$plfh(7B zm0g

oDjy!OT_B{eAQeN}_5iWjbhsTIV!yNOIm-GQkjj%8kV!|M2B<4Xt#EaxzM; z4#tVBd?2i4+YOnFPnL|Fy`Duy#aD~s0?(kdZ?FyeCukiV? wulc(Uf7asj!!>?yr?{zYH}>m5|3$VAMgGqu`p}mq!|sm#2JcWMmkk5}0Er|p`2YX_ diff --git a/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.md b/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.md index 12c58ddd2..239fb9cc9 100644 --- a/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.md +++ b/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Tech start-up Appthority’s office has plush conference rooms, soundproof phone booths, an enormous kitchen and a view of San Francisco Bay. It has ping-pong and foosball tables, beer on tap and 11 types of tea.', image: 'http://www.trbimg.com/img-572421a4/turbine/la-fi-tn-tech-downturn-20160429', lang: 'en', - logo: 'http://www.trbas.com/jive/prod/common/images/lanews-apple-touch-icon.1q2w3_9ffdb679907f116af126c65ff1edb27a.png', + logo: 'https://www.latimes.com/favicon.ico', publisher: 'latimes.com', title: 'As venture capital dries up, tech start-ups discover frugality', url: 'http://www.latimes.com/business/technology/la-fi-tn-tech-downturn-20160429-story.html', diff --git a/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.snap b/packages/metascraper/test/integration/los-angeles-times/snapshots/index.js.snap index a9a6c11adf9f9a931bd3a73f70cd4a12ba79e847..be3cc570643ca6a932b5dc5b516caf1bb2d65e5e 100644 GIT binary patch literal 649 zcmV;40(SjDRzVBoGn>0SN@f z+8*0W);r7Y_>SwyuRs(Op`zzsP$N1TehKm=7oAMOR`Yf~_RX8w(;_vT9fy;zBw7r; zHymPROEMH<(KQ7fta71Z2`8s{u0*}+fA7O(Xisz{y7m$P-T=S{0Qd#~zX9M^1aQy; zc-sT`(gXO>1GwD>_|ym3iPpEH^=Iij$|Q;|%&o00h&CbGfap5xM7?zmzMXCAc|B#&_dnFHR;$<77X%vp>E+Op@ev zCut+6ka_Jyvpv7-{e&aRDh#5$#Erv4=UO6K=6^nZ4#-8JGr~#*bfyUvW^4=cWHUm~ zT!#S$Zfx#6bAhg6OPDfC6$Yp*l4W0W6Yy9|R*^+zb5uChG(#@%xw81kE1T(%F+NoD zAwHI~$FwbD$F^K#6wD!2#-OOwP#9oJdU*i~NFs=ScvNhRX; zN2A$nHWZ)gx*TR)k91wed-o>0ld;+zN#j#(X=GGf=vZtllvQ!gGb@d^aVvPTJ9*HZ zicz*)f*#p_++6Y!+-g~ba~ei17IeX*P*a_;9qNob`Bz<&8XYRS*prQw#BR@vPq0J> zhXJS5k|)fR)1nOKUQ@uv4Nz8BAYk5(V@e(i-;~N|nRjaU8*i5Oixo9j+en+BErl>@ ji(58a&i|)o9EAORSV?WV%}jMp%dhhnuCmau=>q@&G7>GE literal 704 zcmV;x0zdshRzVpW_j#SKT%LY(}`vu z3saFM9|zTtSKbKgYYzVOtSCefFNeQ|a1FW&4w^%4z=GxVUP^FhFg!vD#)4)3R9rKsiGNjiO+<=hfWx&ykvYJ<|BL* z&M#A@iY=RJv7==6xzHMe$Tj&b%!wSb!657+A|yKlGJbx^lS?#7-g;%o m`=mSEu;yz1zskfuu$zzS(CGeSrmCQ28~z4(Z%bV@1ONadyGqRf diff --git a/packages/metascraper/test/integration/stuff/snapshots/index.js.md b/packages/metascraper/test/integration/stuff/snapshots/index.js.md index e20b31694..4d3a017e6 100644 --- a/packages/metascraper/test/integration/stuff/snapshots/index.js.md +++ b/packages/metascraper/test/integration/stuff/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Orphee Mickalad is on track to replace his former history teacher Tangi Utikere on Palmerston North City Council.', image: 'https://resources.stuff.co.nz/content/dam/images/4/y/p/h/8/h/image.related.StuffLandscapeSixteenByNine.1420x800.4yr12n.png/1613526047477.jpg', lang: 'en', - logo: 'https://www.stuff.co.nz/sics-assets/images/favicons/apple-touch-icon.png', + logo: 'https://www.stuff.co.nz/sics-assets/images/favicons/safari-pinned-tab.svg', publisher: 'Stuff', title: 'Orphee Mickalad leading Palmerston North by-election', url: 'https://www.stuff.co.nz/manawatu-standard/news/300232751/orphee-mickalad-leading-palmerston-north-byelection', diff --git a/packages/metascraper/test/integration/stuff/snapshots/index.js.snap b/packages/metascraper/test/integration/stuff/snapshots/index.js.snap index 6cd452150e9c520a61375b76dbe7b1e2dcb7f771..49050151646122f76ff2adef77c90522bc0e7975 100644 GIT binary patch literal 633 zcmV-<0*3uTRzVdVDMQT9;ahbJe zH|}QbU_0GSZ@u#kpd$W)GatdPxCBJ^EexxrrAM}9u&hQ&+|_`(z+xktg5{-y32hzwslFw z-YPN03MEsH8O^*!F=gH`OLc{USg9Nv8#^Pr485@}^Ms-#%T7^G6npR0lq@aD@35MY zb>3)pWUMdQq4F&|G>sOjO8Om9Gn{l*!mm(zU%La&EjiOzK`vR%EfFTs7hD+aNqa7F z##cng3*PdYmwca>|5Ayi3O&i>WJ!F3S~`I>ov4K;(l6R0r76jZJYDRkX;QQ{%XL!g z3D5Sjo!xvdEr!K#n4Hyl}>nTnVEc zcQ{6?;#z4N-=U&E<%&=$J-I!qQ5#c5V!0x{ z7o)~jU0mJeZiX7?=$qJi)Do@an&yu0q-nmB4|g+eRzl+0wUGE)NL=3tiS<%QJZkUI TF*sL}x-aw>JBS(*$A9yZk|3dPU@XJ()XvS z0AM><+X~i>$7dvb5Zr{Do0|}9K(G$MW7rPDC&2)M;Z`ujx{zjt1{=M-HLQJUYzH@9 z8*SK2RJFU9Vejm{0zGx$S(apJlw?slI!)4pJUPfmah7GDdZhJ&oUpR;(&#St<=9pw z5qm4e3>6kkI%YKU7R8Kt!z@(_1+h{(HZgWiwhg_p4fBMeB+E`wPbGWr<%}$~P!~>MMJICUiS&!+NNS4HJWH1QNfPIcO*0)= zddkzibZ0l)OY%`Z8pUVTv|n_EdfI`I^sQ|(H63`>`B*F#|2c3{xCosi?>Yw)JeR^~ z$FZsuMc&k+jM_}As`tplsvaxpO1hbo;mT&es;iOjP~LB{A}pky-tN@6iAa%XC!}}d zpth=u)m`T1sBwY5j+{qbpe?wjh2uL(lI>)p-ISY^kZ67_B)S$7RX0K+-3p1u%^f<1 S=dz&g2mJ+b=Xy8f0{{RdEHat^ diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.md b/packages/metascraper/test/integration/substack/snapshots/index.js.md index a28a83643..06289385f 100644 --- a/packages/metascraper/test/integration/substack/snapshots/index.js.md +++ b/packages/metascraper/test/integration/substack/snapshots/index.js.md @@ -14,7 +14,7 @@ Generated by [AVA](https://avajs.dev). description: 'The world is a very malleable place. When I read biographies, early lives leap out the most. Leonardo da Vinci was a studio apprentice to Verrocchio at 14. Walt Disney took on a number of jobs, chiefly delivering papers, from 11 years old. Vladimir Nabokov published his first book (a collection of poems) at 16, while still in school. Andrew Carnegie', image: 'https://substackcdn.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2Fef3bd0df-b9fa-4358-afee-116c23f4c55f_2560x1902.jpeg', lang: 'en', - logo: 'https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F1115e358-65d9-4f1c-872a-f1ea44965132%2Fapple-touch-icon-1024x1024.png', + logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://simonsarris.substack.com/p/the-most-precious-resource-is-agency&size=128', publisher: 'The Map is Mostly Water', title: 'The Most Precious Resource is Agency', url: 'https://map.simonsarris.com/p/the-most-precious-resource-is-agency', diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.snap b/packages/metascraper/test/integration/substack/snapshots/index.js.snap index 62113baa8cd8a069c526198d06d0b0371ff5e87b..8988b1a95bc8e77ee58fc9c5c628673b30bd364a 100644 GIT binary patch literal 907 zcmV;619bdBRzV^GkecEuAPktHd+-rN3?B0cvX_RvB?6cp~IJaITfzBg}Xc%Q~n@umIn2RY#+ z<+h5gL%tNYL~V2B?1%d#L5J^lzv{4$SiQWDy(d?UvA2vd#~AyaG4}NiW77^}zjPQo z=rXqGGIrl(?C&mP&%M28-ri+=N8EYd<92_4pLtK2_bKx}XV1NzFT4)(cAt5>SS3O= zR_E!KZx1V%E7O{vU5i4MaE-PN?$XY-i!U$qClV$OME1gcX?$6euNO zED5wkP9fapRKhtJ!USTWGJ`r7WPKnsasyJV$bux)pehG0Cn%J4AzY9u(Ig5Igd0(E z0c)&LtgABu)Y?$#1SfC`ZpawLd0sml496kdqIB@1uqAE4DYXPu0zz38F&R)P+^N|5 zz;i*V+(1G#42qzL?H~k!dNZU3Un1qVRK3#7Zy@s ztb@1)`xb#KDT&vD)o@x-VZUq09s01&g(R>}NC~0@%X6h<2(vOVw1zV@C1rxPODPJ> zs8#@@2?lbXfW{e#o=J!^F=D8^wY(i&ir2&SUGi? zDV6xCTxg>*L)MBFomxkv{j3S-Hz>W)tQXCD(LApBk{poL#4zwocme}3Wr zbaio1899BdNj*xonWV6-PlKZSE~lXG@IV{lLRB^}WL0H21;PfHQOP$4R=lU_Fgo7K zpa h-T%<0S)dO8o$CM3aGT#sB-DOW{{T?0S&m2r000$ttug=r literal 880 zcmV-$1CRVcRzV}~=fVxu7CS`v%|(%3WheC*Cj92U?>NCZ%V zc0ocKmD^>zciLS|cY7zpEx!bX1HX+sM=r?LP7VVSobbWwa+P0Izp8p~%3QPD_}9Nu zP(f4e+uR3~8|4jJUvu!U-xLJ~{NW@YhzFv3qKDC=7ea{dg%Cjq@rw}R!y_R+9SHH` zK#1m8h~JKd`2ARj)rk;KqthqR={NZ;$q+?vk7s9RB6=*McSZERcp4pj5Di3h@+3OJ zwou$#gU1KHQ*1-cZg0N#QZ;PgB|4}4LFl7`ymZP2#peFo(K`CBCfKsm1t<>)cjR`^ zpw@)BCa@YMC2&)dfiJ-k7LY5h9NJou9|57$JJ9NmJZM4-+y)3WLBl>I@D;H^S1=R^ z*UCr*TkNv<(A@~oT1O@*Ne~#W$vKv??i>NqSpqkx1AL*pp&bO~4X^6Id;1X^* z_al(CqEhdmpso(5Oa&G#IX{BZaRX@zJ3{Ayb&-`Z;Q(;C~r zmlbm{D)rhrt{i!Ud$3dO|ZeA+2Gc9wGFdXPnjHMtG-M>%vMxR^I}qzalR}uo=xW$ zF_uJenl5BEEoX8*FW1?8F?p3PCs}f9sXAchSfi=-#u1snxw+y!Z-4wJ-pc;+|4P#T zz)qT`bLu!*%!_3_D^nR?oM#x9DdB9kT+GvHHq2INt)@6|E9+QEHgP)1X0N(eV$C66 z@9gLH?e0J8R{1&FZjqm}4|)eT7|0!tV6SU?>!bcB_5r?iBo((FUXbV3N$PN}Dl&3+ zfG}vC-k(3)ccMX?_)dI$uRVP?>^RgEcTy1Bd-T}dqsPj}_)q93cdDSh+WiF=@H;k- G1pojb+^2*9 diff --git a/packages/metascraper/test/integration/therams/index.js b/packages/metascraper/test/integration/therams/index.js index 0f435961f..e3ace8ca2 100644 --- a/packages/metascraper/test/integration/therams/index.js +++ b/packages/metascraper/test/integration/therams/index.js @@ -26,6 +26,5 @@ const url = test('therams', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) - console.log(metadata) t.snapshot(metadata) }) diff --git a/packages/metascraper/test/integration/wsj/snapshots/index.js.md b/packages/metascraper/test/integration/wsj/snapshots/index.js.md index 6125b5815..ffb86fe1f 100644 --- a/packages/metascraper/test/integration/wsj/snapshots/index.js.md +++ b/packages/metascraper/test/integration/wsj/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Funding Snapshot:', image: 'http://si.wsj.net/img/WSJ_Logo_black_social.gif', lang: 'en', - logo: 'http://s.wsj.net/media/wsj-pro-favicon.ico', + logo: 'https://www.wsj.com/apple-touch-icon.png', publisher: 'WSJ', title: 'Funding Snapshot: Software Development Platform CircleCI Raises $18M', url: 'http://www.wsj.com/articles/funding-snapshot-software-development-platform-circleci-raises-18m-1463398202', diff --git a/packages/metascraper/test/integration/wsj/snapshots/index.js.snap b/packages/metascraper/test/integration/wsj/snapshots/index.js.snap index 743b7295c521f551d2dc2d972ba8de4d92b7e5da..2f6e18ac90a12410734e95ef1515ee82c65a6054 100644 GIT binary patch literal 508 zcmVpuyk!Pi!mS`~o^q5>gB zoAuhBWW8&7*EQEZL?57i0v>=T;Eu%GP}Rgmxa?`Z8O{D?KFw9Sl$x*2#kH_#rQnTO zp;4?fxwhuwDoL1SpPXMVJcQdv9+Af%0pJS&d_g)i&4F|BDY2 zW<@8Sjl*b4!&fw#eu$!Y62@WZhhg}+*R+!e!z(N;YSraNN3}{&W!`*aMBE<{3zprX zg526N4g!OIV^+QrHozha&gLhJQ=RF>QZl|?7|oGMKf|;?wqz>ngd^0wvd(lTUfiNg z90X0%v@ouVfR&{b)ash&6uDM@sj}WD+hx6!XmU|?8TV{{(%-7RfKDCWGr^nd)HbXV z-k~@bQkR8L)_W(JO?6dx2Uu|_4vxL|42>|}^Jw;_M{w&(b}?>T`)8BNBA3Dh=^r-j yE|HoInkIkIwA`R6Z_ylS)uL%MD`@m`I+?tljl*$&&2vmdcV9QHUqv$j0{{Tb&g~Zf literal 509 zcmVr)_)R+(p+01Q5As*q5>gB zoAuh6WbL(f*Kx0X2p*t30uR6g@B&D@45%g>B$qwSH>26#e51L|7E8P0ImVx3jq88fZqUcNdVRa;2Qz>NdQhg zfbSl_G3gCR@6BQ*xFuv4_WOMxM?emNJceW9oe&?$-hk|}Iz`ng|EN>zvDz+`{(td7 z%B*nW$uLgFG=4#o@%tp1j^b$?hjAQ#>NFju!f=h1MOC`o;ALH=SmwcOyNGFbL@Zdo zLq$t#t7#M&3>&iwOJO4{^5|-IF~3x~nlB{d>$y=JnGAExx?@XL=1zE`yjNDaa^jgo z?MQ`4kwuMJ(Ml_tu?=!nhR9Xtn}e!eNHj~KUB0;cPX_}_B!&MP6ZIMf1qUB#6G_AI1%3Cx?s#`QoCIwBNk4K}^$uJ&vZEi3X?!x{6(3#)<0s{a5T2ku3