diff --git a/packages/metascraper-helpers/index.js b/packages/metascraper-helpers/index.js index ac0145ee5..909d07574 100644 --- a/packages/metascraper-helpers/index.js +++ b/packages/metascraper-helpers/index.js @@ -19,12 +19,18 @@ const isUri = require('is-uri') const { URL } = require('url') const tldts = require('tldts') -const urlRegex = require('url-regex-safe')({ +const METASCRAPER_RE2 = process.env.METASCRAPER_RE2 + ? process.env.METASCRAPER_RE2 === 'true' + : undefined + +const urlRegexForTest = require('url-regex-safe')({ exact: true, parens: true, - re2: process.env.METASCRAPER_RE2 - ? process.env.METASCRAPER_RE2 === 'true' - : undefined + re2: METASCRAPER_RE2 +}) + +const urlRegexForMatch = require('url-regex-safe')({ + re2: METASCRAPER_RE2 }) const { @@ -120,7 +126,7 @@ const AUTHOR_MAX_LENGTH = 128 const removeLocation = value => replace(value, REGEX_LOCATION, '') const isUrl = (url, { relative = false } = {}) => - relative ? isRelativeUrl(url) : urlRegex.test(url) + relative ? isRelativeUrl(url) : urlRegexForTest.test(url) const urlObject = (...args) => { try { @@ -485,6 +491,8 @@ const loadIframe = (url, $, { timeout = 5000 } = {}) => listen(iframe, 'add', load) }) +const getUrls = input => String(input).match(urlRegexForMatch) ?? 
[] + module.exports = { $filter, $jsonld, @@ -499,6 +507,7 @@ module.exports = { extension, fileExtension, findRule, + getUrls, has, image, imageExtensions, diff --git a/packages/metascraper-helpers/test/index.js b/packages/metascraper-helpers/test/index.js index 727f4af65..b895facbf 100644 --- a/packages/metascraper-helpers/test/index.js +++ b/packages/metascraper-helpers/test/index.js @@ -12,6 +12,7 @@ const { date, description, extension, + getUrls, image, isAudioExtension, isAudioUrl, @@ -37,6 +38,26 @@ const measure = fn => { return (diff[0] * 1e9 + diff[1]) / 1e6 } +test('.getUrls', t => { + t.deepEqual(getUrls(undefined), []) + t.deepEqual(getUrls(null), []) + t.deepEqual(getUrls(''), []) + t.deepEqual( + getUrls( + 'engineering ▲ @vercel; founder of https://t.co/4PQvCsVNsA https://t.co/fpiHwbEPBv https://t.co/IG8Qq0IDKi https://t.co/gblDRx1P9D https://t.co/SmoZi3hAhb https://t.co/Y0Uk1XU3Eu https://t.co/PAq3eTEhmI' + ), + [ + 'https://t.co/4PQvCsVNsA', + 'https://t.co/fpiHwbEPBv', + 'https://t.co/IG8Qq0IDKi', + 'https://t.co/gblDRx1P9D', + 'https://t.co/SmoZi3hAhb', + 'https://t.co/Y0Uk1XU3Eu', + 'https://t.co/PAq3eTEhmI' + ] + ) +}) + test('.parseUrl', t => { const fn = () => parseUrl('https://example.com') /* this assertion ensure parseUrl memoize the value */ diff --git a/packages/metascraper-twitter/README.md b/packages/metascraper-twitter/README.md deleted file mode 100644 index 48c241dca..000000000 --- a/packages/metascraper-twitter/README.md +++ /dev/null @@ -1,22 +0,0 @@ -
-
- metascraper -
-
-

metascraper-twitter: Metascraper integration with Twitter.

-

See our website for more information.

-
-
- -## Install - -```bash -$ npm install metascraper-twitter --save -``` - -## License - -**metascraper-twitter** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/metascraper/blob/master/LICENSE.md) License.
-Authored and maintained by [Microlink](https://microlink.io) with help from [contributors](https://github.com/microlinkhq/metascraper/contributors). - -> [microlink.io](https://microlink.io) · GitHub [microlinkhq](https://github.com/microlinkhq) · Twitter [@microlinkhq](https://twitter.com/microlinkhq) diff --git a/packages/metascraper-twitter/src/index.js b/packages/metascraper-twitter/src/index.js deleted file mode 100644 index eb7754b09..000000000 --- a/packages/metascraper-twitter/src/index.js +++ /dev/null @@ -1,65 +0,0 @@ -'use strict' - -const { - $jsonld, - author, - date, - image, - memoizeOne, - parseUrl, - title, - toRule, - video -} = require('@metascraper/helpers') - -const toAuthor = toRule(author) -const toDate = toRule(date) -const toImage = toRule(image) -const toVideo = toRule(video) -const toTitle = toRule(title) - -const test = memoizeOne(url => parseUrl(url).domainWithoutSuffix === 'twitter') - -const REGEX_IMG_MODIFIERS = /_(?:bigger|mini|normal|x96)\./ -const ORIGINAL_IMG_SIZE = '_400x400' - -const isTweet = url => url.includes('/status/') - -const avatarUrl = str => - str?.replace(REGEX_IMG_MODIFIERS, `${ORIGINAL_IMG_SIZE}.`) - -module.exports = () => { - const rules = { - author: [ - toAuthor($jsonld('author.givenName')), - toAuthor($ => { - const author = $('meta[property="og:title"]').attr('content') - return author?.includes(' on X') ? 
author.split(' on X')[0] : author - }) - ], - title: [toTitle(($, url) => `@${url.split('/')[3]} on X`)], - date: [ - toDate(($, url) => { - const id = url.replace('https://twitter.com', '') - return $(`a[href="${id}"] time`).attr('datetime') - }) - ], - image: [ - toImage( - ($, url) => - isTweet(url) && $('meta[property="og:image"]').attr('content') - ), - toImage(($, url) => isTweet(url) && $('video').attr('poster')), - toImage($ => avatarUrl($jsonld('author.image.contentUrl')($))), - toImage($ => avatarUrl($('article img[src]').attr('src'))) - ], - video: [toVideo(($, url) => isTweet(url) && $('video').attr('src'))], - publisher: () => 'X' - } - - rules.test = ({ url }) => test(url) - - return rules -} - -module.exports.test = test diff --git a/packages/metascraper-twitter/test/fixtures/profile-video.html b/packages/metascraper-twitter/test/fixtures/profile-video.html deleted file mode 100644 index bd68cdde4..000000000 --- a/packages/metascraper-twitter/test/fixtures/profile-video.html +++ /dev/null @@ -1,248 +0,0 @@ - - - - - - - - -Brad, what are you gonna do? (@k4rliky) / Twitter - -
Did someone say … cookies?
Twitter and its partners use cookies to provide you with a better, safer and faster service and to support our business. Some cookies are necessary to use our services, improve our services, and make sure they work properly. Show more about your choices.
Accept all cookies
Refuse non-essential cookies

Brad, what are you gonna do?

95.6K Tweets
Opens profile photo
Follow
Click to Follow k4rliky
Brad, what are you gonna do?
@k4rliky
Research Scientist interested in Quantum Computing, Complexity Science, Reverse Engineering, Programming. - - and me. karliky.dev
Science & Technology***On Map Dungeon***karliky.comJoined May 2010

Brad, what are you gonna do?’s Tweets

Taleb talebeando en pleno 2023
Quote Tweet
Nassim Nicholas Taleb
@nntaleb
If you don't get why, between 2019 and 2022, I turned down exactly 10 requests to be on his podcast, this will provide a succinct explanation. twitter.com/lexfridman/sta…
Show this thread
diff --git a/packages/metascraper-twitter/test/fixtures/profile.html b/packages/metascraper-twitter/test/fixtures/profile.html deleted file mode 100644 index f10fa081a..000000000 --- a/packages/metascraper-twitter/test/fixtures/profile.html +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - diff --git a/packages/metascraper-twitter/test/fixtures/tweet-gif.html b/packages/metascraper-twitter/test/fixtures/tweet-gif.html deleted file mode 100644 index 421975a15..000000000 --- a/packages/metascraper-twitter/test/fixtures/tweet-gif.html +++ /dev/null @@ -1,549 +0,0 @@ - - - -#!/kiko/beats on X: "Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ https://t.co/5bcuqoEyAa" / X
Don’t miss what’s happening
People on X are the first to know.
Did someone say … cookies?
X and its partners use cookies to provide you with a better, safer and faster service and to support our business. Some cookies are necessary to use our services, improve our services, and make sure they work properly. Show more about your choices.
Accept all cookies
Refuse non-essential cookies

Post

Conversation

New to X?

Sign up now to get your own personalized timeline!
Sign up with Apple
Create account
By signing up, you agree to the Terms of Service and Privacy Policy, including Cookie Use.
Trends are unavailable.
\ No newline at end of file diff --git a/packages/metascraper-twitter/test/fixtures/tweet-image.html b/packages/metascraper-twitter/test/fixtures/tweet-image.html deleted file mode 100644 index f6ebfcbd8..000000000 --- a/packages/metascraper-twitter/test/fixtures/tweet-image.html +++ /dev/null @@ -1,540 +0,0 @@ - - - -I don’t know, but I’ll find out. - Marty Sklar on X: "Lo mejor de @codemotion_es #codemotionMadrid es estar con la gente que quieres 😍@ladyCircus https://t.co/NQCYS8Yjt1" / X
Don’t miss what’s happening
People on X are the first to know.
Did someone say … cookies?
X and its partners use cookies to provide you with a better, safer and faster service and to support our business. Some cookies are necessary to use our services, improve our services, and make sure they work properly. Show more about your choices.
Accept all cookies
Refuse non-essential cookies

New to X?

Sign up now to get your own personalized timeline!
Sign up with Apple
Create account
By signing up, you agree to the Terms of Service and Privacy Policy, including Cookie Use.
Trends are unavailable.
\ No newline at end of file diff --git a/packages/metascraper-twitter/test/fixtures/tweet.html b/packages/metascraper-twitter/test/fixtures/tweet.html deleted file mode 100644 index 71a2d85c1..000000000 --- a/packages/metascraper-twitter/test/fixtures/tweet.html +++ /dev/null @@ -1,117 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - diff --git a/packages/metascraper-twitter/test/snapshots/index.js.md b/packages/metascraper-twitter/test/snapshots/index.js.md deleted file mode 100644 index 89c7d02e6..000000000 --- a/packages/metascraper-twitter/test/snapshots/index.js.md +++ /dev/null @@ -1,92 +0,0 @@ -# Snapshot report for `test/index.js` - -The actual snapshot is saved in `index.js.snap`. - -Generated by [AVA](https://avajs.dev). - -## from a Twitter profile - -> Snapshot 1 - - { - author: '#!/kiko/beats (Kikobeats)', - date: null, - description: `engineering ▲ @vercel; founder of␊ - https://t.co/4PQvCsVNsA␊ - https://t.co/fpiHwbEPBv␊ - https://t.co/IG8Qq0IDKi␊ - https://t.co/gblDRx1P9D␊ - https://t.co/SmoZi3hAhb␊ - https://t.co/Y0Uk1XU3Eu␊ - https://t.co/PAq3eTEhmI`, - image: null, - lang: 'en', - publisher: 'X', - title: '@Kikobeats on X', - url: 'https://twitter.com/Kikobeats', - video: null, - } - -## from a Twitter profile with tweets with video - -> Snapshot 1 - - { - author: 'Brad, what are you gonna do?', - date: '2010-05-23T16:41:35.000Z', - description: 'Research Scientist interested in Quantum Computing, Complexity Science, Reverse Engineering, Programming. @ladyCircus and me. https://t.co/M5v7b5owoD', - image: 'https://pbs.twimg.com/profile_images/1603675274348040192/y9P6VlyX_400x400.jpg', - lang: 'en', - publisher: 'X', - title: '@k4rliky on X', - url: 'https://twitter.com/k4rliky', - video: null, - } - -## from a tweet - -> Snapshot 1 - - { - author: 'Donald J. Trump (realDonaldTrump)', - date: null, - description: '“Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. 
In my opinion he is mentally deranged!', - image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_200x200.jpg', - lang: 'en', - publisher: 'X', - title: '@realDonaldTrump on X', - url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320', - video: null, - } - -## from a tweet with a gif - -> Snapshot 1 - - { - author: '#!/kiko/beats', - date: '2017-06-28T19:01:34.000Z', - description: null, - image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg', - lang: 'en', - publisher: 'X', - title: '@Kikobeats on X', - url: 'https://twitter.com/Kikobeats/status/880139124791029763', - video: 'https://video.twimg.com/tweet_video/DDbh3WCXYAAZfz9.mp4', - } - -## from a tweet with an image - -> Snapshot 1 - - { - author: 'I don’t know, but I’ll find out. - Marty Sklar', - date: '2017-11-25T18:04:12.000Z', - description: null, - image: 'https://pbs.twimg.com/profile_images/1734680668213231616/_iul1bFE_400x400.jpg', - lang: 'en', - publisher: 'X', - title: '@k4rliky on X', - url: 'https://twitter.com/k4rliky/status/934482867480121345', - video: null, - } diff --git a/packages/metascraper-twitter/test/snapshots/index.js.snap b/packages/metascraper-twitter/test/snapshots/index.js.snap deleted file mode 100644 index 2b1e51b2d..000000000 Binary files a/packages/metascraper-twitter/test/snapshots/index.js.snap and /dev/null differ diff --git a/packages/metascraper-twitter/CHANGELOG.md b/packages/metascraper-x/CHANGELOG.md similarity index 71% rename from packages/metascraper-twitter/CHANGELOG.md rename to packages/metascraper-x/CHANGELOG.md index cb06e973f..0db87792a 100644 --- a/packages/metascraper-twitter/CHANGELOG.md +++ b/packages/metascraper-x/CHANGELOG.md @@ -5,43 +5,43 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline ## [5.45.6](https://github.com/microlinkhq/metascraper/compare/v5.45.5...v5.45.6) (2024-05-19) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for 
package metascraper-x # [5.45.0](https://github.com/microlinkhq/metascraper/compare/v5.44.0...v5.45.0) (2024-02-25) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x # [5.44.0](https://github.com/microlinkhq/metascraper/compare/v5.43.7...v5.44.0) (2024-02-11) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.43.7](https://github.com/microlinkhq/metascraper/compare/v5.43.6...v5.43.7) (2024-02-10) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.43.4](https://github.com/microlinkhq/metascraper/compare/v5.43.3...v5.43.4) (2024-01-23) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.43.3](https://github.com/microlinkhq/metascraper/compare/v5.43.2...v5.43.3) (2024-01-17) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.43.2](https://github.com/microlinkhq/metascraper/compare/v5.43.1...v5.43.2) (2024-01-06) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.43.1](https://github.com/microlinkhq/metascraper/compare/v5.43.0...v5.43.1) (2024-01-02) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x # [5.43.0](https://github.com/microlinkhq/metascraper/compare/v5.42.6...v5.43.0) (2023-12-30) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.42.5](https://github.com/microlinkhq/metascraper/compare/v5.42.4...v5.42.5) (2023-12-20) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.42.2](https://github.com/microlinkhq/metascraper/compare/v5.42.1...v5.42.2) 
(2023-12-19) @@ -63,23 +63,23 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline # [5.39.0](https://github.com/microlinkhq/metascraper/compare/v5.38.0...v5.39.0) (2023-11-23) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x # [5.38.0](https://github.com/microlinkhq/metascraper/compare/v5.37.2...v5.38.0) (2023-11-09) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.37.1](https://github.com/microlinkhq/metascraper/compare/v5.37.0...v5.37.1) (2023-09-18) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x # [5.36.0](https://github.com/microlinkhq/metascraper/compare/v5.35.2...v5.36.0) (2023-08-16) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.35.2](https://github.com/microlinkhq/metascraper/compare/v5.35.1...v5.35.2) (2023-08-15) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.35.1](https://github.com/microlinkhq/metascraper/compare/v5.35.0...v5.35.1) (2023-08-10) @@ -89,43 +89,43 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline # [5.35.0](https://github.com/microlinkhq/metascraper/compare/v5.34.10...v5.35.0) (2023-08-09) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.10](https://github.com/microlinkhq/metascraper/compare/v5.34.9...v5.34.10) (2023-07-29) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.9](https://github.com/microlinkhq/metascraper/compare/v5.34.8...v5.34.9) (2023-07-17) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## 
[5.34.7](https://github.com/microlinkhq/metascraper/compare/v5.34.6...v5.34.7) (2023-05-29) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.6](https://github.com/microlinkhq/metascraper/compare/v5.34.5...v5.34.6) (2023-05-26) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.4](https://github.com/microlinkhq/metascraper/compare/v5.34.3...v5.34.4) (2023-05-03) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.3](https://github.com/microlinkhq/metascraper/compare/v5.34.2...v5.34.3) (2023-04-24) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.2](https://github.com/microlinkhq/metascraper/compare/v5.34.1...v5.34.2) (2023-04-03) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.34.1](https://github.com/microlinkhq/metascraper/compare/v5.34.0...v5.34.1) (2023-03-29) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.7](https://github.com/microlinkhq/metascraper/compare/v5.33.6...v5.33.7) (2023-01-30) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.6](https://github.com/microlinkhq/metascraper/compare/v5.33.5...v5.33.6) (2023-01-27) @@ -135,19 +135,19 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline ## [5.33.5](https://github.com/microlinkhq/metascraper/compare/v5.33.4...v5.33.5) (2023-01-23) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.4](https://github.com/microlinkhq/metascraper/compare/v5.33.3...v5.33.4) (2023-01-09) -**Note:** Version 
bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.3](https://github.com/microlinkhq/metascraper/compare/v5.33.2...v5.33.3) (2023-01-03) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.2](https://github.com/microlinkhq/metascraper/compare/v5.33.1...v5.33.2) (2023-01-02) -**Note:** Version bump only for package metascraper-twitter +**Note:** Version bump only for package metascraper-x ## [5.33.1](https://github.com/microlinkhq/metascraper/compare/v5.33.0...v5.33.1) (2023-01-01) @@ -159,4 +159,4 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline ### Features -* add metascraper-twitter ([#608](https://github.com/microlinkhq/metascraper/issues/608)) ([075c0ab](https://github.com/microlinkhq/metascraper/commit/075c0ab1141f9104b21a3496c43fe5c6b3bf17c0)), closes [#260](https://github.com/microlinkhq/metascraper/issues/260) +* add metascraper-x ([#608](https://github.com/microlinkhq/metascraper/issues/608)) ([075c0ab](https://github.com/microlinkhq/metascraper/commit/075c0ab1141f9104b21a3496c43fe5c6b3bf17c0)), closes [#260](https://github.com/microlinkhq/metascraper/issues/260) diff --git a/packages/metascraper-x/README.md b/packages/metascraper-x/README.md new file mode 100644 index 000000000..a79b45334 --- /dev/null +++ b/packages/metascraper-x/README.md @@ -0,0 +1,46 @@ +
+
+ metascraper +
+
+

metascraper-x: Metascraper integration for x.com.

+

See our website for more information.

+
+
+ +## Install + +```bash +$ npm install metascraper-x --save +``` + +## API + +### metascraper-x([options]) + +#### options + +##### gotOpts + +Type: `object` + +Any option provided here will be passed to [got#options](https://github.com/sindresorhus/got#options). + +##### resolveUrls + +Type: `boolean` + +Set to `true` if you want to resolve `t.co` URLs into the final URL. + +##### resolveUrl + +Type: `function` + +A function applied to each URL after its `t.co` link is resolved. By default it returns the URL unchanged. + +## License + +**metascraper-x** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/metascraper/blob/master/LICENSE.md) License.
+Authored and maintained by [Microlink](https://microlink.io) with help from [contributors](https://github.com/microlinkhq/metascraper/contributors). + +> [microlink.io](https://microlink.io) · GitHub [microlinkhq](https://github.com/microlinkhq) · Twitter [@microlinkhq](https://twitter.com/microlinkhq) diff --git a/packages/metascraper-twitter/package.json b/packages/metascraper-x/package.json similarity index 82% rename from packages/metascraper-twitter/package.json rename to packages/metascraper-x/package.json index 4df939f55..20cc748f6 100644 --- a/packages/metascraper-twitter/package.json +++ b/packages/metascraper-x/package.json @@ -1,7 +1,7 @@ { - "name": "metascraper-twitter", - "description": "Metascraper integration with Twitter", - "homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-twitter", + "name": "metascraper-x", + "description": "Metascraper integration for x.com", + "homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-x", "version": "5.45.6", "types": "src/index.d.ts", "main": "src/index.js", @@ -11,7 +11,7 @@ "url": "https://microlink.io" }, "repository": { - "directory": "packages/metascraper-twitter", + "directory": "packages/metascraper-x", "type": "git", "url": "git+https://github.com/microlinkhq/metascraper.git" }, diff --git a/packages/metascraper-twitter/src/index.d.ts b/packages/metascraper-x/src/index.d.ts similarity index 100% rename from packages/metascraper-twitter/src/index.d.ts rename to packages/metascraper-x/src/index.d.ts
diff --git a/packages/metascraper-x/src/index.js b/packages/metascraper-x/src/index.js
new file mode 100644
index 000000000..3d474653a
--- /dev/null
+++ b/packages/metascraper-x/src/index.js
@@ -0,0 +1,102 @@
+'use strict'
+
+const reachableUrl = require('reachable-url')
+const {
+  getUrls,
+  author,
+  image,
+  memoizeOne,
+  parseUrl,
+  title,
+  toRule,
+  description,
+  url
+} = require('@metascraper/helpers')
+
+const toAuthor = toRule(author)
+const toImage = toRule(image)
+const toTitle = toRule(title)
+const toDescription = toRule(description)
+const toUrl = toRule(url)
+
+// Memoized predicate: does `parseUrl` report twitter.com or x.com as the domain?
+const test = memoizeOne(url =>
+  ['twitter.com', 'x.com'].includes(parseUrl(url).domain)
+)
+
+/**
+ * Create the metascraper rules for twitter.com / x.com pages.
+ *
+ * @param {object} [options]
+ * @param {object} [options.gotOpts] passed as second argument to `reachable-url`.
+ * @param {boolean} [options.resolveUrls=false] resolve the URLs found in the description.
+ * @param {function} [options.resolveUrl] applied to every resolved URL (identity by default).
+ * @returns {object} rules per metadata field, plus the `test` predicate.
+ */
+module.exports = ({
+  gotOpts,
+  resolveUrls = false,
+  resolveUrl = url => url
+} = {}) => {
+  const rules = {
+    author: [
+      toAuthor($ => {
+        // Keep only what precedes ' on X' in og:title, when that marker is present.
+        const author = $('meta[property="og:title"]').attr('content')
+        return author?.includes(' on X') ? author.split(' on X')[0] : author
+      })
+    ],
+    // Index 3 of the '/'-split is the first path segment of an absolute URL (the handle).
+    title: [toTitle(($, url) => `@${url.split('/')[3]} on X`)],
+    url: [
+      toUrl($ =>
+        // `attr` returns undefined when there is no canonical link; yield no value instead of throwing.
+        $('link[rel="canonical"]').attr('href')?.replace('twitter.com', 'x.com')
+      )
+    ],
+    description: [
+      toDescription(async $ => {
+        let description = $('meta[property="og:description"]').attr('content')
+        if (!resolveUrls) return description
+
+        const urls = getUrls(description)
+
+        // Resolve every URL in parallel; URLs that are not reachable are kept untouched.
+        const resolvedUrls = await Promise.all(
+          urls.map(async url => {
+            const response = await reachableUrl(url, gotOpts)
+            if (reachableUrl.isReachable(response)) {
+              return resolveUrl(response.url)
+            }
+            return url
+          })
+        )
+
+        for (const [index, url] of resolvedUrls.entries()) {
+          const original = urls[index]
+          // Replacer function: `$&`, `$1`, `$$` inside `url` must be inserted literally.
+          description = description.replace(original, () => url)
+        }
+
+        return description
+      })
+    ],
+    image: [
+      toImage($ => {
+        // Swap the 200x200 avatar suffix for the 400x400 one.
+        let imageUrl = $('meta[property="og:image"]').attr('content')
+        if (imageUrl?.endsWith('_200x200.jpg')) {
+          imageUrl = imageUrl.replace('_200x200.jpg', '_400x400.jpg')
+        }
+        return imageUrl
+      })
+    ],
+    publisher: () => 'X'
+  }
+
+  rules.test = ({ url }) => test(url)
+
+  return rules
+}
+
+module.exports.test = test
diff --git a/packages/metascraper-x/test/fixtures/profile-video.html b/packages/metascraper-x/test/fixtures/profile-video.html new file mode 100644 index 000000000..2ca6c58b5 --- /dev/null +++ b/packages/metascraper-x/test/fixtures/profile-video.html @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + javilop + + + + +
+ + \ No newline at end of file diff --git a/packages/metascraper-x/test/fixtures/profile.html b/packages/metascraper-x/test/fixtures/profile.html new file mode 100644 index 000000000..db5533419 --- /dev/null +++ b/packages/metascraper-x/test/fixtures/profile.html @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Kikobeats + + + + +
+ + \ No newline at end of file diff --git a/packages/metascraper-x/test/fixtures/tweet-gif.html b/packages/metascraper-x/test/fixtures/tweet-gif.html new file mode 100644 index 000000000..e993e8b87 --- /dev/null +++ b/packages/metascraper-x/test/fixtures/tweet-gif.html @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 880139124791029763 + + + + +
+ + \ No newline at end of file diff --git a/packages/metascraper-x/test/fixtures/tweet-image.html b/packages/metascraper-x/test/fixtures/tweet-image.html new file mode 100644 index 000000000..04c2e5ced --- /dev/null +++ b/packages/metascraper-x/test/fixtures/tweet-image.html @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 934106870834454529 + + + + +
+ + \ No newline at end of file diff --git a/packages/metascraper-x/test/fixtures/tweet.html b/packages/metascraper-x/test/fixtures/tweet.html new file mode 100644 index 000000000..5d24429fd --- /dev/null +++ b/packages/metascraper-x/test/fixtures/tweet.html @@ -0,0 +1,26 @@ +1222907250383245320
\ No newline at end of file diff --git a/packages/metascraper-twitter/test/index.js b/packages/metascraper-x/test/index.js similarity index 61% rename from packages/metascraper-twitter/test/index.js rename to packages/metascraper-x/test/index.js index 89236d3b3..04b363e75 100644 --- a/packages/metascraper-twitter/test/index.js +++ b/packages/metascraper-x/test/index.js @@ -4,13 +4,15 @@ const { readFile } = require('fs/promises') const { resolve } = require('path') const test = require('ava') -const metascraperTwitter = require('metascraper-twitter') +const metascraperX = require('metascraper-x') const createMetascraper = (...args) => require('metascraper')([ - metascraperTwitter(...args), + metascraperX(...args), require('metascraper-author')(), require('metascraper-date')(), + require('metascraper-image')(), + require('metascraper-video')(), require('metascraper-description')(), require('metascraper-lang')(), require('metascraper-publisher')(), @@ -18,52 +20,57 @@ const createMetascraper = (...args) => require('metascraper-url')() ]) -test('from a Twitter profile', async t => { - const url = 'https://twitter.com/Kikobeats' +test('from a X profile', async t => { + const url = 'https://x.com/Kikobeats' const html = await readFile(resolve(__dirname, 'fixtures/profile.html')) - const metascraper = createMetascraper() const metadata = await metascraper({ url, html }) + t.snapshot(metadata) +}) + +test('from a X profile resolving URLs', async t => { + const url = 'https://x.com/Kikobeats' + const html = await readFile(resolve(__dirname, 'fixtures/profile.html')) + + const resolveUrl = url => { + const urlObj = new URL(url) + urlObj.search = '' + return urlObj.toString().replace('https://', '').replace('/', '') + } + const metascraper = createMetascraper({ resolveUrls: true, resolveUrl }) + const metadata = await metascraper({ url, html }) t.snapshot(metadata) }) -test('from a Twitter profile with tweets with video', async t => { - const url = 
'https://twitter.com/k4rliky' +test('from a X profile with tweets with video', async t => { + const url = 'https://x.com/javilop' const html = await readFile(resolve(__dirname, 'fixtures/profile-video.html')) - const metascraper = createMetascraper() const metadata = await metascraper({ url, html }) - t.snapshot(metadata) }) test('from a tweet', async t => { - const url = 'https://twitter.com/realDonaldTrump/status/1222907250383245320' + const url = 'https://x.com/realDonaldTrump/status/1222907250383245320' const html = await readFile(resolve(__dirname, 'fixtures/tweet.html')) - const metascraper = createMetascraper() const metadata = await metascraper({ url, html }) - t.snapshot(metadata) }) test('from a tweet with a gif', async t => { - const url = 'https://twitter.com/Kikobeats/status/880139124791029763' + const url = 'https://x.com/Kikobeats/status/880139124791029763' const html = await readFile(resolve(__dirname, 'fixtures/tweet-gif.html')) - const metascraper = createMetascraper() const metadata = await metascraper({ url, html }) - t.snapshot(metadata) }) test('from a tweet with an image', async t => { - const url = 'https://twitter.com/k4rliky/status/934482867480121345' + const url = 'https://x.com/UaSmart/status/934106870834454529' const html = await readFile(resolve(__dirname, 'fixtures/tweet-image.html')) - const metascraper = createMetascraper() const metadata = await metascraper({ url, html }) - t.snapshot(metadata) }) diff --git a/packages/metascraper-x/test/snapshots/index.js.md b/packages/metascraper-x/test/snapshots/index.js.md new file mode 100644 index 000000000..3212fef16 --- /dev/null +++ b/packages/metascraper-x/test/snapshots/index.js.md @@ -0,0 +1,101 @@ +# Snapshot report for `test/index.js` + +The actual snapshot is saved in `index.js.snap`. + +Generated by [AVA](https://avajs.dev). 
+ +## from a X profile + +> Snapshot 1 + + { + author: '#!/kiko/beats (Kikobeats)', + date: '2024-05-20T09:10:09.000Z', + description: 'engineering ▲ @vercel; founder of https://t.co/4PQvCsVNsA https://t.co/fpiHwbEPBv https://t.co/IG8Qq0IDKi https://t.co/gblDRx1P9D https://t.co/SmoZi3hAhb https://t.co/Y0Uk1XU3Eu https://t.co/PAq3eTEhmI', + image: 'https://pbs.twimg.com/profile_images/1717583638991138816/4HvMeeps_400x400.jpg', + lang: 'en', + publisher: 'X', + title: '@Kikobeats on X', + url: 'https://x.com/Kikobeats', + video: null, + } + +## from a X profile resolving URLs + +> Snapshot 1 + + { + author: '#!/kiko/beats (Kikobeats)', + date: '2024-05-20T09:10:09.000Z', + description: 'engineering ▲ @vercel; founder of microlink.io teslahunt.io unavatar.io keyvhq.js.org osom.js.org browserless.js.org metascraper.js.org', + image: 'https://pbs.twimg.com/profile_images/1717583638991138816/4HvMeeps_400x400.jpg', + lang: 'en', + publisher: 'X', + title: '@Kikobeats on X', + url: 'https://x.com/Kikobeats', + video: null, + } + +## from a X profile with tweets with video + +> Snapshot 1 + + { + author: 'Javi López ⛩️ (javilop)', + date: '2024-05-20T09:35:21.000Z', + description: 'Comparto tutoriales, herramientas y noticias de IA. Fundador @Magnific_AI 🔥 Guía IAs: https://t.co/JApwm5Tmfo 🗞️ Newsletter: https://t.co/tMELO1P8Wk', + image: 'https://pbs.twimg.com/profile_images/1581679886267301888/BHGZpOc6_400x400.jpg', + lang: 'en', + publisher: 'X', + title: '@javilop on X', + url: 'https://x.com/javilop/', + video: null, + } + +## from a tweet + +> Snapshot 1 + + { + author: 'Donald J. Trump (realDonaldTrump)', + date: '2024-05-20T09:39:36.000Z', + description: '“Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. 
In my opinion he is mentally deranged!', + image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_400x400.jpg', + lang: 'en', + publisher: 'X', + title: '@realDonaldTrump on X', + url: 'https://x.com/realDonaldTrump/status/1222907250383245320', + video: null, + } + +## from a tweet with a gif + +> Snapshot 1 + + { + author: '#!/kiko/beats (Kikobeats)', + date: '2024-05-20T09:40:45.000Z', + description: 'Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ', + image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg:large', + lang: 'en', + publisher: 'X', + title: '@Kikobeats on X', + url: 'https://x.com/Kikobeats/status/880139124791029763', + video: null, + } + +## from a tweet with an image + +> Snapshot 1 + + { + author: 'SmartUA (UaSmart)', + date: '2024-05-20T09:48:26.000Z', + description: 'Y terminamos el dia con Cultura de empresa con @patoroco, @flopezluis, Katia, Angélica en @codemotion_es #codemotion2017', + image: 'https://pbs.twimg.com/media/DPadOKpXcAIL-NW.jpg:large', + lang: 'en', + publisher: 'X', + title: '@UaSmart on X', + url: 'https://x.com/UaSmart/status/934106870834454529', + video: null, + } diff --git a/packages/metascraper-x/test/snapshots/index.js.snap b/packages/metascraper-x/test/snapshots/index.js.snap new file mode 100644 index 000000000..b8b8a8b37 Binary files /dev/null and b/packages/metascraper-x/test/snapshots/index.js.snap differ diff --git a/packages/metascraper-twitter/test/test.js b/packages/metascraper-x/test/test.js similarity index 72% rename from packages/metascraper-twitter/test/test.js rename to packages/metascraper-x/test/test.js index 4291d4fab..69274cb13 100644 --- a/packages/metascraper-twitter/test/test.js +++ b/packages/metascraper-x/test/test.js @@ -5,9 +5,7 @@ const test = require('ava') const { test: validator } = require('..') test('true', t => { - t.true( - validator('https://twitter.com/realDonaldTrump/status/1222907250383245320') - ) + 
t.true(validator('https://x.com/realDonaldTrump/status/1222907250383245320')) }) test('false', t => {