-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget-music-from-page.js
141 lines (120 loc) · 4.3 KB
/
get-music-from-page.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
const fs = require('fs')
const path = require('path')
const puppeteer = require('puppeteer')
const axios = require('axios')
// Command-line interface. `--dest` is mandatory; the urls to scrape come
// either from `--queue-json-file` (a json file) or from `--queue` (inline list).
const argv = require('yargs')
  .options({
    dest: {
      desc: 'Relative path to a directory to which the music will be downloaded to',
      demandOption: true,
      type: 'string'
    },
    'queue-json-file': {
      desc: 'Relative path to a json file which stores a json list of string urls to fetch music from',
      type: 'string'
    },
    queue: {
      desc: 'list of string urls to fetch live music from',
      type: 'array'
    }
  })
  .argv
// The dashed option name is not a valid identifier, so it is renamed while destructuring.
const { dest, queue, 'queue-json-file': queueFile } = argv
// Resolve the list of page urls to scrape. A queue file takes precedence over
// urls passed inline with --queue; with neither there is nothing to do.
let pageQueue
if (queueFile) {
  // The queue file path is interpreted relative to this script's directory.
  const queueFilePath = path.join(__dirname, queueFile)
  try {
    console.log(`Opening queue file at ${queueFilePath}..`)
    const jsonQueue = fs.readFileSync(queueFilePath, 'utf8')
    pageQueue = JSON.parse(jsonQueue)
  } catch (e) {
    console.error(`Error opening queue file path or parsing queue file: ${e.message}`)
    process.exit(1)
  }
  // Valid JSON is not necessarily a list; fail early with a clear message
  // instead of failing later while iterating the queue.
  if (!Array.isArray(pageQueue)) {
    console.error(`Error opening queue file path or parsing queue file: ${queueFilePath} does not contain a json list`)
    process.exit(1)
  }
} else if (queue) {
  pageQueue = queue
} else {
  // `console.warning` does not exist; `console.warn` is the correct method.
  console.warn('Neither cli queue or queue path passed to argv, no urls passed to parse.')
  console.log('exiting...')
  process.exit(0)
}
/**
 * Makes a scraped name safe to use as a single file or directory name.
 * Every `>` and `*` is removed and every `/` becomes `_`, so the result
 * can never introduce an extra path segment.
 *
 * @param {string} rawName - name as scraped from the page
 * @returns {string} the sanitized name
 */
const sanitizeFileName = (rawName) => rawName
  .replace(/[>*]/g, '')
  .replace(/\//g, '_')

/**
 * Sanitizes every track name and makes the names unique within the list.
 * The first occurrence keeps its name; later ones get `_1`, `_2`, ...
 * appended. Uniqueness is checked on the sanitized names, because those
 * are the names that end up on disk.
 *
 * @param {Array<{name: string, link: string}>} tracks - tracks in page order
 * @returns {Array<{name: string, link: string}>} new list with unique, file-safe names
 */
const assignUniqueNames = (tracks) => {
  const usedNames = new Set()
  const lastSuffix = new Map()
  return tracks.map(({ name, link }) => {
    const baseName = sanitizeFileName(name)
    let suffix = lastSuffix.get(baseName) || 0
    let candidate = baseName
    // Keep bumping the suffix until the name collides with nothing already
    // taken (including a track that was literally named e.g. "foo_1").
    while (usedNames.has(candidate)) {
      suffix += 1
      candidate = `${baseName}_${suffix}`
    }
    lastSuffix.set(baseName, suffix)
    usedNames.add(candidate)
    return { name: candidate, link }
  })
}

/**
 * Scrapes one page for its track list and downloads every track as
 * `<localPath>/<show name>/<track name>.mp3`.
 *
 * Errors are logged, never thrown: a failing track does not stop the
 * remaining tracks, and a failing page does not reject the returned promise.
 * The browser is always closed, even when scraping fails.
 *
 * @param {string} url - page to scrape
 * @param {string} localPath - directory in which the show directory is created
 * @returns {Promise<void>}
 */
const getMusic = async (url, localPath) => {
  let browser
  try {
    console.log(`scraping url: ${url}`)
    browser = await puppeteer.launch()
    const page = await browser.newPage()
    await page.goto(url)
    // Scrape details. This callback runs inside the page, so it cannot use
    // any helper from this file; it only extracts the raw values.
    const { rawTracks, showName } = await page.$eval('#theatre-ia-wrap', (container) => {
      const showName = container.querySelector('h1.sr-only').innerText
      const rawTracks = []
      container.querySelectorAll('div').forEach((div) => {
        // Only divs carrying an `itemprop` attribute describe a track.
        if (!div.attributes.itemprop) {
          return
        }
        rawTracks.push({
          name: div.children[0].attributes.content.value,
          link: div.children[2].attributes.href.textContent
        })
      })
      return {
        showName,
        rawTracks
      }
    })
    const trackList = assignUniqueNames(rawTracks)
    console.log(`${trackList.length} tracks found for ${showName}`)
    console.log('Downloading data...')
    // download tracks
    const downloadDir = path.join(localPath, sanitizeFileName(showName))
    // `recursive` creates a missing destination directory and tolerates a
    // show directory left over from an earlier run.
    fs.mkdirSync(downloadDir, { recursive: true })
    for (const { name, link } of trackList) {
      console.log(`Downloading track: ${name}...`)
      try {
        const { data } = await axios({
          url: link,
          method: 'GET',
          responseType: 'arraybuffer',
          headers: {
            'Content-Type': 'audio/wav'
          }
        })
        fs.writeFileSync(path.join(downloadDir, `${name}.mp3`), data)
        console.log('done')
      } catch (e) {
        console.log(`Error getting track "${name}": ${e.stack}`)
      }
    }
    console.log('complete!')
  } catch (e) {
    console.log(`Error for ${url}: ${e.stack}`)
  } finally {
    // Closing the browser also closes its pages; doing it here prevents a
    // leaked browser process when any step above throws.
    if (browser) {
      try {
        await browser.close()
      } catch (e) {
        console.log(`Error closing browser for ${url}: ${e.stack}`)
      }
    }
  }
}
/**
 * Processes the urls of `queue` strictly one after another, handing each to
 * `getMusic` together with `dest`. A failure for one url is logged and the
 * next url is still processed. The process exits with code 0 once the queue
 * has been drained.
 *
 * @param {Iterable<string>} queue - urls to scrape, in order
 * @param {string} dest - directory passed through to `getMusic`
 * @returns {Promise<void>}
 */
const runQueue = async (queue, dest) => {
  // Handles a single url and absorbs its error so the queue keeps going.
  const fetchOne = async (pageUrl) => {
    try {
      await getMusic(pageUrl, dest)
    } catch (err) {
      console.log(`Error getting music from ${pageUrl}: ${err.message}`)
    }
  }

  const pending = queue[Symbol.iterator]()
  let step = pending.next()
  while (!step.done) {
    await fetchOne(step.value)
    step = pending.next()
  }

  process.exit(0)
}
// Entry point: the download directory is resolved relative to this script.
// The returned promise is handled explicitly so that an unexpected rejection
// (e.g. a queue that cannot be iterated) is reported with a failing exit
// code instead of surfacing as an unhandled rejection.
runQueue(pageQueue, path.join(__dirname, dest)).catch((e) => {
  console.error(`Unexpected error while running queue: ${e.stack}`)
  process.exit(1)
})