forked from MTG/metadb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlookup.py
108 lines (83 loc) · 3.24 KB
/
lookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from argparse import ArgumentParser
import importlib
import sys
import csv
import json
import os.path
def _get_module_by_path(modulepath):
    """Import and return the module named by a dotted path.

    Args:
        modulepath: Dotted import path of the module to load, e.g.
            ``metadb.scrapers.lastfm``.

    Returns:
        The imported module object.

    Raises:
        Exception: If the import raises ImportError. The message starts with
            "Cannot load module <modulepath>" and is followed by the text of
            the underlying ImportError, so a failing import inside the
            requested module can be told apart from a mistyped path.
    """
    try:
        return importlib.import_module(modulepath)
    except ImportError as e:
        # Keep the generic Exception type that callers already expect, but
        # do not throw away the reason the import failed.
        raise Exception("Cannot load module %s: %s" % (modulepath, e))
def save(result, outfile):
    """Write ``result`` to ``outfile`` as JSON.

    Args:
        result: JSON-serializable object to store.
        outfile: Path of the file to write. It is opened in ``'w'`` mode, so
            any existing content is replaced.
    """
    with open(outfile, 'w') as handle:
        json.dump(obj=result, fp=handle)
def process_file(module, filename, save=False):
    """Run one query per row of a CSV file.

    The file is read with ``csv.DictReader``, so its first line supplies the
    column names and every following row becomes a query dict that is passed
    to ``process``. Keys the file does not provide are filled in with
    defaults before the query is processed.

    Args:
        module: Scraper module path used when the CSV has no ``module``
            column.
        filename: Path of the CSV file to read.
        save: Value used for the ``save`` key when the CSV has no ``save``
            column.

    NOTE(review): values read from the CSV are strings, so any non-empty
    ``save`` cell (including the text "False") is truthy when ``process``
    tests it -- confirm this is intended.
    """
    defaults = (
        ('module', module),
        ('save', save),
        ('year', None),
        ('artist', None),
        ('recording', None),
        # The command-line path always passes a 'release' key (None when the
        # flag is absent); default it here too so both paths hand the same
        # set of keys to the scraper.
        ('release', None),
        ('mbid', None),
    )
    with open(filename) as csvfile:
        for query in csv.DictReader(csvfile):
            for key, value in defaults:
                # Only fills keys that are absent; existing cells are kept.
                query.setdefault(key, value)
            process(query)
def process(query):
    """Run a single scraper query and save or print its result.

    The scraper module named by ``query['module']`` is imported and its
    ``scrape(query)`` function is called; it is expected to return a
    ``(result, result_type)`` pair. The result is wrapped in a dict with the
    keys ``type``, ``mbid``, ``scraper`` and ``result``. Empty results are
    wrapped and emitted like any other.

    Args:
        query: Dict describing the query. ``module`` and ``mbid`` must be
            present and non-empty. If ``save`` is truthy the wrapped result
            is written to ``<mbid>.json`` in the current directory, and the
            query is skipped when that file already exists; otherwise the
            wrapped result is printed followed by an empty line.

    Raises:
        Exception: If ``module`` or ``mbid`` is missing or empty, if the
            module cannot be loaded, or if it has no ``scrape`` attribute.

    Exceptions raised by ``scrape`` itself are not propagated: their message
    is printed and the query is abandoned.
    """
    module_path = query.get('module')
    if not module_path:
        raise Exception("Missing module information for the query", json.dumps(query))

    module = _get_module_by_path(module_path)
    if not hasattr(module, "scrape"):
        raise Exception("Module %s must have a .scrape method" % module)

    # .get() so that a query without the key gets the descriptive error
    # below rather than a bare KeyError.
    mbid = query.get('mbid')
    if not mbid:
        raise Exception("Missing MBID for the query", json.dumps(query))

    save_requested = query.get('save')
    outfile = None
    if save_requested:
        # Check if result file already exists
        outfile = mbid + '.json'
        if os.path.exists(outfile):
            print("File %s found, skipping query" % outfile)
            return

    try:
        result, result_type = module.scrape(query)
    except Exception as e:
        # Best effort: report the scraper failure and move on so that one
        # bad query does not abort a whole CSV run.
        print(str(e))
        return

    # Empty results are kept on purpose: they are saved/printed too.
    result = {
        'type': result_type,
        'mbid': mbid,
        'scraper': module_path,
        'result': result
    }

    if save_requested:
        save(result, outfile)
    else:
        print(result)
        # Blank separator line after each printed result.
        print("")
if __name__ == "__main__":
    # Command-line entry point. Either run every query listed in a CSV file
    # (--csv) or run a single query built from the individual flags.
    parser = ArgumentParser(description="""
    MetaDB metadata scraper.
    """)

    parser.add_argument('--module', help='Scraper module python path, e.g. metadb.scrapers.lastfm', required=False)
    parser.add_argument('--csv', help='Use input csv file for queries', required=False)
    parser.add_argument('--artist', help='Artist name', required=False)
    parser.add_argument('--recording', help='Recording title', required=False)
    parser.add_argument('--release', help='Release title', required=False)
    parser.add_argument('--year', help='Year', required=False)
    parser.add_argument('--mbid', help='Associated (artist/recording/release) MBID to store data for', required=False)
    parser.add_argument('--save', help="Save to file", action='store_true', default=False)

    args = parser.parse_args()

    if args.csv:
        if args.artist or args.recording or args.release or args.mbid:
            # Single formatted string so the statement is valid on both
            # Python 2 and 3; the text (including the double space after
            # "in") matches what the original print statement emitted.
            print('Performing queries using data in  %s file; '
                  '--artist/--recording/--release/--mbid flags will be ignored'
                  % args.csv)
        # --module and --save act as defaults for rows that lack those columns.
        process_file(args.module, args.csv, args.save)
    else:
        # The parsed namespace is passed as a plain dict, so the query keys
        # are the flag names (module, csv, artist, recording, release, year,
        # mbid, save).
        process(vars(args))