-
Notifications
You must be signed in to change notification settings - Fork 0
/
save_article.py
executable file
·174 lines (143 loc) · 4.53 KB
/
save_article.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python
"""
This file is for saving an article to AirTable.
Usage: save_article.py "The Source" http://someurl
"""
from __future__ import absolute_import
from datetime import date
import requests
import sys
from models.article import Article
from models.source import Source
from urler import Urler
from airtable import Airtable
class Save(object):
    """Base class for command-line helpers that post a record to Airtable.

    Stores the program name and a short description of the expected
    arguments (used for the usage message) and creates an ``Airtable``
    client for subclasses to query with.
    """

    def __init__(self, prog, args):
        """Store the usage information and create the Airtable client.

        prog -- program name shown in the usage message
        args -- short description of the expected arguments
        """
        self.prog = prog
        self.args = args
        self.airtable = Airtable()

    def usage(self):
        """Return the usage string in the form ``"<prog> [<args>]"``."""
        return "%s [%s]" % (self.prog, self.args)

    def check_args(self, is_valid):
        """Print the usage message and exit unless ``is_valid`` is truthy.

        Raises SystemExit with status 1 when the arguments are invalid, so
        that a calling shell or script can detect the failure.
        """
        if is_valid:
            return
        print("Invalid args!")
        print(self.usage())
        sys.exit(1)

    def format_payload(self, thing):
        """Build the request body for ``thing``.

        Returns a dict holding a one-element "records" list, whose "fields"
        entry is ``thing.format()``, and "typecast" set to True.
        """
        return {
            "records": [{"fields": thing.format()}],
            "typecast": True,
        }

    def post(self, thing, alt=True):
        """POST ``thing`` as JSON and print the decoded response.

        thing -- object providing ``config["key"]``, ``format()`` and the
                 ``alt_api_url`` / ``api_url`` attributes
        alt   -- when true (the default) post to ``thing.alt_api_url``,
                 otherwise to ``thing.api_url``
        """
        headers = {
            "Authorization": "Bearer %s" % thing.config["key"],
            "Content-type": "application/json; charset=utf-8",
        }
        payload = self.format_payload(thing)
        if alt:
            api_url = thing.alt_api_url
        else:
            api_url = thing.api_url
        res = requests.post(api_url, json=payload, headers=headers)
        try:
            body = res.json()
        except ValueError:
            # A non-JSON reply (e.g. an HTML error page) would otherwise
            # crash here and hide what the server actually sent back.
            body = "%s %s" % (res.status_code, res.text)
        print(body)
class SaveArticle(Save):
    """Save one article, skipping URLs that are already recorded."""

    # Tables searched for an existing record, in lookup order.  The second
    # name is already URL-encoded, exactly as the callers have always
    # passed it.
    _ARTICLE_TABLES = ("Articles", "Possible%20Articles")

    def __init__(self, prog):
        """Create the saver; the only argument shown in usage is "url"."""
        super(SaveArticle, self).__init__(prog, "url")

    def strip_source(self, text, source):
        """Return ``text`` unchanged (``source`` is currently unused)."""
        return text

    def get_metadata(self, url):
        """Create a ``Urler`` for ``url``, fetch it, and return it."""
        fetched = Urler(url)
        fetched.fetch()
        return fetched

    def save(self, url, source_id, thedate, title, notes=None,
             min_to_read=None, apple_url=None):
        """Build an ``Article`` and post it unless it already exists.

        url         -- article URL, stored in the "URL" field
        source_id   -- source record id, stored as a one-element list
        thedate     -- value stored in the "Date" field
        title       -- article title
        notes       -- optional notes
        min_to_read -- optional value for the "MinToRead" field
        apple_url   -- optional alternate URL that is also checked for
                       duplicates
        """
        self.article = Article(
            {
                "URL": url,
                "Title": title,
                "Notes": notes,
                "Source": [source_id],
                "MinToRead": min_to_read,
                "Date": thedate,
            }
        )
        # TODO: the duplicate check was flagged as broken by the original
        # author; verify it against live data.
        if self.article_exists(apple_url):
            print("Article already exists!")
            return
        print("Saving Article")
        print(self.article.title)
        print(self.article.notes)
        print(url)
        print("")
        self.post(self.article)

    def article_exists(self, apple_url=None):
        """Return True if the article URL (or ``apple_url``) is recorded.

        Every candidate URL is looked up in each table of
        ``_ARTICLE_TABLES``; the search stops at the first match.
        """
        candidates = [self.article.url]
        if apple_url:
            candidates.append(apple_url)
        for candidate in candidates:
            params = {"filterByFormula": "AND(URL = \"%s\")" % candidate}
            for table in self._ARTICLE_TABLES:
                if self.find_existing(table, params):
                    return True
        return False

    def find_existing(self, table, params):
        """Return True if querying ``table`` with ``params`` finds a record.

        NOTE(review): assumes ``Airtable.find`` returns a sequence whose
        first item is a dict that may carry a "records" list -- inferred
        from the indexing below; confirm against ``Airtable.find``.
        """
        # Query the table that was asked for; this used to be hard-coded to
        # "Articles", so the other table was never actually searched.
        existing = self.airtable.find(table, params)
        print(existing)
        if not existing:
            return False
        first = existing[0]
        return "records" in first and len(first["records"]) > 0

    def go(self, url, thedate):
        """Fetch metadata for ``url`` and save the article dated ``thedate``.

        NOTE: ``source_id`` is read from module scope; the ``__main__``
        section of this script assigns it before calling ``go``.
        """
        # Use this instance rather than the module-level ``s`` global.
        metadata = self.get_metadata(url)
        self.save(
            url=url,
            source_id=source_id,
            thedate=thedate,
            title=metadata.title,
            notes=metadata.notes,
            min_to_read=metadata.min_to_read,
            apple_url=metadata.apple_url,
        )
# Abbreviation -> full source name.  Built once at import time instead of
# on every call.
_COMMON_SOURCES = {
    "WP": "Washington Post",
    "NYT": "New York Times",
    "NY": "New Yorker",
    "TA": "The Atlantic",
    "TG": "The Guardian",
    "WW": "Willamette Week",
    "TO": "The Oregonian",
    "VF": "Vanity Fair",
    "ABC": "ABC News",
    "CBS": "CBS News",
    "DB": "Daily Beast",
    "BF": "BuzzFeed News",
    "WSJ": "Wall Street Journal",
    "NBC": "NBC News",
}


def sub_common_sources(name):
    """Expand a known source abbreviation to its full name.

    Matching is exact and case-sensitive.  Returns the full name for a
    known abbreviation, otherwise ``name`` unchanged.
    """
    return _COMMON_SOURCES.get(name, name)
if __name__ == "__main__":
    # Expected invocation: save_article.py "<source name or abbreviation>" <url>
    s = SaveArticle(sys.argv[0])
    s.check_args(len(sys.argv) == 3)
    url = sys.argv[2]
    thedate = date.today().isoformat()
    source = Source()
    # Updates the sources on every run; comment this call out to skip the
    # update.
    source.write_sources()
    source_name = sub_common_sources(sys.argv[1])
    print(source_name)
    # Keep ``source_id`` at module level: SaveArticle.go reads it as a global.
    source_id = source.get_id_from_name(source_name)
    if not source_id:
        print("No source found for %s" % sys.argv[1])
        # A bare ``exit`` is only a name lookup and does not stop the script;
        # exit explicitly, with a failure status, instead of saving an
        # article without a source.
        sys.exit(1)
    s.go(url, thedate)