# standard imports
import argparse
import json
import os
import re
from queue import Queue
import threading
import time
from typing import Callable, Optional

# lib imports
import requests

# load env
from dotenv import load_dotenv
load_dotenv()

# setup queue and lock
queue = Queue()
lock = threading.Lock()

# GitHub headers
github_headers = {
    'Accept': 'application/vnd.github+json',
    # prefer PAT_TOKEN, fall back to GH_TOKEN
    'Authorization': f'Bearer {os.getenv("PAT_TOKEN") or os.getenv("GH_TOKEN")}'
}

plugin_file = os.path.join('database', 'plugins.json')
if os.path.isfile(plugin_file):
with open(file=plugin_file, mode='r') as og_f:
og_data = json.load(fp=og_f) # get currently saved data
else:
og_data = dict()
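# keys in og_data are GitHub repository ids stored as strings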


def exception_writer(error: Exception, name: str, end_program: bool = False) -> None:
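    """
    Log an exception and record it in the markdown output files.

    The error is printed, then appended to ``comment.md`` and ``exceptions.md``;
    if ``end_program`` is True the exception is re-raised afterwards.
    """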
print(f'Error processing {name}: {error}')
files = ['comment.md', 'exceptions.md']
for file in files:
with open(file, "a") as f:
f.write(f'# :bangbang: **Exception Occurred** :bangbang:\n\n```txt\n{error}\n```\n\n')
if end_program:
raise error


def requests_loop(url: str,
                  headers: Optional[dict] = None,
                  method: Callable = requests.get,
                  max_tries: int = 8,
                  allow_statuses: Optional[list] = None,
                  github_wait: bool = False) -> Optional[requests.Response]:
    """
    Make a request, retrying with exponential backoff on errors.

    Returns ``None`` if every attempt fails; callers must handle that case.
    """
    allow_statuses = [requests.codes.ok] if allow_statuses is None else allow_statuses
    count = 1
    while count <= max_tries:
        if github_wait:
            wait_github_api_limit(headers=headers, resources=['core'])  # core is the only resource we're using
        print(f'Processing {url} ... (attempt {count} of {max_tries})')
        try:
            response = method(url=url, headers=headers)
        except Exception as e:  # includes requests.exceptions.RequestException
            print(f'Error processing {url} - {e}')
            time.sleep(2 ** count)  # exponential backoff
            count += 1
        else:
            if response.status_code in allow_statuses:
                return response
            print(f'Error processing {url} - {response.status_code}')
            time.sleep(2 ** count)
            count += 1
    return None  # all attempts exhausted


def wait_github_api_limit(headers: Optional[dict] = None, resources: Optional[list] = None) -> None:
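    """
    Block until the GitHub API rate limit allows more requests.

    Sleeps until the reset time whenever less than 1/4 of a rate limit
    remains; checks only the given ``resources``, or every resource plus
    the overall rate when none are given.
    """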
while True:
# test if we are hitting the GitHub API limit
response = requests.get(url='https://api.github.com/rate_limit', headers=headers)
rate_limit = response.json()
        # current time as a Unix timestamp (the API reset values are UTC epoch seconds)
        current_time = time.time()
# don't use more than 1/4 of the rate limit
sleep_time = 0
if not resources:
if rate_limit['rate']['limit'] > 0 and rate_limit['rate']['remaining'] < rate_limit['rate']['limit'] / 4:
wait_time = rate_limit['rate']['reset'] - current_time
print(f'rate wait_time: {wait_time}')
sleep_time = wait_time if wait_time > sleep_time else sleep_time
for resource in rate_limit['resources']:
if (resources and resource in resources) or not resources:
if rate_limit['resources'][resource]['limit'] > 0 and \
rate_limit['resources'][resource]['remaining'] < rate_limit['resources'][resource]['limit'] / 4:
wait_time = rate_limit['resources'][resource]['reset'] - current_time
print(f'{resource} wait_time: {wait_time}')
sleep_time = wait_time if wait_time > sleep_time else sleep_time
if sleep_time > 0:
print(f'Waiting {sleep_time} seconds to avoid GitHub API limit...')
time.sleep(sleep_time)
else:
return


def process_queue() -> None:
    """
    Process items in the queue.

    This is an endless loop that pulls items off the queue and handles them.

    Examples
    --------
    >>> thread = threading.Thread(target=process_queue, daemon=True)
    ...
    """
    while True:
        item = queue.get()
        queue_handler(item=item)  # process the item from the queue
        queue.task_done()  # tell the queue that we are done with this item


def queue_handler(item: str) -> None:
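    """
    Process a single queued item: parse the GitHub url and update its repo data.
    """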
git_owner, git_repo = check_github(data=dict(github_url=item))
process_github_url(owner=git_owner, repo=git_repo)


# create multiple threads for processing items faster
for _ in range(10):
    try:
        # each worker runs the endless queue-processing loop
        worker = threading.Thread(target=process_queue)
        # daemon threads end when the main thread ends
        worker.daemon = True
        worker.start()
    except RuntimeError as r_e:
        print(f'RuntimeError encountered: {r_e}')
        break


def process_github_url(owner: str, repo: str, submission: Optional[dict] = None) -> dict:
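    """
    Collect repository metadata from the GitHub API and merge it into ``og_data``.

    Gathers repo details, open issue and pull request counts, the gh-pages url,
    release and branch downloads, icon and attribution images, and (depending
    on the run mode) categories and scanner mappings, then updates the shared
    database dict under the repository's GitHub id and returns the raw repo data.
    """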
api_repo_url = f'https://api.github.com/repos/{owner}/{repo}'
response = requests_loop(url=api_repo_url, headers=github_headers, github_wait=True)
github_data = response.json()
try:
github_data['id']
except KeyError as e:
raise Exception(f'Error processing plugin: {e}')
else:
# get issues data
issue_data = requests_loop(url=f'{api_repo_url}/issues', headers=github_headers, github_wait=True).json()
open_issues = 0
open_pull_requests = 0
        for issue in issue_data:
            # the issues endpoint also returns PRs; those carry a 'pull_request' key
            if 'pull_request' in issue:
                open_pull_requests += 1
            else:
                open_issues += 1
        # get gh-pages data, this will return a 404 if the repo doesn't have gh-pages
        # GitHub token requires repo scope for this endpoint, so can't use this for PRs from forks :(
if os.getenv("PAT_TOKEN"):
response = requests_loop(url=f'{api_repo_url}/pages', headers=github_headers,
allow_statuses=[requests.codes.ok, 404], github_wait=True)
if response.status_code == 404:
gh_pages_url = None
else:
gh_pages_data = response.json()
gh_pages_url = gh_pages_data['html_url']
else:
gh_pages_url = None
# setup downloads, i.e. releases and branches data
downloads = []
# get releases data
releases_data = requests_loop(url=f'{api_repo_url}/releases', headers=github_headers, github_wait=True).json()
for release in releases_data:
if release['draft']:
continue
download_assets = dict()
if release['assets']:
for asset in release['assets']:
if asset['name'].lower().endswith('.zip'):
download_assets[asset['name']] = asset['browser_download_url']
# add the zipball url at the end
download_assets['zipball'] = release['zipball_url']
downloads.append(dict(
type='release',
date=release['published_at'],
name=release['name'],
release_tag=release['tag_name'],
download_assets=download_assets,
prerelease=release['prerelease'],
))
# get branch data
branches_data = requests_loop(url=f'{api_repo_url}/branches', headers=github_headers, github_wait=True).json()
for branch in branches_data:
# get commit date
commit_data = requests_loop(url=f'{api_repo_url}/commits/{branch["commit"]["sha"]}',
headers=github_headers, github_wait=True).json()
date = commit_data['commit']['author']['date']
download_assets = dict(
zipball=f'https://github.com/{owner}/{repo}/archive/refs/heads/{branch["name"]}.zip'
)
downloads.append(dict(
type='branch',
date=date,
name=branch['name'],
commit_sha=branch['commit']['sha'],
download_assets=download_assets,
                default_branch=branch['name'] == github_data['default_branch'],
))
# sort downloads by date
downloads = sorted(downloads, key=lambda sort_key: sort_key['date'], reverse=True)
# find icon-default.png in repo and use that as the thumb icon
image_extensions = ['png', 'jpg', 'jpeg']
directory_list = ['Contents', 'Resources']
attribution_image_url = None
thumb_image_url = None
path = ''
loop = True # loop while this is true
while loop:
next_loop = False
repo_contents = requests_loop(
url=f'{api_repo_url}/contents{path}',
headers=github_headers,
max_tries=5,
github_wait=True
).json()
for item in repo_contents:
# directories
if item['type'] == 'dir' and (item['name'] in directory_list or item['name'].endswith('.bundle')):
path = f'{path}/{item["name"]}'
next_loop = True
break # break the for loop and continue the while loop
elif item['type'] == 'file' and item['name'].rsplit('.', 1)[-1] in image_extensions:
file_name = item['name'].rsplit('.', 1)[0]
if file_name == 'icon-default':
thumb_image_url = item['download_url']
elif file_name == 'attribution':
attribution_image_url = item['download_url']
loop = next_loop
# get the original data, not available through APIs
non_github_data = dict()
categories = None
scanner_mapping = None
        try:
            args  # 'args' is only defined when this module runs as a script (see the __main__ block)
        except NameError:
            pass
else:
# move these keys to non GitHub data dict as they don't exist in the GitHub API
obsolete_keys = [
'branches',
'releases',
]
try:
for k in og_data[str(github_data['id'])]:
if k in obsolete_keys:
continue
if k not in github_data:
non_github_data[k] = og_data[str(github_data['id'])][k]
except KeyError as e:
if args.daily_update:
exception_writer(error=Exception(f'Error processing plugin: {e}'), name='og_data', end_program=True)
# okay if issue update
if args.daily_update:
categories = og_data[str(github_data['id'])]['categories']
try:
scanner_mapping = og_data[str(github_data['id'])]['scanner_mapping']
except KeyError:
scanner_mapping = dict( # default dictionary for migration purposes
Common=[],
Movies=[],
Music=[],
Series=[]
)
elif args.issue_update:
# add the categories to the data
if submission['categories']:
categories = submission['categories']
else:
exception_writer(error=Exception('No categories selected'), name='categories')
categories = ':bangbang: NONE :bangbang:'
scanner_mapping = submission['scanner_mapping']
scanners = []
# check the scanner mapping
for k in scanner_mapping:
if scanner_mapping[k]:
for scanner in scanner_mapping[k]:
if not scanner.endswith('.py'):
exception_writer(error=Exception(f'Invalid file extension for scanner: {scanner}'),
name='scanner_mapping')
break
file_check_response = requests_loop(
url=f'{api_repo_url}/contents/{scanner}',
headers=github_headers,
max_tries=5,
github_wait=True,
allow_statuses=[requests.codes.ok, 404] # process 404 later, reduce API usage
)
if not file_check_response or file_check_response.status_code == 404:
exception_writer(error=Exception(f'Invalid scanner path: {scanner}'),
name='scanner_mapping')
break
# check if file
if file_check_response.json()['type'] != 'file':
exception_writer(error=Exception(f'Found "{scanner}" but it is not a file.'),
name='scanner_mapping')
break
# if we made it this far, add the scanner to the list
scanners.append(scanner)
if "Scanner" in categories:
if not scanners:
# check if "Scanners" directory exists
file_check_response = requests_loop(
url=f'{api_repo_url}/contents/Scanners',
headers=github_headers,
max_tries=5,
github_wait=True
)
if not file_check_response:
exception_writer(error=Exception('No "Scanners" directory found in repo.'), name='scanners')
else:
file_check_data = file_check_response.json()
# check if directory
valid_scanner_directories = ['Common', 'Movies', 'Music', 'Series']
for item in file_check_data:
if item['name'] in valid_scanner_directories:
# check if directory
if item['type'] != 'dir':
exception_writer(
error=Exception(f'Found "{item["name"]}" but it is not a directory.'),
name='scanners')
else:
# assume scanner(s) are present in the directory
scanners = True
break
if not scanners:
exception_writer(error=Exception('No valid scanners found.'), name='scanners')
with lock:
# only GitHub data first, where keys match exactly
og_data[str(github_data['id'])] = {
'archived': github_data['archived'],
'default_branch': github_data['default_branch'],
'description': github_data['description'],
'disabled': github_data['disabled'],
'forks_count': github_data['forks_count'],
'full_name': github_data['full_name'],
'has_discussions': github_data['has_discussions'],
'has_downloads': github_data['has_downloads'],
'has_issues': github_data['has_issues'],
'has_wiki': github_data['has_wiki'],
'homepage': github_data['homepage'],
'html_url': github_data['html_url'],
'name': github_data['name'],
'stargazers_count': github_data['stargazers_count'],
}
# combine the non-github data
og_data[str(github_data['id'])].update(non_github_data)
# then add data where keys don't match, or value adjusted manually
og_data[str(github_data['id'])]['attribution_image_url'] = attribution_image_url
og_data[str(github_data['id'])]['avatar_image_url'] = github_data['owner']['avatar_url']
og_data[str(github_data['id'])]['categories'] = categories
og_data[str(github_data['id'])]['downloads'] = downloads
og_data[str(github_data['id'])]['gh_pages_url'] = gh_pages_url
og_data[str(github_data['id'])]['license'] = None if not github_data['license'] else \
github_data['license']['name']
og_data[str(github_data['id'])]['license_url'] = None if not github_data['license'] else \
github_data['license']['url']
og_data[str(github_data['id'])]['open_issues_count'] = open_issues
og_data[str(github_data['id'])]['open_pull_requests_count'] = open_pull_requests
og_data[str(github_data['id'])]['scanner_mapping'] = scanner_mapping
og_data[str(github_data['id'])]['thumb_image_url'] = thumb_image_url
# remove `.bundle` from end of name and full name
test = '.bundle'
t_len = len(test)
test_keys = ['name', 'full_name']
for k in test_keys:
if og_data[str(github_data['id'])][k].endswith(test):
og_data[str(github_data['id'])][k] = og_data[str(github_data['id'])][k][:-t_len]
# todo - find more reliable way to test if wiki exists
# test wiki pages and overwrite value if wiki is empty
# with lock: # ensure only one thread is making a request to GitHub at a time
# if github_data['has_wiki']:
# test_url = f'https://github.com/search?q=repo:{owner}/{repo}&type=wikis'
# test_wiki = requests_loop(url=test_url, github_wait=True)
# if test_wiki.status_code == requests.codes.ok:
# # see if string in contents
# # not logged in
        #             if f"We couldn’t find any wiki pages matching 'repo:{owner}/{repo}'" in test_wiki.text:
# og_data[str(github_data['id'])]['has_wiki'] = False
# # logged in
# if 'Your search did not match any <!-- -->wikis' in test_wiki.text:
# og_data[str(github_data['id'])]['has_wiki'] = False
# else:
# og_data[str(github_data['id'])]['has_wiki'] = False
# exception_writer(error=Exception(f'Unable to search wiki for {owner}/{repo}'), name='GitHub Wiki')
try:
args.issue_update
except NameError:
pass
else:
if args.issue_update:
# create the issue comment and title files
issue_comment = """
| Property | Value |
| --- | --- |
"""
# dynamically create the Markdown table
for data_key, value in og_data[str(github_data['id'])].items():
if 'image_url' in data_key and 'avatar' not in data_key and value:
issue_comment += f'| {data_key} | ![{data_key}]({value}) |\n'
else:
issue_comment += f'| {data_key} | {value} |\n'
with open("comment.md", "a") as comment_f:
comment_f.write(issue_comment)
with open("title.md", "w") as title_f:
title_f.write(f'[PLUGIN]: {github_data["full_name"]}')
# update user ids
original_submission = False
with lock:
try:
og_data[str(github_data['id'])]['plugin_added_by']
except KeyError:
original_submission = True
og_data[str(github_data['id'])]['plugin_added_by'] = os.environ['ISSUE_AUTHOR_USER_ID']
finally:
og_data[str(github_data['id'])]['plugin_edited_by'] = os.environ['ISSUE_AUTHOR_USER_ID']
# update contributor info
update_contributor_info(original=original_submission, base_dir='database')
return github_data


def update_contributor_info(original: bool, base_dir: str) -> None:
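    """
    Update the submitting user's counters in ``contributors.json``.

    Creates the file and/or the user's entry if missing; ``original`` selects
    whether ``items_added`` or ``items_edited`` is incremented for existing entries.
    """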
contributor_file_path = os.path.join(base_dir, 'contributors.json')
# create file if it doesn't exist
if not os.path.exists(contributor_file_path):
with open(contributor_file_path, 'w') as contributor_f:
json.dump(obj={}, indent=4, fp=contributor_f, sort_keys=True)
with open(contributor_file_path, 'r') as contributor_f:
contributor_data = json.load(contributor_f)
    user_id = os.environ['ISSUE_AUTHOR_USER_ID']
    if user_id not in contributor_data:
        contributor_data[user_id] = dict(
            items_added=1,
            items_edited=0
        )
    elif original:
        contributor_data[user_id]['items_added'] += 1
    else:
        contributor_data[user_id]['items_edited'] += 1
with open(contributor_file_path, 'w') as contributor_f:
json.dump(obj=contributor_data, indent=4, fp=contributor_f, sort_keys=True)


def process_issue_update() -> None:
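    """
    Handle an issue submission: parse the form data, validate the GitHub url,
    and process the repository.
    """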
# process submission file
submission = process_submission()
# check validity of provided GitHub url
git_owner, git_repo = check_github(data=submission)
process_github_url(owner=git_owner, repo=git_repo, submission=submission)


def check_github(data: dict) -> tuple:
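    """
    Extract the GitHub owner and repo name from a url.

    Examples
    --------
    A minimal illustration; ``octocat/Hello-World`` is only a placeholder:

    >>> check_github(data=dict(github_url='https://github.com/octocat/Hello-World'))
    Checking GitHub url
    github_url: https://github.com/octocat/Hello-World
    ('octocat', 'Hello-World')
    """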
print('Checking GitHub url')
url = data['github_url'].strip()
print(f'github_url: {url}')
    # extract GitHub user and repo from url using regex; the repo group stops at the first '/'
    match = re.search(pattern=r'github\.com/([a-zA-Z0-9-]+)/([^/\s]+)', string=url)
if match:
owner = match.group(1)
repo = match.group(2)
return owner, repo
else:
raise SystemExit('Invalid GitHub url')


def process_submission() -> dict:
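    """
    Load the issue form submission from ``submission.json`` and normalize it.

    ``categories`` is split from a comma separated string into a list, and
    ``scanner_mapping`` is parsed from its fenced JSON string into a dict.
    """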
with open(file='submission.json') as file:
data = json.load(file)
# convert string to list
try:
data['categories'] = data['categories'].split(', ')
except KeyError:
exception_writer(error=Exception('No categories provided'), name='categories', end_program=True)
# convert json string to dict, removing ```JSON from start and ``` from end of string
try:
data['scanner_mapping'] = json.loads(data['scanner_mapping'].strip().strip('`').strip('JSON'))
except KeyError:
exception_writer(error=Exception('No scanner mapping provided'), name='scanner_mapping', end_program=True)
except json.decoder.JSONDecodeError:
exception_writer(error=Exception('Invalid scanner mapping provided'), name='scanner_mapping', end_program=True)
return data
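
# For reference, a minimal submission.json that process_submission() accepts
# might look like this (values are illustrative, not taken from a real issue):
# {
#     "github_url": "https://github.com/octocat/Hello-World",
#     "categories": "Movies, Music",
#     "scanner_mapping": "```JSON\n{\"Movies\": []}\n```"
# }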


if __name__ == '__main__':
# setup arguments using argparse
parser = argparse.ArgumentParser(description="Add plugin to database.")
parser.add_argument('--daily_update', action='store_true', help='Run in daily update mode.')
parser.add_argument('--issue_update', action='store_true', help='Run in issue update mode.')
args = parser.parse_args()
destination_dir = os.path.dirname(plugin_file)
os.makedirs(name=destination_dir, exist_ok=True) # create directory if it doesn't exist
if args.issue_update:
process_issue_update()
elif args.daily_update:
# migration tasks go here
for key in og_data:
queue.put(og_data[key]['html_url'])
# finish queue before writing final files
queue.join()
with open(plugin_file, "w") as dest_f:
json.dump(obj=og_data, indent=4, fp=dest_f, sort_keys=True)