-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·83 lines (69 loc) · 2.08 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import schedule
import sys
import time
import pandas as pd
from datetime import datetime
from src.page import Page
from src.scraper import Scraper
from src.telegram import Telegram
def print_page_info(scraper: object, page: object):
    """Print a framed debug dump of one page and the text scraped for it.

    Args:
        scraper: Object exposing ``get_element_text(page)``; its result is
            shown under the ``[message]`` heading.
        page: Object exposing ``name``, ``url``, ``filter_type``, ``element``
            and ``description`` attributes.
    """
    rule = '=' * 28
    # Attributes are read in display order; the scraper is queried last.
    fields = (
        ('name', page.name),
        ('url', page.url),
        ('filter_type', page.filter_type),
        ('element', page.element),
        ('description', page.description),
    )
    sections = [f'[{label}]\n{value}\n\n' for label, value in fields]
    # The final section ends with a single newline so the closing rule
    # sits directly below the scraped text.
    sections.append(f'[message]\n{scraper.get_element_text(page)}\n')
    print(f'{rule}\n' + ''.join(sections) + rule)
def _load_pages(filename):
    """Read the '|'-separated pages file *filename* from the working directory.

    Raises:
        Exception: If the path does not exist or its extension is not
            exactly ``.csv``.
    """
    path = f'./{filename}'
    extension = os.path.splitext(filename)[1]
    # Compare against the exact suffix. The former ``extension in ('.csv')``
    # was a substring test on a plain string (the parentheses do not build a
    # tuple), so '', '.c' and '.cs' were accepted as valid extensions.
    if not os.path.exists(path) or extension != '.csv':
        raise Exception('No encontrado o Extension incorrecta')
    return pd.read_csv(path, sep='|')


def _build_message(scraper):
    """Return one text with a section for every page held by *scraper*."""
    sections = []
    for page in scraper.pages:
        # The separator rule is as wide as the page description.
        rule = '=' * len(page.description)
        sections.append(
            f'{rule}'
            f'\n{page.name}'
            f'\n{page.description}'
            f'\n{scraper.get_element_text(page)}\n'
        )
    return ''.join(sections)


def main(argv):
    """
    Scrape the pages listed in each CSV file and send the result via Telegram.

    Run as:
        python main.py pages.csv

    Args:
        argv: Iterable of file names, resolved relative to the current
            working directory. Each file is read with ``|`` as separator and
            must provide the columns ``name``, ``url``, ``filter_type``,
            ``element`` and ``description``.

    Raises:
        Exception: If a file does not exist or does not have the ``.csv``
            extension.
    """
    start = datetime.now()
    for filename in argv:
        df_pages = _load_pages(filename)
        scraper = Scraper()
        for _, row in df_pages.iterrows():
            # NOTE(review): assigning to ``scraper.pages`` presumably appends
            # through a property setter, since the attribute is iterated when
            # the message is built -- confirm in src/scraper.py.
            scraper.pages = Page(
                name=row['name'],
                url=row['url'],
                filter_type=row['filter_type'],
                element=row['element'],
                description=row['description'],
            )
        # The Telegram client is created before any element text is scraped,
        # matching the original order of operations.
        telegram = Telegram()
        telegram.send_message(_build_message(scraper))
    stop = datetime.now()
    print(f'\nTiempo => {stop - start}')
if __name__ == '__main__':
    FILENAME = 'pages.csv'
    # For a one-off run, call ``main([FILENAME])`` or ``main(sys.argv[1:])``
    # directly instead of scheduling.
    #
    # Hand the callable and its argument to ``do`` separately. The previous
    # ``do(main([FILENAME]))`` executed ``main`` immediately and passed its
    # return value (None) as the job, so nothing valid was ever scheduled.
    schedule.every().day.at('06:00').do(main, [FILENAME])
    # ``schedule`` only runs jobs when polled, so keep the process alive and
    # check for due jobs periodically.
    while True:
        schedule.run_pending()
        time.sleep(30)