-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathscrape.py
34 lines (30 loc) · 1.01 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Simple search for terms in SMB2021 web pages
# Author: Randy Heiland
from urllib.request import urlopen
import re
#pages = ["http://schedule.smb2021.org/CBBS/"]
# Demo with a few pages
# Schedule pages to scan. Each URL appears exactly once so that a page is not
# fetched, and its hits not reported, more than once.
pages = [
    "http://schedule.smb2021.org/CBBS/",
    "http://schedule.smb2021.org/CDEV/",
    "http://schedule.smb2021.org/DDMB/",
    "http://schedule.smb2021.org/IMMU/",
]
# Terms to look for in every page, each paired with the regex flags used for
# that search. "ODE" is matched case-sensitively (flags=0); the other terms
# ignore case.
search_terms = [
    ("cyano", re.IGNORECASE),
    ("ODE", 0),
    ("boolean", re.IGNORECASE),
]

# Fetch each page once and report which of the terms occur in its HTML.
for url in pages:
    # The with-block closes the HTTP response as soon as the body is read.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")
    for string_to_find, flags in search_terms:
        # Only presence matters, so stop at the first match instead of
        # collecting every occurrence.
        if re.search(string_to_find, html, flags):
            print(string_to_find, " found in ", url)