-
Notifications
You must be signed in to change notification settings - Fork 3
/
roster_scrape.R
65 lines (54 loc) · 2.23 KB
/
roster_scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Make the magrittr pipe available without attaching the whole package.
`%>%` <- magrittr::`%>%`

# Team lookup table ----
# One row per team, with the abbreviation, the full name, and `pagename`: the
# lower-cased team name with spaces turned into dashes, which is the slug used
# in the roster URL further down.
teams <- nflfastR::teams_colors_logos %>%
  # Drop these abbreviations (presumably legacy/duplicate entries for teams
  # that also appear under another abbreviation -- TODO confirm).
  dplyr::filter(!team_abbr %in% c("LAR", "OAK", "SD", "STL")) %>%
  dplyr::mutate(
    pagename = team_name %>%
      stringr::str_to_lower() %>%
      stringr::str_replace_all(" ", "-")
  ) %>%
  dplyr::select(team_abbr, team_name, pagename)
# Scrape every team's roster page ----
# One index per row of `teams`; each index is used to look up the page slug
# and the identifying columns attached to that team's rows.
inds <- seq_along(teams$pagename)

# The "multiprocess" strategy is deprecated (and defunct in current releases
# of the future package); multisession is the portable replacement.
future::plan(future::multisession)

progressr::with_progress({
  p <- progressr::progressor(along = inds)

  roster_raw <-
    # For sequential debugging swap in: purrr::map_dfr(inds, function(x) {
    furrr::future_map_dfr(inds, function(x) {
      # The worker avoids `%>%` and the `.` placeholder so that it does not
      # depend on the script-level pipe alias being exported to the worker
      # sessions.
      url <- glue::glue("https://www.nfl.com/teams/{teams$pagename[[x]]}/roster")
      tables <- rvest::html_table(xml2::read_html(url))

      # Fail with a message that names the page instead of an opaque
      # "subscript out of bounds" when the page contains no table.
      if (length(tables) == 0) {
        stop("No table found at ", url, call. = FALSE)
      }

      # The first table on the page is taken as the roster; tag every row
      # with the team it was scraped for.
      roster <- dplyr::mutate(
        tibble::as_tibble(tables[[1]]),
        team_abbr = teams$team_abbr[[x]],
        team_name = teams$team_name[[x]]
      )

      # Short pause after each request before reporting progress.
      Sys.sleep(.1)
      p(sprintf("x=%g", x))
      roster
    }) %>%
    dplyr::select(team_abbr, team_name, dplyr::everything()) %>%
    dplyr::mutate(
      scrape_day = lubridate::today("UTC"),
      scrape_point = lubridate::now("UTC"),
      # Scrapes made in January or February are assigned to the previous
      # calendar year's season.
      season = dplyr::if_else(
        lubridate::month(scrape_day) < 3,
        lubridate::year(scrape_day) - 1,
        lubridate::year(scrape_day)
      )
    )
})
# Save the scrape ----
# Written twice: once under a dated file name (year_month_day, UTC) and once
# under a fixed name that is overwritten on every run.
scrape_day <- lubridate::today("UTC")
dated_file <- glue::glue('roster/{lubridate::year(scrape_day)}_{format.Date(scrape_day, "%m")}_{format.Date(scrape_day, "%d")}_roster.rds')
saveRDS(roster_raw, dated_file)
saveRDS(roster_raw, "roster/roster.rds")
# GitHub setup and upload ----
# The directory holding gh.txt depends on where the script runs: a working
# directory containing "Documents" uses the parent directory, anything else
# is treated as the server.
path <- if (grepl("Documents", getwd())) ".." else "/home/ben"

# gh.txt is read as a delimited file; its `pw` column holds the password.
gh_file <- read.delim(glue::glue('{path}/gh.txt'))
password <- as.character(gh_file$pw)

# Set up the connection to the repository in the current directory.
data_repo <- git2r::repository("./")

# Stage the roster files and commit them with a timestamped message.
git2r::add(data_repo, "roster/*")
commit_message <- glue::glue("Updating at {Sys.time()}")
git2r::commit(data_repo, message = commit_message)

# Pull after committing (and pray there are no merge commits), then push.
git2r::pull(data_repo)
gh_credentials <- git2r::cred_user_pass(
  username = "guga31bb",
  password = paste(password)
)
git2r::push(data_repo, credentials = gh_credentials)

# Cron is set up to pipe this message to healthchecks.io.
message(paste("Successfully uploaded to GitHub at", Sys.time()))