Skip to content

Commit

Permalink
Run styler
Browse files Browse the repository at this point in the history
  • Loading branch information
Bisaloo committed Nov 5, 2024
1 parent 1471909 commit 6d90c34
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 89 deletions.
6 changes: 3 additions & 3 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#' A data.frame of historical metadata from CRAN epidemiology packages.
#'
#'
#' @format A data.frame with 5 variables:
#' \describe{
#' \item{Package}{package name}
#' \item{Version}{package version}
#' \item{Authors@R}{authors as listed in the `Authors@R` field from the
#' \item{Authors@R}{authors as listed in the `Authors@R` field from the
#' `DESCRIPTION` file}
#' \item{Author}{authors as listed in the `Author` field from the
#' \item{Author}{authors as listed in the `Author` field from the
#' `DESCRIPTION` file}
#' \item{Maintainer}{package maintainer}
#' }
Expand Down
10 changes: 5 additions & 5 deletions R/expand_names.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#' Expand names from abbreviated forms or initials
#'
#'
#' @param short A character vector of potentially abbreviated names
#' @param expanded A character vector of potentially expanded names
#'
#'
#' @details
#' When you have a list `x` of abbreviated and non-abbreviated names and you want
#' to deduplicate them, this function can be used as `expand_names(x, x)`, which
#' will return the most expanded version available in `x` for each name
#'
#'
#' @return A character vector with the same length as `short`
#'
#' @export
Expand Down Expand Up @@ -66,8 +66,8 @@ expand_names <- function(short, expanded) {
}
min(x, na.rm = TRUE)
})

short[!is.na(longest_match)] <- expanded[longest_match[!is.na(longest_match)]]

return(short)
}
49 changes: 24 additions & 25 deletions R/parse_authors.R
Original file line number Diff line number Diff line change
@@ -1,63 +1,62 @@
#' Parse the `Author` field from a DESCRIPTION file
#'
#' Split the free-text `Author` (or `Maintainer`) field from a DESCRIPTION
#' file into a character vector of individual author names.
#'
#' @param author_string A character containing the `Author` or `Maintainer`
#' field from a `DESCRIPTION` file
#'
#' @return A character vector when `author_string` has length one, otherwise
#' a list of character vectors with one element per element of
#' `author_string`
#'
#' @importFrom utils as.person
#'
#' @examples
#' # Read from a DESCRIPTION file directly
#' utils_description <- system.file("DESCRIPTION", package = "utils")
#' utils_authors <- read.dcf(utils_description, "Author")
#'
#' parse_authors(utils_authors)
#'
#' # Read from a database of CRAN metadata
#' cran_epidemiology_packages$Author |>
#'   parse_authors() |>
#'   unlist() |>
#'   unique() |>
#'   sort()
#'
#' @export
parse_authors <- function(author_string) {
  # Sanitize input from pkgsearch / crandb
  author_string <- author_string |>
    stringi::stri_replace_all_fixed(
      "<U+000a>",
      " "
    )

  # Drop comments, roles and e-mail addresses / URLs, which are written
  # between round, square and angle brackets respectively
  authors_no_brackets <- author_string |>
    remove_brackets("(") |>
    remove_brackets("[") |>
    remove_brackets("<")

  authors_person <- authors_no_brackets |>
    # Extra common strings
    stringi::stri_replace_all_regex("\\bet\\.? al\\.?\\b", " ") |>
    # NOTE(review): only the title itself is removed, so an abbreviated title
    # such as "Dr." leaves its "." in front of the name -- confirm whether
    # this is intended before changing it (snapshot tests depend on it)
    stringi::stri_replace_all_regex("\\b(Prof|Dr|Mr|Mrs|Ms)\\b", "") |>
    # Separators
    stringi::stri_replace_all_regex("\\s+", " ") |>
    stringi::stri_replace_all_regex(
      "\\b(with contributions?|contributed datasets) (of|from|by)\\b:?",
      ", "
    ) |>
    stringi::stri_replace_all_regex("\\band\\b", ", ") |>
    stringi::stri_split_regex("\\s*(,\\s*)+") |>
    # Clean string boundaries
    lapply(function(x) stringi::stri_replace_all_regex(x, "\\.$", "")) |>
    lapply(trimws) |>
    # For strings terminating with ","
    # (setdiff() also drops names duplicated within the same entry)
    lapply(function(x) setdiff(x, ""))

  # A single input string is returned as a bare character vector rather than
  # as a list of length one
  if (length(authors_person) == 1) {
    authors_person <- authors_person[[1]]
  }

  return(authors_person)
}

22 changes: 10 additions & 12 deletions R/parse_authors_r.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
#' Parse the `Authors@R` field from a DESCRIPTION file
#'
#' Parse and evaluate the R code stored in the `Authors@R` field from a
#' DESCRIPTION file, which normally produces `person` objects.
#'
#' @param authors_r_string A character containing the `Authors@R` field from a
#' `DESCRIPTION` file
#'
#' @return A `list` with one element per expression parsed from
#' `authors_r_string`. Each element is the value of the evaluated expression,
#' typically a `person` object.
#'
#' @examples
#' # Read from a DESCRIPTION file directly
#' pkg_description <- system.file("DESCRIPTION", package = "authoritative")
#' authors_r_pkg <- read.dcf(pkg_description, "Authors@R")
#'
#' parse_authors_r(authors_r_pkg)
#'
#' # Read from a database of CRAN metadata
#' cran_epidemiology_packages |>
#'   subset(!is.na(`Authors@R`), `Authors@R`, drop = TRUE) |>
#'   parse_authors_r() |>
#'   head()
#'
#' @export
parse_authors_r <- function(authors_r_string) {
  # Sanitize input from pkgsearch / crandb
  authors_r_string <- authors_r_string |>
    stringi::stri_replace_all_fixed(
      "<U+000a>",
      " "
    )

  # NOTE(review): the field content is parsed and evaluated as R code, so any
  # code it contains is executed. Only use this on metadata from a trusted
  # source.
  lapply(str2expression(authors_r_string), eval)
}
23 changes: 10 additions & 13 deletions R/remove_brackets.R
Original file line number Diff line number Diff line change
@@ -1,43 +1,40 @@
#' Remove content between brackets
#'
#' @param string A character vector to clean
#' @param bracket Type of bracket containing the content to remove.
#' Must be one of "(" (default), "<", "["
#'
#' @returns A character vector of the same length as `string`, where
#' elements have been stripped of their `bracket`ed content, brackets
#' included.
#'
#' @export
#'
#' @examples
#' remove_brackets(
#'   "R source code is versioned with svn (and mirrored on GitHub)"
#' )
#'
#' remove_brackets(
#'   c(
#'     "r-project.org <https://r-project.org>",
#'     "python.org <https://python.org>"
#'   ),
#'   bracket = "<"
#' )
#'
#' remove_brackets(
#'   "Content in square bracket [is removed] in the middle of a string",
#'   bracket = "["
#' )
remove_brackets <- function(string, bracket = c("(", "<", "[")) {
  bracket <- match.arg(bracket)

  closing_bracket <- switch(bracket,
    "<" = ">",
    "(" = ")",
    "[" = "]"
  )

  # The pattern is: escaped opening bracket, any run of characters other than
  # the closing bracket, escaped closing bracket. Each match therefore ends at
  # the first closing bracket that follows an opening one. Surrounding
  # whitespace is left untouched.
  string |>
    stringi::stri_replace_all_regex(
      paste0("\\", bracket, "[^", closing_bracket, "]*\\", closing_bracket),
      ""
    )
}
8 changes: 4 additions & 4 deletions data-raw/cran_epidemiology_packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ download.file(
tf
)

ctv_pkgs <- ctv::read.ctv(tf) |>
purrr::pluck("packagelist", "name")
ctv_pkgs <- ctv::read.ctv(tf) |>
purrr::pluck("packagelist", "name")

cran_epidemiology_packages <- ctv_pkgs |>
purrr::map(pkgsearch::cran_package_history)

cran_epidemiology_packages <- cran_epidemiology_packages |>
cran_epidemiology_packages <- cran_epidemiology_packages |>
dplyr::select(
Package,
Package,
Version,
`Authors@R`,
Author,
Expand Down
2 changes: 1 addition & 1 deletion man/authoritative-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions tests/testthat/test-dev-utils.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
test_that("release_bullets() returns what usethis expects", {
  # The result must be of type character
  bullets <- release_bullets()
  expect_type(bullets, "character")
})
12 changes: 3 additions & 9 deletions tests/testthat/test-expand_names.R
Original file line number Diff line number Diff line change
@@ -1,43 +1,37 @@
test_that("expand_names() transforms matching names", {
  # Every abbreviated variant is replaced by the single expanded form
  expect_identical(
    expand_names(
      c("W A Mozart", "Wolfgang Mozart", "Wolfgang A Mozart"),
      "Wolfgang Amadeus Mozart"
    ),
    rep_len("Wolfgang Amadeus Mozart", 3)
  )

  # The expanded names are listed in a different order than the short ones;
  # the output follows the order of `short`
  expect_identical(
    expand_names(
      c("Wolfgang Mozart", "Johannes Bach"),
      c("Johannes Sebastian Bach", "Wolfgang Amadeus Mozart")
    ),
    c("Wolfgang Amadeus Mozart", "Johannes Sebastian Bach")
  )
})

test_that("expand_names() favours longer forms", {
  # `expanded` contains several candidates of different lengths; the fully
  # spelled-out name is expected for every input
  expect_identical(
    expand_names(
      c("W A Mozart", "Wolfgang Mozart", "Wolfgang A Mozart"),
      c("W A Mozart", "Wolfgang Amadeus Mozart", "Wolfgang A Mozart")
    ),
    rep_len("Wolfgang Amadeus Mozart", 3)
  )
})

test_that("expand_names() leaves non-matching names untouched", {
  short_names <- c("Johannes Bach", "Wolfgang Mozart")
  # Only the name that has an expanded counterpart is expected to change
  expected_names <- c("Johannes Bach", "Wolfgang Amadeus Mozart")

  expect_identical(
    expand_names(short_names, "Wolfgang Amadeus Mozart"),
    expected_names
  )
})
6 changes: 2 additions & 4 deletions tests/testthat/test-parse_authors.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
test_that("parse_authors() snapshot", {
  # Snapshot of the distinct parsed results for the bundled CRAN metadata
  cran_epidemiology_packages$Author |>
    parse_authors() |>
    unique() |>
    expect_snapshot()
})
6 changes: 2 additions & 4 deletions tests/testthat/test-parse_authors_R.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
test_that("parse_authors_r() snapshot", {
  # This seems to be R version dependent?
  skip_on_ci()

  # Snapshot of the distinct parsed results for the bundled CRAN metadata
  cran_epidemiology_packages$`Authors@R` |>
    parse_authors_r() |>
    unique() |>
    expect_snapshot()
})
10 changes: 3 additions & 7 deletions tests/testthat/test-remove_brackets.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
test_that("remove_brackets() works in simple cases", {
  # Whitespace around the removed content is preserved, hence the trailing
  # space in the expected values
  expect_identical(
    remove_brackets("Wolfgang Amadeus Mozart (1756-1791)"),
    "Wolfgang Amadeus Mozart "
  )

  expect_identical(
    remove_brackets("A sourced claim [1]", bracket = "["),
    "A sourced claim "
  )

  # Both bracketed groups are removed but the space separating them is kept,
  # so the result ends with two spaces
  expect_identical(
    remove_brackets("A sourced claim [1] [12]", bracket = "["),
    "A sourced claim  "
  )
})

test_that("remove_brackets() works with nested brackets", {
  nested <- "A sourced claim (source: [1])"

  # Removing the outer pair also drops the square brackets it contains
  outer_removed <- remove_brackets(nested, bracket = "(")
  expect_identical(outer_removed, "A sourced claim ")

  # Removing only the inner pair leaves the parentheses in place
  inner_removed <- remove_brackets(nested, bracket = "[")
  expect_identical(inner_removed, "A sourced claim (source: )")
})

0 comments on commit 6d90c34

Please sign in to comment.