"""Curation script that repoints RuPAC 2021 records at their JACoW fulltext PDFs."""

from inspirehep.curation.search_check_do import SearchCheckDo

# URL template for a paper's PDF; the placeholder receives the lower-cased
# article ID taken from the record's ``publication_info``.
FULLTEXT_URL = "https://jacow.org/rupac2021/papers/{}.pdf"


class FixRupac2021Fulltexts(SearchCheckDo):
    """Fix fulltexts for RuPAC 2021 (Alushta, Crimea).

    NOTE(review): how ``query``, ``check`` and ``do`` are driven is defined by
    the ``SearchCheckDo`` base class, which is not visible here; presumably
    ``query`` selects candidate records and ``check``/``do`` run per record --
    confirm against the base class.
    """

    # Search expression matching records that reference conference record 1954430.
    query = "publication_info.conference_record.$ref:1954430"

    @staticmethod
    def check(record, logger, state):
        """Return True when ``C21-09-27.4`` is among the record's CNUMs."""
        cnums = record.get_value("publication_info.cnum", [])
        return "C21-09-27.4" in cnums

    @staticmethod
    def do(record, logger, state):
        """Replace the record's single document with the JACoW PDF URL.

        The record is left untouched (and a warning is logged) unless it has
        exactly one article ID and exactly one existing document.
        """
        article_ids = record.get_value("publication_info.artid", [])

        if len(article_ids) != 1:
            # Zero or several article IDs: no unique PDF name can be derived.
            logger.warning("Ambiguous article IDs.", artids=article_ids)
        elif (num_docs := len(record.get("documents", []))) != 1:
            # Only a lone existing document is considered safe to overwrite.
            logger.warning("Ambiguous or missing documents.", num_docs=num_docs)
        else:
            pdf_url = FULLTEXT_URL.format(article_ids[0].lower())
            record["documents"] = [{"url": pdf_url}]


# Instantiated at import time, as in the original script.
# NOTE(review): presumably construction is what launches the run -- confirm
# against SearchCheckDo.
FixRupac2021Fulltexts()