Skip to content

Commit

Permalink
Merge pull request #1332 from Princeton-CDH/bugfix/1331-tag-indexing
Browse files Browse the repository at this point in the history
Fix issue with outdated tag set in document index (#1331)
  • Loading branch information
blms authored Jul 21, 2023
2 parents b0df58d + 565be05 commit 8fbf70e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 7 deletions.
5 changes: 4 additions & 1 deletion geniza/corpus/apps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.apps import AppConfig
from django.db.models.signals import pre_save
from django.db.models.signals import m2m_changed, pre_save


class CorpusAppConfig(AppConfig):
Expand All @@ -12,4 +12,7 @@ def ready(self):
from geniza.corpus.models import TagSignalHandlers

pre_save.connect(TagSignalHandlers.unidecode_tag, sender="taggit.Tag")
m2m_changed.connect(
TagSignalHandlers.tagged_item_change, sender="taggit.TaggedItem"
)
return super().ready()
19 changes: 14 additions & 5 deletions geniza/corpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ class DocumentSignalHandlers:
"fragment": "fragments",
"tag": "tags",
"document type": "doctype",
"tagged item": "tagged_items",
"Related Fragment": "textblock", # textblock verbose name
"footnote": "footnotes",
"source": "footnotes__source",
Expand Down Expand Up @@ -460,6 +461,14 @@ def unidecode_tag(sender, instance, **kwargs):
"""Convert saved tags to ascii, stripping diacritics."""
instance.name = unidecode(instance.name)

@staticmethod
def tagged_item_change(sender, instance, action, **kwargs):
"""Ensure document (=instance) is indexed after the tags m2m relationship is saved and the
list of tags is pulled from the database, on any tag change."""
if action in ["post_add", "post_remove", "post_clear"]:
logger.debug("taggit.TaggedItem %s, reindexing related document", action)
ModelIndexable.index_items(Document.objects.filter(pk=instance.pk))


class DocumentQuerySet(MultilingualQuerySet):
def metadata_prefetch(self):
Expand Down Expand Up @@ -1237,9 +1246,6 @@ def merge_with(self, merge_docs, rationale, user=None):
metadata into this document, adds the merged documents into
list of old PGP IDs, and creates a log entry documenting
the merge, including the rationale."""
# initialize old pgpid list if previously unset
if self.old_pgpids is None:
self.old_pgpids = []

# if user is not specified, log entry will be associated with
# script and document will be flagged for review
Expand All @@ -1262,10 +1268,13 @@ def merge_with(self, merge_docs, rationale, user=None):
needs_review = [self.needs_review] if self.needs_review else []

for doc in merge_docs:
# add merge id to old pgpid list
self.old_pgpids.append(doc.id)
# add any tags from merge document tags to primary doc
self.tags.add(*doc.tags.names())
# initialize old pgpid list if previously unset
if self.old_pgpids is None:
self.old_pgpids = []
# add merge id to old pgpid list
self.old_pgpids.append(doc.id)
# add description if set and not duplicated
# for all supported languages
for lang_code in language_codes:
Expand Down
2 changes: 1 addition & 1 deletion geniza/corpus/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def make_document(fragment):
(Information from Mediterranean Society, IV, p. 281)""",
doctype=DocumentType.objects.get_or_create(name_en="Legal")[0],
)
TextBlock.objects.create(document=doc, fragment=fragment)
doc.tags.add("bill of sale", "real estate")
TextBlock.objects.create(document=doc, fragment=fragment)
dctype = ContentType.objects.get_for_model(Document)
script_user = User.objects.get(username=settings.SCRIPT_USERNAME)
team_user = User.objects.get(username=settings.TEAM_USERNAME)
Expand Down
16 changes: 16 additions & 0 deletions geniza/corpus/tests/test_corpus_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,19 @@ def test_unidecode_tags():
# pre_save signal should strip diacritics from tag and convert to ASCII
tag = Tag.objects.create(name="mu'ālim", slug="mualim")
assert tag.name == "mu'alim"


@pytest.mark.django_db
@patch.object(ModelIndexable, "index_items")
def test_tagged_item_change(mock_indexitems, document):
tag_count = document.tags.count()
tag = Tag.objects.create(name="mu'ālim", slug="mualim")
tag2 = Tag.objects.create(name="tag2", slug="tag2")
# should reindex document with the updated set of tags on save
document.tags.add(tag)
document.tags.add(tag2)
document.save()
# should be called at least once for the document post-save & once for the tags M2M change
assert mock_indexitems.call_count >= 2
# most recent call should have the full updated set of tags
assert mock_indexitems.call_args.args[0][0].tags.count() == tag_count + 2

0 comments on commit 8fbf70e

Please sign in to comment.