Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue with outdated tag set in document index (#1331) #1332

Merged
merged 8 commits into from
Jul 21, 2023
5 changes: 4 additions & 1 deletion geniza/corpus/apps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.apps import AppConfig
from django.db.models.signals import pre_save
from django.db.models.signals import m2m_changed, pre_save


class CorpusAppConfig(AppConfig):
Expand All @@ -12,4 +12,7 @@ def ready(self):
from geniza.corpus.models import TagSignalHandlers

pre_save.connect(TagSignalHandlers.unidecode_tag, sender="taggit.Tag")
m2m_changed.connect(
TagSignalHandlers.tagged_item_change, sender="taggit.TaggedItem"
)
return super().ready()
19 changes: 14 additions & 5 deletions geniza/corpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ class DocumentSignalHandlers:
"fragment": "fragments",
"tag": "tags",
"document type": "doctype",
"tagged item": "tagged_items",
"Related Fragment": "textblock", # textblock verbose name
"footnote": "footnotes",
"source": "footnotes__source",
Expand Down Expand Up @@ -485,6 +486,14 @@ def unidecode_tag(sender, instance, **kwargs):
"""Convert saved tags to ascii, stripping diacritics."""
instance.name = unidecode(instance.name)

@staticmethod
def tagged_item_change(sender, instance, action, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the instance here is the document with the m2m relation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! Added comment to clarify.

"""Ensure document (=instance) is indexed after the tags m2m relationship is saved and the
list of tags is pulled from the database, on any tag change."""
if action in ["post_add", "post_remove", "post_clear"]:
logger.debug("taggit.TaggedItem %s, reindexing related document", action)
ModelIndexable.index_items(Document.objects.filter(pk=instance.pk))


class DocumentQuerySet(MultilingualQuerySet):
def metadata_prefetch(self):
Expand Down Expand Up @@ -1262,9 +1271,6 @@ def merge_with(self, merge_docs, rationale, user=None):
metadata into this document, adds the merged documents into
list of old PGP IDs, and creates a log entry documenting
the merge, including the rationale."""
# initialize old pgpid list if previously unset
if self.old_pgpids is None:
self.old_pgpids = []

# if user is not specified, log entry will be associated with
# script and document will be flagged for review
Expand All @@ -1287,10 +1293,13 @@ def merge_with(self, merge_docs, rationale, user=None):
needs_review = [self.needs_review] if self.needs_review else []

for doc in merge_docs:
# add merge id to old pgpid list
self.old_pgpids.append(doc.id)
# add any tags from merge document tags to primary doc
self.tags.add(*doc.tags.names())
# initialize old pgpid list if previously unset
if self.old_pgpids is None:
self.old_pgpids = []
# add merge id to old pgpid list
self.old_pgpids.append(doc.id)
# add description if set and not duplicated
# for all supported languages
for lang_code in language_codes:
Expand Down
2 changes: 1 addition & 1 deletion geniza/corpus/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def make_document(fragment):
(Information from Mediterranean Society, IV, p. 281)""",
doctype=DocumentType.objects.get_or_create(name_en="Legal")[0],
)
TextBlock.objects.create(document=doc, fragment=fragment)
doc.tags.add("bill of sale", "real estate")
TextBlock.objects.create(document=doc, fragment=fragment)
dctype = ContentType.objects.get_for_model(Document)
script_user = User.objects.get(username=settings.SCRIPT_USERNAME)
team_user = User.objects.get(username=settings.TEAM_USERNAME)
Expand Down
16 changes: 16 additions & 0 deletions geniza/corpus/tests/test_corpus_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,19 @@ def test_unidecode_tags():
# pre_save signal should strip diacritics from tag and convert to ASCII
tag = Tag.objects.create(name="mu'ālim", slug="mualim")
assert tag.name == "mu'alim"


@pytest.mark.django_db
@patch.object(ModelIndexable, "index_items")
def test_tagged_item_change(mock_indexitems, document):
tag_count = document.tags.count()
tag = Tag.objects.create(name="mu'ālim", slug="mualim")
tag2 = Tag.objects.create(name="tag2", slug="tag2")
# should reindex document with the updated set of tags on save
document.tags.add(tag)
document.tags.add(tag2)
document.save()
# should be called at least once for the document post-save & once for the tags M2M change
assert mock_indexitems.call_count >= 2
# most recent call should have the full updated set of tags
assert mock_indexitems.call_args.args[0][0].tags.count() == tag_count + 2