Princeton-CDH · blms · Jul 21, 2023 · Feb 27, 2023 · Jun 27, 2023 · Jun 28, 2023
diff --git a/geniza/corpus/apps.py b/geniza/corpus/apps.py
@@ -1,5 +1,5 @@
 from django.apps import AppConfig
-from django.db.models.signals import pre_save
+from django.db.models.signals import m2m_changed, pre_save
 
 
 class CorpusAppConfig(AppConfig):
@@ -12,4 +12,7 @@ def ready(self):
         from geniza.corpus.models import TagSignalHandlers
 
         pre_save.connect(TagSignalHandlers.unidecode_tag, sender="taggit.Tag")
+        m2m_changed.connect(
+            TagSignalHandlers.tagged_item_change, sender="taggit.TaggedItem"
+        )
         return super().ready()
diff --git a/geniza/corpus/models.py b/geniza/corpus/models.py
@@ -428,6 +428,7 @@ class DocumentSignalHandlers:
         "fragment": "fragments",
         "tag": "tags",
         "document type": "doctype",
+        "tagged item": "tagged_items",
         "Related Fragment": "textblock",  # textblock verbose name
         "footnote": "footnotes",
         "source": "footnotes__source",
@@ -485,6 +486,15 @@ def unidecode_tag(sender, instance, **kwargs):
         """Convert saved tags to ascii, stripping diacritics."""
         instance.name = unidecode(instance.name)
 
+    @staticmethod
+    def tagged_item_change(sender, instance, action, **kwargs):
+        """Reindex the document (instance) after its tags are added, removed, or cleared.
+        NOTE: refresh_from_db() reloads fields, discarding unsaved changes on instance."""
+        if action in ["post_add", "post_remove", "post_clear"]:
+            logger.debug("taggit.TaggedItem %s, reindexing related document", action)
+            instance.refresh_from_db()
+            ModelIndexable.index_items([instance])
+
 
 class DocumentQuerySet(MultilingualQuerySet):
     def metadata_prefetch(self):
@@ -1219,10 +1229,6 @@ def index_data(self):
             "post_save": DocumentSignalHandlers.related_save,
             "pre_delete": DocumentSignalHandlers.related_delete,
         },
-        "tags": {
-            "post_save": DocumentSignalHandlers.related_save,
-            "pre_delete": DocumentSignalHandlers.related_delete,
-        },
         "doctype": {
             "post_save": DocumentSignalHandlers.related_save,
             "pre_delete": DocumentSignalHandlers.related_delete,
@@ -1262,9 +1268,6 @@ def merge_with(self, merge_docs, rationale, user=None):
         metadata into this document, adds the merged documents into
         list of old PGP IDs, and creates a log entry documenting
         the merge, including the rationale."""
-        # initialize old pgpid list if previously unset
-        if self.old_pgpids is None:
-            self.old_pgpids = []
 
         # if user is not specified, log entry will be associated with
         # script and document will be flagged for review
@@ -1287,10 +1290,15 @@ def merge_with(self, merge_docs, rationale, user=None):
         needs_review = [self.needs_review] if self.needs_review else []
 
         for doc in merge_docs:
-            # add merge id to old pgpid list
-            self.old_pgpids.append(doc.id)
             # add any tags from merge document tags to primary doc
+            # NOTE: This must be done before any other changes to self because it will fire
+            # m2m_changed signal which calls self.refresh_from_db()
             self.tags.add(*doc.tags.names())
+            # initialize old pgpid list if previously unset
+            if self.old_pgpids is None:
+                self.old_pgpids = []
+            # add merge id to old pgpid list
+            self.old_pgpids.append(doc.id)
             # add description if set and not duplicated
             # for all supported languages
             for lang_code in language_codes:

diff --git a/geniza/corpus/tests/test_corpus_signals.py b/geniza/corpus/tests/test_corpus_signals.py
@@ -103,3 +103,19 @@ def test_unidecode_tags():
     # pre_save signal should strip diacritics from tag and convert to ASCII
     tag = Tag.objects.create(name="mu'ālim", slug="mualim")
     assert tag.name == "mu'alim"
+
+
+@pytest.mark.django_db
+@patch.object(ModelIndexable, "index_items")
+def test_tagged_item_change(mock_indexitems, document):
+    """Adding tags to a document should reindex it with the updated set of tags."""
+    tag_count = document.tags.count()
+    tag = Tag.objects.create(name="mu'ālim", slug="mualim")
+    tag2 = Tag.objects.create(name="tag2", slug="tag2")
+    document.tags.add(tag)
+    document.tags.add(tag2)
+    document.save()
+    # should be called at least once for the document post-save & once for the tags M2M change
+    assert mock_indexitems.call_count >= 2
+    # first item passed in the most recent call should have the full updated set of tags
+    assert mock_indexitems.call_args.args[0][0].tags.count() == tag_count + 2