Skip to content

Commit

Permalink
workflow: added new workflow for manual merger
Browse files Browse the repository at this point in the history
Signed-off-by: Antonio Cesarano <[email protected]>

* define a new workflow for the manual merge use case
* add new task functions specific for such use case
* integration tests
  • Loading branch information
ammirate committed Oct 27, 2017
1 parent bff3e1e commit 0857635
Show file tree
Hide file tree
Showing 9 changed files with 546 additions and 1 deletion.
16 changes: 15 additions & 1 deletion inspirehep/modules/workflows/actions/merge_approval.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

from __future__ import absolute_import, division, print_function

from inspirehep.modules.workflows.workflows.manual_merge import ManualMerge


class MergeApproval(object):
"""Class representing the merge action."""
Expand All @@ -32,4 +34,16 @@ class MergeApproval(object):
@staticmethod
def resolve(obj, *args, **kwargs):
    """Record the approval on ``obj`` and resume its workflow.

    Flags the object as approved (and not auto-approved) in
    ``obj.extra_data``, removes its pending action, saves it and then
    continues the workflow.

    Args:
        obj: the workflow object the action is resolved for.
        *args: unused.
        **kwargs: unused.

    Returns:
        bool: always ``True``.
    """
    obj.extra_data["approved"] = True
    obj.extra_data["auto-approved"] = False
    obj.remove_action()
    obj.save()

    # The manual merge workflow must be resumed synchronously; every other
    # workflow is continued in delayed mode.
    run_delayed = obj.workflow.name != 'manual_merge'
    obj.continue_workflow(delayed=run_delayed)
    return True
133 changes: 133 additions & 0 deletions inspirehep/modules/workflows/tasks/manual_merging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Tasks related to manual merge generic record."""

from __future__ import absolute_import, division, print_function

import json

from invenio_db import db

from inspire_json_merger.inspire_json_merger import inspire_json_merge
from inspire_dojson.utils import get_record_ref

from inspirehep.modules.workflows.tasks.merging import (
insert_wf_record_source,
)
from inspirehep.modules.workflows.utils import with_debug_logging
from inspirehep.utils.record_getter import get_db_record


def _get_head_and_update(obj):
    """Return the ``head`` and ``update`` entries of ``obj.extra_data``.

    Args:
        obj: object whose ``extra_data`` mapping contains the ``head`` and
            ``update`` keys.

    Returns:
        tuple: ``(head, update)`` as stored in ``obj.extra_data``.
    """
    extra = obj.extra_data
    return extra['head'], extra['update']


@with_debug_logging
def merge_records(obj, eng):
    """Merge the ``head`` and ``update`` records stored in ``obj``.

    Both records are read from ``obj.extra_data`` and passed to
    ``inspire_json_merge`` together with an empty root and the stored
    ``head_source``. The merged record is written to ``obj.data`` and the
    conflicts, each converted from its JSON representation into plain
    Python objects, to ``obj.extra_data['conflicts']``.

    Args:
        obj: workflow object whose ``extra_data`` holds ``head``, ``update``
            and ``head_source``.
        eng: workflow engine (unused).
    """
    head, update = _get_head_and_update(obj)
    source_of_head = obj.extra_data['head_source']

    merge_result = inspire_json_merge(
        root={},
        head=head,
        update=update,
        head_source=source_of_head,
    )
    merged_record, found_conflicts = merge_result

    obj.data = merged_record

    serialized_conflicts = []
    for conflict in found_conflicts:
        serialized_conflicts.append(json.loads(conflict.to_json()))
    obj.extra_data['conflicts'] = serialized_conflicts


@with_debug_logging
def halt_for_approval(obj, eng):
    """Halt the workflow engine with the ``merge_approval`` action.

    Args:
        obj: workflow object being processed (unused).
        eng: workflow engine to halt.
    """
    halt_message = 'Manual Merge halted for curator approval.'
    eng.halt(action="merge_approval", msg=halt_message)


@with_debug_logging
def edit_metadata_and_store(obj, eng):
    """Store the merged record as ``head`` and retire the ``update`` record.

    The ``head`` record's content is replaced by ``obj.data``, the
    ``update`` record is merged into ``head`` and deleted, and the two
    records are cross-referenced (``new_record`` on ``update``,
    ``deleted_records`` on ``head``) before everything is committed.

    Args:
        obj: workflow object holding the merged record in ``obj.data`` and
            the ``head_control_number`` / ``update_control_number`` entries
            in ``obj.extra_data``.
        eng: workflow engine (unused).
    """
    extra = obj.extra_data
    head_record = get_db_record('lit', extra['head_control_number'])
    update_record = get_db_record('lit', extra['update_control_number'])

    # Replace head's content with the merged record.
    head_record.clear()
    head_record.update(obj.data)

    # After the merge update's uuid points to head's uuid; the delete then
    # marks the update record as deleted.
    update_record.merge(head_record)
    update_record.delete()

    # Add schema contents referring the deleted record to the merged one.
    update_record['new_record'] = get_record_ref(
        head_record['control_number'],
        endpoint='record'
    )
    _add_deleted_records(head_record, update_record)

    for record in (head_record, update_record):
        record.commit()
    db.session.commit()


def _add_deleted_records(new_rec, deleted_rec):
    """Register ``deleted_rec`` in the ``deleted_records`` of ``new_rec``.

    A reference built from the control number of ``deleted_rec`` is appended
    to ``new_rec['deleted_records']``, creating the list when it is missing.

    Args:
        new_rec: record that replaces ``deleted_rec``; modified in place.
        deleted_rec: record being replaced; must have a ``control_number``.
    """
    deleted_ref = get_record_ref(deleted_rec['control_number'], 'record')
    if 'deleted_records' not in new_rec:
        new_rec['deleted_records'] = []
    new_rec['deleted_records'].append(deleted_ref)


def save_records_as_roots(obj, eng):
    """Save the ``head`` and ``update`` records as roots.

    ``head`` is always stored under ``head_source``. ``update`` is stored,
    under its lower-cased source, only when that source differs from the
    one of ``head``, so that a single root per source is saved.

    Args:
        obj: workflow object whose ``extra_data`` holds ``head``,
            ``update``, ``head_source``, ``update_source``, ``head_uuid``
            and ``update_uuid``.
        eng: workflow engine (unused).
    """
    head, update = _get_head_and_update(obj)

    head_source = obj.extra_data['head_source']

    insert_wf_record_source(
        json=head,
        source=head_source,
        record_uuid=obj.extra_data['head_uuid'],
    )

    # The update root is stored under its lower-cased source, so the
    # comparison must use the same normalization: otherwise two sources
    # differing only in case would produce two roots for one source.
    update_source = obj.extra_data['update_source'].lower()
    if hasattr(head_source, 'lower'):
        comparable_head_source = head_source.lower()
    else:
        comparable_head_source = head_source

    # need to save just one root per source
    if update_source != comparable_head_source:
        insert_wf_record_source(
            json=update,
            source=update_source,
            record_uuid=obj.extra_data['update_uuid'],
        )
    obj.save()
    db.session.commit()
1 change: 1 addition & 0 deletions inspirehep/modules/workflows/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@

from .article import Article # noqa: F401
from .author import Author # noqa: F401
from .manual_merge import ManualMerge # noqa: F401
110 changes: 110 additions & 0 deletions inspirehep/modules/workflows/workflows/manual_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Workflow for manual merging generic records."""

from __future__ import absolute_import, division, print_function

from invenio_workflows import start, workflow_object_class
from inspire_json_merger.inspire_json_merger import (
get_head_source as merger_get_source
)

from inspirehep.modules.workflows.tasks.manual_merging import (
halt_for_approval,
edit_metadata_and_store, merge_records, save_records_as_roots,
)
from inspirehep.modules.workflows.tasks.merging import (
get_head_source
)
from inspirehep.utils.record import get_source
from inspirehep.utils.record_getter import get_db_record


class ManualMerge(object):
    """Workflow definition for the manual merge of two records."""

    name = 'MERGE'
    data_type = ''

    # Ordered list of the tasks run by the workflow.
    workflow = [
        merge_records,
        halt_for_approval,
        # when the workflow is resumed here, the workflow object is expected
        # to contain the merged record accepted by a curator
        save_records_as_roots,
        edit_metadata_and_store,
    ]


def start_merger(
    head_id,
    update_id,
    current_user_id=None,
):
    """Start a new ManualMerge workflow to merge two records manually.

    Both records are loaded from the database, a workflow object is created
    and filled with everything the workflow tasks need (sources, control
    numbers, uuids and the records themselves), and the ``manual_merge``
    workflow is started on it.

    Args:
        head_id: the id of the first record to merge. This record is the one
            that will be updated with the new information.
        update_id: the id of the second record to merge. This record is the
            one that is going to be deleted and replaced by `head`.
        current_user_id: Id of the current user provided by the Flask app.

    Returns:
        (int): the current workflow object's id.
    """
    head = get_db_record('lit', head_id)
    update = get_db_record('lit', update_id)

    workflow_object = workflow_object_class.create(
        data=None,
        id_user=current_user_id,
        data_type='hep'
    )

    wf_id = workflow_object.id  # kept to start the workflow and return it

    # Use the source returned by get_head_source, falling back to the
    # merger's own source detection when that is empty.
    head_source = get_head_source(head.id) or merger_get_source(head)

    # Everything is stored up front so that later tasks need as few
    # requests as possible.
    workflow_object.extra_data.update({
        'pid_type': 'lit',  # TODO: support other pid types
        'recid_head': head_id,
        'recid_update': update_id,
        'head_source': head_source.lower(),
        'update_source': get_source(update).lower(),
        'head_control_number': head_id,
        'update_control_number': update_id,
        'head_uuid': str(head.id),
        'update_uuid': str(update.id),
        'head': head,
        'update': update,
    })

    workflow_object.save()

    start('manual_merge', object_id=wf_id)

    return wf_id
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@
'invenio_workflows.workflows': [
'article = inspirehep.modules.workflows.workflows:Article',
'author = inspirehep.modules.workflows.workflows:Author',
'manual_merge = inspirehep.modules.workflows.workflows:ManualMerge',
],
'invenio_workflows_ui.actions': [
'author_approval = inspirehep.modules.workflows.actions.author_approval:AuthorApproval',
Expand Down
77 changes: 77 additions & 0 deletions tests/integration/workflows/fixtures/manual_merge_record.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{
"preprint_date": "2017-05-06",
"acquisition_source": {
"source": "arXiv",
"datetime": "2017-05-11T08:50:25.184741",
"method": "hepcrawl",
"submission_number": "db9325b2362611e78bfd0242ac12000b"
},
"license": [
{
"url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
"license": "arXiv-1.0"
}
],
"control_number": 123456,
"public_notes": [
{
"source": "arXiv",
"value": "21 pages, 13 figures"
}
],
"number_of_pages": 23,
"_files": [
{
"key": "1705.02541.pdf",
"size": 2806666
}
],
"inspire_categories": [
{
"source": "arxiv",
"term": "General Physics"
}
],
"authors": [
{
"affiliations": [],
"full_name": "Assis, M."
},
{
"affiliations": [],
"full_name": "Jacobsen, J.L."
},
{
"affiliations": [],
"full_name": "Jensen, I."
}
],
"titles": [
{
"source": "arXiv",
"title": "OLD: Analyticity of the Ising susceptibility: An interpretation"
}
],
"$schema": "http://localhost:5000/schemas/records/hep.json",
"_collections": ["Literature"],
"document_type": [
"thesis"
],
"abstracts": [
{
"source": "arXiv",
"value": "Let's assume this is an old record in the system. We discuss the implications of studies of partition function zeros and equimodular curves for the analytic properties of the Ising model on a square lattice in a magnetic field. In particular we consider the dense set of singularities in the susceptibility of the Ising model at $H=0$ found by Nickel and its relation to the analyticity of the field theory computations of Fonseca and Zamolodchikov."
}
],
"citeable": true,
"arxiv_eprints": [
{
"categories": [
"math-ph",
"cond-mat.stat-mech",
"math.MP"
],
"value": "1705.02541"
}
]
}
Loading

0 comments on commit 0857635

Please sign in to comment.