-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
workflow: added new workflow for manual maerger
Signed-off-by: Antonio Cesarano <[email protected]> * define a new workflow for the manual merge use case * add new task functions specific for such use case * integration tests
- Loading branch information
Showing
9 changed files
with
546 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of INSPIRE. | ||
# Copyright (C) 2014-2017 CERN. | ||
# | ||
# INSPIRE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# INSPIRE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
# In applying this license, CERN does not waive the privileges and immunities | ||
# granted to it by virtue of its status as an Intergovernmental Organization | ||
# or submit itself to any jurisdiction. | ||
|
||
"""Tasks related to manual merge generic record.""" | ||
|
||
from __future__ import absolute_import, division, print_function | ||
|
||
import json | ||
|
||
from invenio_db import db | ||
|
||
from inspire_json_merger.inspire_json_merger import inspire_json_merge | ||
from inspire_dojson.utils import get_record_ref | ||
|
||
from inspirehep.modules.workflows.tasks.merging import ( | ||
insert_wf_record_source, | ||
) | ||
from inspirehep.modules.workflows.utils import with_debug_logging | ||
from inspirehep.utils.record_getter import get_db_record | ||
|
||
|
||
def _get_head_and_update(obj): | ||
head = obj.extra_data['head'] | ||
update = obj.extra_data['update'] | ||
return head, update | ||
|
||
|
||
@with_debug_logging | ||
def merge_records(obj, eng): | ||
"""Merge the records whose ids are defined in the `obj` parameter and store | ||
the merged record and relative conflicts in `obj.data` and | ||
`obj.extra_data['conflicts']`. | ||
""" | ||
head, update = _get_head_and_update(obj) | ||
|
||
merged, conflicts = inspire_json_merge( | ||
root={}, | ||
head=head, | ||
update=update, | ||
head_source=obj.extra_data['head_source'] | ||
) | ||
obj.data = merged | ||
obj.extra_data['conflicts'] = [json.loads(c.to_json()) for c in conflicts] | ||
|
||
|
||
@with_debug_logging | ||
def halt_for_approval(obj, eng): | ||
"""Stop the Workflow engine""" | ||
eng.halt( | ||
action="merge_approval", | ||
msg='Manual Merge halted for curator approval.' | ||
) | ||
|
||
|
||
@with_debug_logging | ||
def edit_metadata_and_store(obj, eng): | ||
"""Replace the `head` record with the previously merged record and updates | ||
some reference in order to delete the `update` record, linking it to the | ||
new `head`. | ||
""" | ||
|
||
head = get_db_record('lit', obj.extra_data['head_control_number']) | ||
update = get_db_record('lit', obj.extra_data['update_control_number']) | ||
|
||
head.clear() | ||
head.update(obj.data) # head's content will be replaced by merged | ||
update.merge(head) # update's uuid will point to head's uuid | ||
update.delete() # mark update record as deleted | ||
|
||
# add schema contents to refer deleted record to the merged one | ||
update['new_record'] = get_record_ref( | ||
head['control_number'], | ||
endpoint='record' | ||
) | ||
_add_deleted_records(head, update) | ||
|
||
head.commit() | ||
update.commit() | ||
db.session.commit() | ||
|
||
|
||
def _add_deleted_records(new_rec, deleted_rec): | ||
"""Mark `deleted_rec` as replaced by `new_rec` by adding its id to the | ||
deleted_record list property. | ||
""" | ||
ref = get_record_ref(deleted_rec['control_number'], 'record') | ||
new_rec.setdefault('deleted_records', []).append(ref) | ||
|
||
|
||
def save_records_as_roots(obj, eng): | ||
"""Save `head` and `update` records in the Root table in the db if they | ||
have different `sources, otherwise only `head` is saved. | ||
""" | ||
head, update = _get_head_and_update(obj) | ||
|
||
head_source = obj.extra_data['head_source'] | ||
|
||
insert_wf_record_source( | ||
json=head, | ||
source=head_source, | ||
record_uuid=obj.extra_data['head_uuid'], | ||
) | ||
|
||
update_source = obj.extra_data['update_source'] | ||
|
||
# need to save just one root per source | ||
if update_source != head_source: | ||
insert_wf_record_source( | ||
json=update, | ||
source=update_source.lower(), | ||
record_uuid=obj.extra_data['update_uuid'], | ||
) | ||
obj.save() | ||
db.session.commit() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of INSPIRE. | ||
# Copyright (C) 2014-2017 CERN. | ||
# | ||
# INSPIRE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# INSPIRE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
# In applying this license, CERN does not waive the privileges and immunities | ||
# granted to it by virtue of its status as an Intergovernmental Organization | ||
# or submit itself to any jurisdiction. | ||
|
||
"""Workflow for manual merging generic records.""" | ||
|
||
from __future__ import absolute_import, division, print_function | ||
|
||
from invenio_workflows import start, workflow_object_class | ||
from inspire_json_merger.inspire_json_merger import ( | ||
get_head_source as merger_get_source | ||
) | ||
|
||
from inspirehep.modules.workflows.tasks.manual_merging import ( | ||
halt_for_approval, | ||
edit_metadata_and_store, merge_records, save_records_as_roots, | ||
) | ||
from inspirehep.modules.workflows.tasks.merging import ( | ||
get_head_source | ||
) | ||
from inspirehep.utils.record import get_source | ||
from inspirehep.utils.record_getter import get_db_record | ||
|
||
|
||
class ManualMerge(object): | ||
name = 'MERGE' | ||
data_type = '' | ||
|
||
workflow = ([ | ||
merge_records, | ||
halt_for_approval, | ||
# when resume here, we expect the workflow_object contains the | ||
# merged record accepted by a curator | ||
save_records_as_roots, | ||
edit_metadata_and_store, | ||
]) | ||
|
||
|
||
def start_merger( | ||
head_id, | ||
update_id, | ||
current_user_id=None, | ||
): | ||
"""Start a new ManualMerge workflow to merge two records manually. | ||
Args: | ||
head_id: the id of the first record to merge. This record is the one | ||
that will be updated with the new information. | ||
update_id: the id of the second record to merge. This record is the | ||
one that is going to be deleted and replaced by `head`. | ||
current_user_id: Id of the current user provided by the Flask app. | ||
Returns: | ||
(int): the current workflow object's id. | ||
""" | ||
data = { | ||
'pid_type': 'lit', # TODO: support | ||
'recid_head': head_id, | ||
'recid_update': update_id, | ||
} | ||
|
||
head = get_db_record('lit', head_id) | ||
update = get_db_record('lit', update_id) | ||
|
||
workflow_object = workflow_object_class.create( | ||
data=None, | ||
id_user=current_user_id, | ||
data_type='hep' | ||
) | ||
|
||
wf_id = workflow_object.id # to retrieve it later | ||
workflow_object.extra_data.update(data) | ||
|
||
# preparing identifiers in order to do less requests possible later | ||
head_source = get_head_source(head.id) or merger_get_source(head) | ||
workflow_object.extra_data['head_source'] = head_source.lower() | ||
workflow_object.extra_data['update_source'] = get_source(update).lower() | ||
|
||
workflow_object.extra_data['head_control_number'] = head_id | ||
workflow_object.extra_data['update_control_number'] = update_id | ||
|
||
workflow_object.extra_data['head_uuid'] = str(head.id) | ||
workflow_object.extra_data['update_uuid'] = str(update.id) | ||
|
||
workflow_object.extra_data['head'] = head | ||
workflow_object.extra_data['update'] = update | ||
|
||
workflow_object.save() | ||
|
||
start('manual_merge', object_id=wf_id) | ||
|
||
return wf_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
tests/integration/workflows/fixtures/manual_merge_record.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{ | ||
"preprint_date": "2017-05-06", | ||
"acquisition_source": { | ||
"source": "arXiv", | ||
"datetime": "2017-05-11T08:50:25.184741", | ||
"method": "hepcrawl", | ||
"submission_number": "db9325b2362611e78bfd0242ac12000b" | ||
}, | ||
"license": [ | ||
{ | ||
"url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/", | ||
"license": "arXiv-1.0" | ||
} | ||
], | ||
"control_number": 123456, | ||
"public_notes": [ | ||
{ | ||
"source": "arXiv", | ||
"value": "21 pages, 13 figures" | ||
} | ||
], | ||
"number_of_pages": 23, | ||
"_files": [ | ||
{ | ||
"key": "1705.02541.pdf", | ||
"size": 2806666 | ||
} | ||
], | ||
"inspire_categories": [ | ||
{ | ||
"source": "arxiv", | ||
"term": "General Physics" | ||
} | ||
], | ||
"authors": [ | ||
{ | ||
"affiliations": [], | ||
"full_name": "Assis, M." | ||
}, | ||
{ | ||
"affiliations": [], | ||
"full_name": "Jacobsen, J.L." | ||
}, | ||
{ | ||
"affiliations": [], | ||
"full_name": "Jensen, I." | ||
} | ||
], | ||
"titles": [ | ||
{ | ||
"source": "arXiv", | ||
"title": "OLD: Analyticity of the Ising susceptibility: An interpretation" | ||
} | ||
], | ||
"$schema": "http://localhost:5000/schemas/records/hep.json", | ||
"_collections": ["Literature"], | ||
"document_type": [ | ||
"thesis" | ||
], | ||
"abstracts": [ | ||
{ | ||
"source": "arXiv", | ||
"value": "Let's assume this is an old record in the system. We discuss the implications of studies of partition function zeros and equimodular curves for the analytic properties of the Ising model on a square lattice in a magnetic field. In particular we consider the dense set of singularities in the susceptibility of the Ising model at $H=0$ found by Nickel and its relation to the analyticity of the field theory computations of Fonseca and Zamolodchikov." | ||
} | ||
], | ||
"citeable": true, | ||
"arxiv_eprints": [ | ||
{ | ||
"categories": [ | ||
"math-ph", | ||
"cond-mat.stat-mech", | ||
"math.MP" | ||
], | ||
"value": "1705.02541" | ||
} | ||
] | ||
} |
Oops, something went wrong.