diff --git a/README.md b/README.md
index 8d10858b..7be51e6a 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ In GeoNode 4.1 `geonode-importer` replaced the previous importer logic.
 - **KML** - Vector
 - **CSV** - Vector
 - **GeoTiff** - Raster
+- **XML** - Update the XML metadata file for a given resource
 
 **IMPORTANT**: At the moment the importer doesn't support overwriting/skipping existing layers from the UI. Every upload will create a new dataset. Overwriting a layer (`overwrite_existing_layer`) and skipping an already existing layer (`skip_existing_layers`) is supported through the API.
 
@@ -88,6 +89,7 @@ CELERY_TASK_QUEUES += (
     Queue('importer.copy_geonode_data_table', GEONODE_EXCHANGE, routing_key='importer.copy_geonode_data_table'),
     Queue('importer.copy_raster_file', GEONODE_EXCHANGE, routing_key='importer.copy_raster_file'),
     Queue('importer.rollback', GEONODE_EXCHANGE, routing_key='importer.rollback'),
+
 )
 
 DATABASE_ROUTERS = ["importer.db_router.DatastoreRouter"]
@@ -100,7 +102,8 @@ IMPORTER_HANDLERS = os.getenv('IMPORTER_HANDLERS', [
     'importer.handlers.shapefile.handler.ShapeFileHandler',
     'importer.handlers.kml.handler.KMLFileHandler',
     'importer.handlers.csv.handler.CSVFileHandler',
-    'importer.handlers.geotiff.handler.GeoTiffFileHandler'
+    'importer.handlers.geotiff.handler.GeoTiffFileHandler',
+    'importer.handlers.xml.handler.XMLFileHandler',
 ])
 ```
 
diff --git a/importer/handlers/apps.py b/importer/handlers/apps.py
index 45b6456c..ad819c44 100644
--- a/importer/handlers/apps.py
+++ b/importer/handlers/apps.py
@@ -32,6 +32,7 @@ def run_setup_hooks(*args, **kwargs):
     _available_settings = [
         import_string(module_path)().supported_file_extension_config
         for module_path in settings.IMPORTER_HANDLERS
+        if import_string(module_path)().supported_file_extension_config
     ]
     # injecting the new config required for FE
     supported_type = [
diff --git a/importer/handlers/base.py b/importer/handlers/base.py
index abc5e424..1cf1a4f2 100644
--- a/importer/handlers/base.py
+++ b/importer/handlers/base.py
@@ -58,6 +58,25 @@ def default_geometry_column_name(self):
     def supported_file_extension_config(self):
         return NotImplementedError
 
+    @property
+    def can_handle_xml_file(self) -> bool:
+        """
+        Return True if the handler is able to handle an XML file.
+        By default a common workflow is always defined;
+        override this property if exceptions are needed.
+        """
+        return True
+
+
+    @property
+    def can_handle_sld_file(self) -> bool:
+        """
+        Return True if the handler is able to handle an SLD file.
+        By default a common workflow is always defined;
+        override this property if exceptions are needed.
+        """
+        return True
+
     @staticmethod
     def is_valid(files, user):
         """
diff --git a/importer/handlers/common/metadata.py b/importer/handlers/common/metadata.py
new file mode 100644
index 00000000..67492a8a
--- /dev/null
+++ b/importer/handlers/common/metadata.py
@@ -0,0 +1,51 @@
+import logging
+from geonode.resource.enumerator import ExecutionRequestAction as exa
+from importer.handlers.base import BaseHandler
+from importer.handlers.xml.serializer import MetadataFileSerializer
+from importer.utils import ImporterRequestAction as ira
+
+logger = logging.getLogger(__name__)
+
+
+class MetadataFileHandler(BaseHandler):
+    """
+    Base handler to import metadata (XML) files into GeoNode.
+    It must provide the task_lists required to complete the upload
+    """
+
+    ACTIONS = {
+        exa.IMPORT.value: (
+            "start_import",
+            "importer.import_resource"
+        ),
+        ira.ROLLBACK.value: ()
+    }
+
+    @staticmethod
+    def has_serializer(_data) -> bool:
+        return MetadataFileSerializer
+
+    @property
+    def supported_file_extension_config(self):
+        return None
+
+    @staticmethod
+    def extract_params_from_data(_data, action=None):
+        """
+        Remove from _data the params that need to be saved into the executionRequest object;
+        all the others are returned
+        """
+        return {
+            "dataset_title": _data.pop("dataset_title", None),
+            "skip_existing_layers": _data.pop("skip_existing_layers", "False"),
+            "overwrite_existing_layer": _data.pop("overwrite_existing_layer", "False"),
+            "store_spatial_file": _data.pop("store_spatial_files", "True"),
+        }, _data
+
+    @staticmethod
+    def perform_last_step(execution_id):
+        pass
+
+    def import_resource(self, files: dict, execution_id: str, **kwargs):
+        pass
diff --git a/importer/handlers/gpkg/handler.py b/importer/handlers/gpkg/handler.py
index 98a184e4..bd6db8ac 100644
--- a/importer/handlers/gpkg/handler.py
+++ b/importer/handlers/gpkg/handler.py
@@ -48,6 +48,15 @@ def supported_file_extension_config(self):
             "ext": ["gpkg"],
         }
 
+    @property
+    def can_handle_xml_file(self) -> bool:
+        """
+        Return True if the handler is able to handle an XML file.
+        By default a common workflow is always defined;
+        override this property if exceptions are needed.
+        """
+        return False
+
     @staticmethod
     def can_handle(_data) -> bool:
         """
diff --git a/importer/handlers/kml/handler.py b/importer/handlers/kml/handler.py
index 8f60bacc..00941594 100644
--- a/importer/handlers/kml/handler.py
+++ b/importer/handlers/kml/handler.py
@@ -48,6 +48,15 @@ def supported_file_extension_config(self):
             "ext": ["kml", "kmz"],
         }
 
+    @property
+    def can_handle_xml_file(self) -> bool:
+        """
+        Return True if the handler is able to handle an XML file.
+        By default a common workflow is always defined;
+        override this property if exceptions are needed.
+        """
+        return False
+
     @staticmethod
     def can_handle(_data) -> bool:
         """
diff --git a/importer/handlers/xml/__init__.py b/importer/handlers/xml/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/importer/handlers/xml/exceptions.py b/importer/handlers/xml/exceptions.py
new file mode 100644
index 00000000..c7813be6
--- /dev/null
+++ b/importer/handlers/xml/exceptions.py
@@ -0,0 +1,9 @@
+from rest_framework.exceptions import APIException
+from rest_framework import status
+
+
+class InvalidXmlException(APIException):
+    status_code = status.HTTP_400_BAD_REQUEST
+    default_detail = "The xml provided is invalid"
+    default_code = "invalid_xml"
+    category = "importer"
diff --git a/importer/handlers/xml/handler.py b/importer/handlers/xml/handler.py
new file mode 100644
index 00000000..213397f1
--- /dev/null
+++ b/importer/handlers/xml/handler.py
@@ -0,0 +1,78 @@
+import logging
+
+from django.shortcuts import get_object_or_404
+from geonode.layers.models import Dataset
+from geonode.resource.enumerator import ExecutionRequestAction as exa
+from geonode.resource.manager import resource_manager
+from importer.handlers.common.metadata import MetadataFileHandler
+from importer.handlers.xml.exceptions import InvalidXmlException
+from importer.orchestrator import orchestrator
+from owslib.etree import etree as dlxml
+
+logger = logging.getLogger(__name__)
+
+
+class XMLFileHandler(MetadataFileHandler):
+    """
+    Handler to import XML metadata files into GeoNode.
+    It must provide the task_lists required to complete the upload
+    """
+
+    @staticmethod
+    def can_handle(_data) -> bool:
+        """
+        Return True or False depending on whether, with the info provided,
+        the handler is able to handle the file or not
+        """
+        base = _data.get("base_file")
+        if not base:
+            return False
+        return (
+            base.endswith(".xml")
+            if isinstance(base, str)
+            else base.name.endswith(".xml")
+        )
+
+    @staticmethod
+    def is_valid(files, user=None):
+        """
+        Define basic validation steps
+        """
+        # calling base validation checks
+
+        try:
+            with open(files.get("base_file")) as _xml:
+                dlxml.fromstring(_xml.read().encode())
+        except Exception as err:
+            raise InvalidXmlException(f"Uploaded document is not XML or is invalid: {str(err)}")
+        return True
+
+    def import_resource(self, files: dict, execution_id: str, **kwargs):
+        _exec = orchestrator.get_execution_object(execution_id)
+        # getting the dataset
+        alternate = _exec.input_params.get("dataset_title")
+        dataset = get_object_or_404(Dataset, alternate=alternate)
+
+        # retrieving the handler used for the dataset
+        original_handler = orchestrator.load_handler(
+            dataset.resourcehandlerinfo_set
+            .first()
+            .handler_module_path
+        )()
+
+        if original_handler.can_handle_xml_file:
+            original_handler.handle_xml_file(dataset, _exec)
+        else:
+            _path = _exec.input_params.get("files", {}).get("xml_file", _exec.input_params.get("base_file", {}))
+            resource_manager.update(
+                None,
+                instance=dataset,
+                xml_file=_path,
+                metadata_uploaded=True if _path else False,
+                vals={"dirty_state": True},
+            )
+        dataset.refresh_from_db()
+
+        orchestrator.evaluate_execution_progress(execution_id, handler_module_path=str(self))
+        return
diff --git a/importer/handlers/xml/serializer.py b/importer/handlers/xml/serializer.py
new file mode 100644
index 00000000..c6645787
--- /dev/null
+++ b/importer/handlers/xml/serializer.py
@@ -0,0 +1,17 @@
+from rest_framework import serializers
+from dynamic_rest.serializers import DynamicModelSerializer
+from geonode.upload.models import Upload
+
+
+class MetadataFileSerializer(DynamicModelSerializer):
+    class Meta:
+        ref_name = "MetadataFileSerializer"
+        model = Upload
+        view_name = "importer_upload"
+        fields = (
+            "dataset_title",
+            "base_file"
+        )
+
+    base_file = serializers.FileField()
+    dataset_title = serializers.CharField(required=True)
diff --git a/importer/handlers/xml/tests.py b/importer/handlers/xml/tests.py
new file mode 100644
index 00000000..67c167c6
--- /dev/null
+++ b/importer/handlers/xml/tests.py
@@ -0,0 +1,75 @@
+from django.conf import settings
+from django.contrib.auth import get_user_model
+from django.test import TestCase
+from geonode.base.populate_test_data import create_single_dataset
+from importer import project_dir
+from importer.models import ResourceHandlerInfo
+from importer.orchestrator import orchestrator
+from importer.handlers.xml.exceptions import InvalidXmlException
+from importer.handlers.xml.handler import XMLFileHandler
+
+
+class TestXMLFileHandler(TestCase):
+    databases = ("default", "datastore")
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.handler = XMLFileHandler()
+        cls.valid_xml = f"{settings.PROJECT_ROOT}/base/fixtures/test_xml.xml"
+        cls.invalid_xml = f"{project_dir}/tests/fixture/invalid.gpkg"
+        cls.user, _ = get_user_model().objects.get_or_create(username="admin")
+        cls.invalid_files = {"base_file": cls.invalid_xml, "xml_file": cls.invalid_xml}
+        cls.valid_files = {"base_file": cls.valid_xml, "xml_file": cls.valid_xml}
+        cls.owner = get_user_model().objects.first()
+        cls.layer = create_single_dataset(name="extruded_polygon", owner=cls.owner)
+
+    def test_task_list_is_the_expected_one(self):
+        expected = (
+            "start_import",
+            "importer.import_resource",
+        )
+        self.assertEqual(len(self.handler.ACTIONS["import"]), 2)
+        self.assertTupleEqual(expected, self.handler.ACTIONS["import"])
+
+    def test_is_valid_should_raise_exception_if_the_xml_is_invalid(self):
+        with self.assertRaises(InvalidXmlException) as _exc:
+            self.handler.is_valid(files=self.invalid_files)
+
+        self.assertIsNotNone(_exc)
+        self.assertTrue("Uploaded document is not XML or is invalid" in str(_exc.exception.detail))
+
+    def test_is_valid_should_pass_with_valid_xml(self):
+        self.handler.is_valid(files=self.valid_files)
+
+    def test_can_handle_should_return_true_for_xml(self):
+        actual = self.handler.can_handle(self.valid_files)
+        self.assertTrue(actual)
+
+    def test_can_handle_should_return_false_for_other_files(self):
+        actual = self.handler.can_handle({"base_file": "random.file"})
+        self.assertFalse(actual)
+
+    def test_can_successfully_import_metadata_file(self):
+        exec_id = orchestrator.create_execution_request(
+            user=get_user_model().objects.first(),
+            func_name="funct1",
+            step="step",
+            input_params={
+                "files": self.valid_files,
+                "dataset_title": self.layer.alternate,
+                "skip_existing_layer": True,
+                "handler_module_path": str(self.handler),
+            },
+        )
+        ResourceHandlerInfo.objects.create(
+            resource=self.layer,
+            handler_module_path="importer.handlers.shapefile.handler.ShapeFileHandler",
+        )
+
+        self.assertEqual(self.layer.title, "extruded_polygon")
+
+        self.handler.import_resource({}, str(exec_id))
+
+        self.layer.refresh_from_db()
+        self.assertEqual(self.layer.title, "test_dataset")
\ No newline at end of file
diff --git a/importer/orchestrator.py b/importer/orchestrator.py
index 595b0d46..7edc6fb2 100644
--- a/importer/orchestrator.py
+++ b/importer/orchestrator.py
@@ -47,7 +47,7 @@ def get_handler(self, _data) -> Optional[BaseHandler]:
         for handler in BaseHandler.get_registry():
             if handler.can_handle(_data):
                 return handler()
-        logger.error("Handler not found, fallback on the legacy upload system")
+        logger.error("Handler not found")
         return None
 
     def get_serializer(self, _data) -> serializers.Serializer:
diff --git a/importer/tests/unit/test_task.py b/importer/tests/unit/test_task.py
index 20a1966a..897d6d94 100644
--- a/importer/tests/unit/test_task.py
+++ b/importer/tests/unit/test_task.py
@@ -1,4 +1,8 @@
 import os
+import os
+import shutil
+
+from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.test.utils import override_settings
 from unittest.mock import patch
@@ -14,7 +18,7 @@
     import_resource,
     orchestrator,
     publish_resource,
-    rollback,
+    rollback
 )
 from geonode.resource.models import ExecutionRequest
 from geonode.layers.models import Dataset
@@ -23,6 +27,7 @@
 from geonode.base.populate_test_data import create_single_dataset
 from dynamic_models.models import ModelSchema, FieldSchema
 from dynamic_models.exceptions import DynamicModelError, InvalidFieldNameError
+from importer.models import ResourceHandlerInfo
 
 from importer.tests.utils import (
     ImporterBaseTestSupport,
@@ -466,6 +471,39 @@ def test_rollback_works_as_expected_raster(
         if exec_id:
             ExecutionRequest.objects.filter(exec_id=str(exec_id)).delete()
 
+    @override_settings(MEDIA_ROOT="/tmp/")
+    def test_import_metadata_should_work_as_expected(self):
+        handler = "importer.handlers.xml.handler.XMLFileHandler"
+        # let's copy the file to the temporary folder;
+        # it will be removed later
+        valid_xml = f"{settings.PROJECT_ROOT}/base/fixtures/test_xml.xml"
+        shutil.copy(valid_xml, "/tmp")
+
+        user, _ = get_user_model().objects.get_or_create(username="admin")
+        valid_files = {"base_file": valid_xml, "xml_file": valid_xml}
+
+        layer = create_single_dataset("test_dataset_importer")
+        exec_id = orchestrator.create_execution_request(
+            user=get_user_model().objects.first(),
+            func_name="funct1",
+            step="step",
+            input_params={
+                "files": valid_files,
+                "dataset_title": layer.alternate,
+                "skip_existing_layer": True,
+                "handler_module_path": str(handler),
+            },
+        )
+        ResourceHandlerInfo.objects.create(
+            resource=layer,
+            handler_module_path="importer.handlers.shapefile.handler.ShapeFileHandler",
+        )
+
+        import_resource(str(exec_id), handler, "import")
+
+        layer.refresh_from_db()
+        self.assertEqual(layer.title, "test_dataset")
+
 
 class TestDynamicModelSchema(TransactionImporterBaseTestSupport):
     databases = ("default", "datastore")
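
Usage sketch (not part of the patch): the new metadata flow only needs the two fields declared in `MetadataFileSerializer` — `base_file` and `dataset_title` — and `dataset_title` must be the *alternate* of an existing dataset, because `XMLFileHandler.import_resource` resolves the target with `get_object_or_404(Dataset, alternate=...)`. A minimal client-side sketch follows; the endpoint path, the credentials and the dataset alternate are assumptions for illustration only and are not defined by this PR.

```python
# Hypothetical client sketch: upload an updated XML metadata file for an
# existing dataset through the importer upload API.
# Assumptions (not part of this patch): the /api/v2/uploads/upload endpoint,
# the basic-auth credentials and the dataset alternate used below.
import requests

GEONODE_URL = "https://geonode.example.org"  # assumed deployment URL
session = requests.Session()
session.auth = ("admin", "admin")  # assumed credentials, for brevity only

with open("metadata.xml", "rb") as metadata_file:
    response = session.post(
        f"{GEONODE_URL}/api/v2/uploads/upload",
        # file name must end with .xml so that XMLFileHandler.can_handle matches it
        files={"base_file": metadata_file},
        # must be the alternate of the target dataset (see import_resource)
        data={"dataset_title": "geonode:extruded_polygon"},
    )

response.raise_for_status()
print(response.json())
```

Server side, such a request goes through `can_handle` (extension check), `is_valid` (XML parsing via `dlxml.fromstring`) and `import_resource`, which delegates to the dataset's original handler when its `can_handle_xml_file` is `True` and otherwise falls back to `resource_manager.update(..., xml_file=..., metadata_uploaded=True)`, before calling `orchestrator.evaluate_execution_progress`.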