Skip to content

Commit

Permalink
Fixes #200: Add XML file handler (#202)
Browse files Browse the repository at this point in the history
* Fixes #200: Add XML file handler
* Fixes #200: Remove import_metadata, collapse it to import_resource
* Fixes #200: Remove unused import
---------
Co-authored-by: Giovanni Allegri <[email protected]>
  • Loading branch information
mattiagiupponi authored Mar 7, 2024
1 parent 085cbcc commit 02bda68
Show file tree
Hide file tree
Showing 13 changed files with 312 additions and 3 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ In GeoNode 4.1 `geonode-importer` replaced the previous importer logic.
- **KML** - Vector
- **CSV** - Vector
- **GeoTiff** - Raster
- **XML** - Update XML file for a given resource

**IMPORTANT**: At the moment the importer doesn't support overwriting/skipping existing layers from the UI. Every upload will create a new dataset.
Overwriting a layer (`overwrite_existing_layer`) and skipping an already existing layer (`skip_existing_layers`) is supported through the API.
Expand Down Expand Up @@ -88,6 +89,7 @@ CELERY_TASK_QUEUES += (
Queue('importer.copy_geonode_data_table', GEONODE_EXCHANGE, routing_key='importer.copy_geonode_data_table'),
Queue('importer.copy_raster_file', GEONODE_EXCHANGE, routing_key='importer.copy_raster_file'),
Queue('importer.rollback', GEONODE_EXCHANGE, routing_key='importer.rollback'),

)

DATABASE_ROUTERS = ["importer.db_router.DatastoreRouter"]
Expand All @@ -100,7 +102,8 @@ IMPORTER_HANDLERS = os.getenv('IMPORTER_HANDLERS', [
'importer.handlers.shapefile.handler.ShapeFileHandler',
'importer.handlers.kml.handler.KMLFileHandler',
'importer.handlers.csv.handler.CSVFileHandler',
'importer.handlers.geotiff.handler.GeoTiffFileHandler'
'importer.handlers.geotiff.handler.GeoTiffFileHandler',
'importer.handlers.xml.handler.XMLFileHandler'
])

```
Expand Down
1 change: 1 addition & 0 deletions importer/handlers/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def run_setup_hooks(*args, **kwargs):
_available_settings = [
import_string(module_path)().supported_file_extension_config
for module_path in settings.IMPORTER_HANDLERS
if import_string(module_path)().supported_file_extension_config
]
# injecting the new config required for FE
supported_type = [
Expand Down
19 changes: 19 additions & 0 deletions importer/handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ def default_geometry_column_name(self):
def supported_file_extension_config(self):
return NotImplementedError

@property
def can_handle_xml_file(self) -> bool:
    """
    Tell whether the handler is able to handle an XML file.

    A common workflow is defined by default, so the answer is True.
    Override this property in handlers that need an exception to it.
    """
    return True


@property
def can_handle_sld_file(self) -> bool:
    """
    Tell whether the handler is able to handle an SLD file.

    A common workflow is defined by default, so the answer is True.
    Override this property in handlers that need an exception to it.
    """
    return True

@staticmethod
def is_valid(files, user):
"""
Expand Down
51 changes: 51 additions & 0 deletions importer/handlers/common/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
from geonode.resource.enumerator import ExecutionRequestAction as exa
from importer.handlers.base import BaseHandler
from importer.handlers.xml.serializer import MetadataFileSerializer
from importer.utils import ImporterRequestAction as ira

logger = logging.getLogger(__name__)


class MetadataFileHandler(BaseHandler):
    """
    Common handler for metadata files uploaded through the importer.

    It provides the task lists required to complete the upload. The import
    step itself is a no-op here; subclasses (e.g. the XML handler) are
    expected to override ``import_resource``.
    """

    # Task list executed for each supported action; rollback has no task.
    ACTIONS = {
        exa.IMPORT.value: (
            "start_import",
            "importer.import_resource"
        ),
        ira.ROLLBACK.value: ()
    }

    @staticmethod
    def has_serializer(_data) -> type:
        """
        Return the serializer class to use for the uploaded payload.

        ``_data`` is not inspected: the metadata serializer class is always
        returned (a class, not a boolean).
        """
        return MetadataFileSerializer

    @property
    def supported_file_extension_config(self):
        """Return None: no file extension configuration is declared here."""
        return None

    @staticmethod
    def extract_params_from_data(_data, action=None):
        """
        Split the request data into execution parameters and the remainder.

        The parameters that need to be saved into the execution request are
        popped from ``_data``, which is therefore mutated in place.

        Returns a ``(params, remaining_data)`` tuple.
        """
        return {
            "dataset_title": _data.pop("dataset_title", None),
            "skip_existing_layers": _data.pop("skip_existing_layers", "False"),
            "overwrite_existing_layer": _data.pop("overwrite_existing_layer", "False"),
            # NOTE: the incoming key is plural while the stored key is singular.
            "store_spatial_file": _data.pop("store_spatial_files", "True"),
        }, _data

    @staticmethod
    def perform_last_step(execution_id):
        """Do nothing: no final step is defined for metadata files."""
        pass

    def import_resource(self, files: dict, execution_id: str, **kwargs):
        """Do nothing: the actual import is left to subclasses."""
        pass

9 changes: 9 additions & 0 deletions importer/handlers/gpkg/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def supported_file_extension_config(self):
"ext": ["gpkg"],
}

@property
def can_handle_xml_file(self) -> bool:
    """
    Tell whether the handler is able to handle an XML file.

    Always False for this handler.
    """
    return False

@staticmethod
def can_handle(_data) -> bool:
"""
Expand Down
9 changes: 9 additions & 0 deletions importer/handlers/kml/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def supported_file_extension_config(self):
"ext": ["kml", "kmz"],
}

@property
def can_handle_xml_file(self) -> bool:
    """
    Tell whether the handler is able to handle an XML file.

    Always False for this handler.
    """
    return False

@staticmethod
def can_handle(_data) -> bool:
"""
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions importer/handlers/xml/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from rest_framework.exceptions import APIException
from rest_framework import status


class InvalidXmlException(APIException):
    """API error (HTTP 400) reporting that the provided XML is invalid."""

    status_code = status.HTTP_400_BAD_REQUEST
    # Fixed the duplicated word ("provided provided") in the default message.
    default_detail = "The xml provided is invalid"
    default_code = "invalid_xml"
    category = "importer"
78 changes: 78 additions & 0 deletions importer/handlers/xml/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging

from django.shortcuts import get_object_or_404
from geonode.layers.models import Dataset
from geonode.resource.enumerator import ExecutionRequestAction as exa
from geonode.resource.manager import resource_manager
from importer.handlers.common.metadata import MetadataFileHandler
from importer.handlers.xml.exceptions import InvalidXmlException
from importer.orchestrator import orchestrator
from owslib.etree import etree as dlxml

logger = logging.getLogger(__name__)


class XMLFileHandler(MetadataFileHandler):
    """
    Handler to apply an uploaded XML file to an existing dataset.

    The task lists required to complete the upload are the ones declared by
    ``MetadataFileHandler``.
    """

    @staticmethod
    def can_handle(_data) -> bool:
        """
        Return True when the ``base_file`` provided is an ``.xml`` file.

        ``base_file`` may be a path (``str``) or an object exposing a
        ``name`` attribute. A missing or empty value yields False.
        """
        base = _data.get("base_file")
        if not base:
            return False
        filename = base if isinstance(base, str) else base.name
        return filename.endswith(".xml")

    @staticmethod
    def is_valid(files, user=None):
        """
        Check that ``files["base_file"]`` can be parsed as XML.

        Returns True on success.

        Raises ``InvalidXmlException`` when the file cannot be opened or
        parsed; the original error is chained as the cause.
        """
        try:
            # Hand raw bytes to the parser so that the encoding declared in
            # the XML prolog is honoured, instead of decoding with the
            # platform default encoding and re-encoding afterwards.
            with open(files.get("base_file"), "rb") as _xml:
                dlxml.fromstring(_xml.read())
        except Exception as err:
            raise InvalidXmlException(
                f"Uploaded document is not XML or is invalid: {str(err)}"
            ) from err
        return True

    def import_resource(self, files: dict, execution_id: str, **kwargs):
        """
        Apply the uploaded XML file to the dataset named in the execution.

        The dataset is looked up by the ``dataset_title`` input parameter
        (matched against ``Dataset.alternate``); a 404 is raised when it
        does not exist. When the handler recorded for the dataset declares
        ``can_handle_xml_file``, the work is delegated to its
        ``handle_xml_file``; otherwise the resource manager is used directly.
        """
        _exec = orchestrator.get_execution_object(execution_id)
        # getting the dataset
        alternate = _exec.input_params.get("dataset_title")
        dataset = get_object_or_404(Dataset, alternate=alternate)

        # retrieving the handler used for the dataset; a dataset may have no
        # handler info recorded, in which case the generic update below is
        # used instead of failing with an AttributeError on None
        handler_info = dataset.resourcehandlerinfo_set.first()
        original_handler = None
        if handler_info is not None:
            original_handler = orchestrator.load_handler(
                handler_info.handler_module_path
            )()

        if original_handler is not None and original_handler.can_handle_xml_file:
            original_handler.handle_xml_file(dataset, _exec)
        else:
            _path = _exec.input_params.get("files", {}).get(
                "xml_file", _exec.input_params.get("base_file", {})
            )
            resource_manager.update(
                None,
                instance=dataset,
                xml_file=_path,
                metadata_uploaded=bool(_path),
                vals={"dirty_state": True},
            )
        # reload the instance so it reflects whatever the update stored
        dataset.refresh_from_db()

        orchestrator.evaluate_execution_progress(execution_id, handler_module_path=str(self))

17 changes: 17 additions & 0 deletions importer/handlers/xml/serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from rest_framework import serializers
from dynamic_rest.serializers import DynamicModelSerializer
from geonode.upload.models import Upload


class MetadataFileSerializer(DynamicModelSerializer):
    """Serializer exposing ``dataset_title`` (required) and ``base_file``."""

    # Explicitly declared fields, listed in ``Meta.fields`` below.
    base_file = serializers.FileField()
    dataset_title = serializers.CharField(required=True)

    class Meta:
        model = Upload
        ref_name = "MetadataFileSerializer"
        view_name = "importer_upload"
        fields = ("dataset_title", "base_file")
75 changes: 75 additions & 0 deletions importer/handlers/xml/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from django.conf import settings
from django.contrib.auth import get_user_model
from django.test import TestCase
from geonode.base.populate_test_data import create_single_dataset
from importer import project_dir
from importer.models import ResourceHandlerInfo
from importer.orchestrator import orchestrator
from importer.handlers.xml.exceptions import InvalidXmlException
from importer.handlers.xml.handler import XMLFileHandler


class TestXMLFileHandler(TestCase):
    """Tests for ``XMLFileHandler``: task list, detection, validation, import."""

    databases = ("default", "datastore")

    @classmethod
    def setUpClass(cls):
        """Build the handler, the fixture paths and the dataset shared by the tests."""
        super().setUpClass()
        cls.handler = XMLFileHandler()
        cls.valid_xml = f"{settings.PROJECT_ROOT}/base/fixtures/test_xml.xml"
        # a non-XML fixture plays the role of the invalid document
        cls.invalid_xml = f"{project_dir}/tests/fixture/invalid.gpkg"
        cls.user, _ = get_user_model().objects.get_or_create(username="admin")
        cls.invalid_files = {"base_file": cls.invalid_xml, 'xml_file': cls.invalid_xml}
        cls.valid_files = {"base_file": cls.valid_xml, 'xml_file': cls.valid_xml}
        cls.owner = get_user_model().objects.first()
        cls.layer = create_single_dataset(name="extruded_polygon", owner=cls.owner)

    def test_task_list_is_the_expected_one(self):
        """The import action is made of exactly the two expected tasks."""
        import_tasks = self.handler.ACTIONS["import"]
        self.assertEqual(len(import_tasks), 2)
        self.assertTupleEqual(
            ("start_import", "importer.import_resource"),
            import_tasks,
        )

    def test_is_valid_should_raise_exception_if_the_xml_is_invalid(self):
        """A non-XML file is rejected with ``InvalidXmlException``."""
        with self.assertRaises(InvalidXmlException) as _exc:
            self.handler.is_valid(files=self.invalid_files)

        self.assertIsNotNone(_exc)
        self.assertIn(
            "Uploaded document is not XML or is invalid",
            str(_exc.exception.detail),
        )

    def test_is_valid_should_pass_with_valid_xml(self):
        """A well-formed XML file passes validation without raising."""
        self.handler.is_valid(files=self.valid_files)

    def test_can_handle_should_return_true_for_xml(self):
        """Files ending in ``.xml`` are accepted by the handler."""
        self.assertTrue(self.handler.can_handle(self.valid_files))

    def test_can_handle_should_return_false_for_other_files(self):
        """Files with another extension are refused by the handler."""
        self.assertFalse(self.handler.can_handle({"base_file": "random.file"}))

    def test_can_successfully_import_metadata_file(self):
        """Importing the XML fixture updates the title of the dataset."""
        request_params = {
            "files": self.valid_files,
            "dataset_title": self.layer.alternate,
            "skip_existing_layer": True,
            "handler_module_path": str(self.handler),
        }
        exec_id = orchestrator.create_execution_request(
            user=get_user_model().objects.first(),
            func_name="funct1",
            step="step",
            input_params=request_params,
        )
        # record the handler of the dataset so the import can load it
        ResourceHandlerInfo.objects.create(
            resource=self.layer,
            handler_module_path="importer.handlers.shapefile.handler.ShapeFileHandler",
        )

        self.assertEqual(self.layer.title, "extruded_polygon")

        self.handler.import_resource({}, str(exec_id))

        self.layer.refresh_from_db()
        self.assertEqual(self.layer.title, "test_dataset")
2 changes: 1 addition & 1 deletion importer/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def get_handler(self, _data) -> Optional[BaseHandler]:
for handler in BaseHandler.get_registry():
if handler.can_handle(_data):
return handler()
logger.error("Handler not found, fallback on the legacy upload system")
logger.error("Handler not found")
return None

def get_serializer(self, _data) -> serializers.Serializer:
Expand Down
Loading

0 comments on commit 02bda68

Please sign in to comment.