[Fixes #201] Implement SldFileHandler (#203)
* [Fixes #201] Refactor metadata common and add SLD handler

* Fix SLD file handler and black formatting
mattiagiupponi authored Mar 13, 2024
1 parent e29c70e commit b2b038c
Showing 27 changed files with 243 additions and 91 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -20,6 +20,7 @@ In GeoNode 4.1 `geonode-importer` replaced the previous importer logic.
- **CSV** - Vector
- **GeoTiff** - Raster
- **XML** - Update XML file for a given resource
- **SLD** - Update SLD file for a given resource

**IMPORTANT**: At the moment the importer doesn't support overwriting/skipping existing layers from the UI. Every upload will create a new dataset.
Overwriting a layer (`overwrite_existing_layer`) and skipping an already existing layer (`skip_existing_layers`) are supported through the API.
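
For reference, a minimal sketch of such an API call in Python; the endpoint path, credentials, and payload shape are assumptions for illustration (only the `overwrite_existing_layer` parameter name comes from the text above):

```python
# Hypothetical upload sketch: overwrite an existing layer via the importer API.
# The endpoint URL and credentials below are assumptions, not part of this commit.
import requests

response = requests.post(
    "http://localhost:8000/api/v2/uploads/upload",
    auth=("admin", "admin"),
    data={"overwrite_existing_layer": True},
    files={"base_file": open("dataset.gpkg", "rb")},
)
print(response.status_code, response.json())
```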
@@ -107,7 +108,8 @@ IMPORTER_HANDLERS = os.getenv('IMPORTER_HANDLERS', [
'importer.handlers.kml.handler.KMLFileHandler',
'importer.handlers.csv.handler.CSVFileHandler',
'importer.handlers.geotiff.handler.GeoTiffFileHandler',
'importer.handlers.xml.handler.XMLFileHandler
'importer.handlers.xml.handler.XMLFileHandler',
'importer.handlers.sld.handler.SLDFileHandler'
])

```
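
For illustration, a rough sketch of how these configured handlers are typically consulted; the dispatch loop below is an assumption inferred from the handlers' `can_handle` contract, not code from this commit:

```python
# Hypothetical dispatch sketch: walk IMPORTER_HANDLERS in order and return the
# first handler class whose can_handle() accepts the incoming payload.
from django.utils.module_loading import import_string

def resolve_handler(data, importer_handlers):
    for path in importer_handlers:
        handler = import_string(path)
        if handler.can_handle(data):
            return handler
    return None

# resolve_handler({"base_file": "style.sld"}, IMPORTER_HANDLERS)
# would now resolve to importer.handlers.sld.handler.SLDFileHandler
```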
2 changes: 1 addition & 1 deletion importer/__init__.py
@@ -20,7 +20,7 @@

project_dir = os.path.dirname(os.path.abspath(__file__))

VERSION = (1, 0, 7)
VERSION = (1, 0, 8)
__version__ = ".".join([str(i) for i in VERSION])
__author__ = "geosolutions-it"
__email__ = "[email protected]"
1 change: 0 additions & 1 deletion importer/handlers/base.py
@@ -67,7 +67,6 @@ def can_handle_xml_file(self) -> bool:
"""
return True


@property
def can_handle_sld_file(self) -> bool:
"""
46 changes: 37 additions & 9 deletions importer/handlers/common/metadata.py
@@ -3,27 +3,36 @@
from importer.handlers.base import BaseHandler
from importer.handlers.xml.serializer import MetadataFileSerializer
from importer.utils import ImporterRequestAction as ira
from importer.orchestrator import orchestrator
from django.shortcuts import get_object_or_404
from geonode.layers.models import Dataset

logger = logging.getLogger(__name__)


class MetadataFileHandler(BaseHandler):
"""
Handler to import KML files into GeoNode data db
Handler to import metadata files into GeoNode data db
It must provide the task_lists required to complete the upload
"""

ACTIONS = {
exa.IMPORT.value: (
"start_import",
"importer.import_resource"
),
ira.ROLLBACK.value: ()
exa.IMPORT.value: ("start_import", "importer.import_resource"),
ira.ROLLBACK.value: (),
}

@staticmethod
def has_serializer(_data) -> bool:
return MetadataFileSerializer
def has_serializer(data) -> bool:
_base = data.get("base_file")
if not _base:
return False
if (
_base.endswith("xml") or _base.endswith("sld")
if isinstance(_base, str)
else _base.name.endswith("xml") or _base.name.endswith("sld")
):
return MetadataFileSerializer
return False

@property
def supported_file_extension_config(self):
@@ -47,5 +56,24 @@ def perform_last_step(execution_id):
pass

def import_resource(self, files: dict, execution_id: str, **kwargs):
pass
_exec = orchestrator.get_execution_object(execution_id)
# getting the dataset
alternate = _exec.input_params.get("dataset_title")
dataset = get_object_or_404(Dataset, alternate=alternate)

# retrieving the handler used for the dataset
original_handler = orchestrator.load_handler(
dataset.resourcehandlerinfo_set.first().handler_module_path
)()

self.handle_metadata_resource(_exec, dataset, original_handler)

dataset.refresh_from_db()

orchestrator.evaluate_execution_progress(
execution_id, handler_module_path=str(self)
)
return dataset

def handle_metadata_resource(self, _exec, dataset, original_handler):
raise NotImplementedError
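
A quick usage sketch of the `has_serializer` check above (illustrative file names only):

```python
# Per the extension check above: XML and SLD base files resolve to the
# metadata serializer; anything else (or a missing base_file) returns False.
MetadataFileHandler.has_serializer({"base_file": "metadata.xml"})  # MetadataFileSerializer
MetadataFileHandler.has_serializer({"base_file": "style.sld"})     # MetadataFileSerializer
MetadataFileHandler.has_serializer({"base_file": "data.gpkg"})     # False
MetadataFileHandler.has_serializer({})                             # False
```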
7 changes: 3 additions & 4 deletions importer/handlers/common/raster.py
@@ -25,7 +25,6 @@
from osgeo import gdal
from importer.celery_app import importer_app
from geonode.storage.manager import storage_manager
from geonode.geoserver.helpers import get_store

logger = logging.getLogger(__name__)

@@ -250,7 +249,7 @@ def identify_authority(self, layer):
raise Exception(
"CRS authority code not found, fallback to default behaviour"
)
except:
except Exception:
spatial_ref = layer.GetSpatialRef()
spatial_ref.AutoIdentifyEPSG()
_name = spatial_ref.GetAuthorityName(None) or spatial_ref.GetAttrValue(
@@ -526,7 +525,7 @@ def rollback(
step_index = steps.index(rollback_from_step)
# the start_import, start_copy etc. don't do anything as a step, it is just the start
# so there is nothing to rollback
steps_to_rollback = steps[1 : step_index + 1]
steps_to_rollback = steps[1 : step_index + 1] # noqa
if not steps_to_rollback:
return
# reversing the tuple to go backwards with the rollback
@@ -536,7 +535,7 @@
istance_name = (
find_key_recursively(kwargs, "new_dataset_alternate") or args[3]
)
except:
except Exception:
pass

logger.warning(
4 changes: 2 additions & 2 deletions importer/handlers/common/tests_vector.py
@@ -226,7 +226,7 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(

_open.assert_called_once()
_open.assert_called_with(
f"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
+ os.getenv("DATABASE_HOST", "localhost")
+ " port=5432 user='geonode_data' password='geonode_data' \" \""
+ self.valid_files.get("base_file")
@@ -256,7 +256,7 @@ def test_import_with_ogr2ogr_with_errors_should_raise_exception(self, _open):

_open.assert_called_once()
_open.assert_called_with(
f"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
+ os.getenv("DATABASE_HOST", "localhost")
+ " port=5432 user='geonode_data' password='geonode_data' \" \""
+ self.valid_files.get("base_file")
10 changes: 5 additions & 5 deletions importer/handlers/common/vector.py
@@ -279,7 +279,7 @@ def identify_authority(self, layer):
raise Exception(
"CRS authority code not found, fallback to default behaviour"
)
except:
except Exception:
spatial_ref = layer.GetSpatialRef()
spatial_ref.AutoIdentifyEPSG()
_name = spatial_ref.GetAuthorityName(None) or spatial_ref.GetAttrValue(
@@ -375,7 +375,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str:
)

# prepare the async chord workflow with the on_success and on_fail methods
workflow = chord(group_to_call)(
workflow = chord(group_to_call)( # noqa
import_next_step.s(
execution_id,
str(self), # passing the handler module path
@@ -530,7 +530,7 @@ def create_dynamic_model_fields(
# once we have the schema, here we create a list of chunked values
# so the async task will handle a max of 30 fields per task
list_chunked = [
layer_schema[i : i + 30] for i in range(0, len(layer_schema), 30)
layer_schema[i : i + 30] for i in range(0, len(layer_schema), 30) # noqa
]

# definition of the celery group needed to run the async workflow.
@@ -777,7 +777,7 @@ def rollback(
step_index = steps.index(rollback_from_step)
# the start_import, start_copy etc. don't do anything as a step, it is just the start
# so there is nothing to rollback
steps_to_rollback = steps[1 : step_index + 1]
steps_to_rollback = steps[1 : step_index + 1] # noqa
if not steps_to_rollback:
return
# reversing the tuple to go backwards with the rollback
@@ -787,7 +787,7 @@
instance_name = (
find_key_recursively(kwargs, "new_dataset_alternate") or args[3]
)
except:
except Exception:
pass

logger.warning(
2 changes: 1 addition & 1 deletion importer/handlers/csv/handler.py
@@ -208,7 +208,7 @@ def create_dynamic_model_fields(
# once we have the schema, here we create a list of chunked values
# so the async task will handle a max of 30 fields per task
list_chunked = [
layer_schema[i : i + 30] for i in range(0, len(layer_schema), 30)
layer_schema[i : i + 30] for i in range(0, len(layer_schema), 30) # noqa
]

# definition of the celery group needed to run the async workflow.
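
The 30-field chunking used in `create_dynamic_model_fields` above can be seen in isolation with a standalone sketch:

```python
# Standalone sketch of the chunking above: split the layer schema into groups
# of at most 30 fields so each async task handles a bounded amount of work.
layer_schema = [{"name": f"field_{i}"} for i in range(70)]
list_chunked = [layer_schema[i : i + 30] for i in range(0, len(layer_schema), 30)]
print([len(chunk) for chunk in list_chunked])  # -> [30, 30, 10]
```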
4 changes: 2 additions & 2 deletions importer/handlers/csv/tests.py
@@ -165,11 +165,11 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(

_open.assert_called_once()
_open.assert_called_with(
f"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
+ os.getenv("DATABASE_HOST", "localhost")
+ " port=5432 user='geonode_data' password='geonode_data' \" \""
+ self.valid_csv
+ '" -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geometry -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"',
+ '" -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geometry -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"', # noqa
stdout=-1,
stderr=-1,
shell=True, # noqa
2 changes: 1 addition & 1 deletion importer/handlers/geojson/tests.py
@@ -131,7 +131,7 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(

_open.assert_called_once()
_open.assert_called_with(
f"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
+ os.getenv("DATABASE_HOST", "localhost")
+ " port=5432 user='geonode_data' password='geonode_data' \" \""
+ self.valid_files.get("base_file")
9 changes: 3 additions & 6 deletions importer/handlers/gpkg/tests.py
@@ -1,4 +1,3 @@
import copy
import os
import shutil
from django.test import TestCase, override_settings
@@ -117,15 +116,13 @@ def test_can_handle_should_return_false_for_other_files(self):
def test_single_message_error_handler(self):
# lets copy the file to the temporary folder
# later will be removed
shutil.copy(self.valid_gpkg, '/tmp')
shutil.copy(self.valid_gpkg, "/tmp")
exec_id = orchestrator.create_execution_request(
user=get_user_model().objects.first(),
func_name="funct1",
step="step",
input_params={
"files": {
"base_file": '/tmp/valid.gpkg'
},
"files": {"base_file": "/tmp/valid.gpkg"},
"skip_existing_layer": True,
"handler_module_path": str(self.handler),
},
@@ -148,4 +145,4 @@ def test_single_message_error_handler(self):
)

self.assertEqual("FAILURE", TaskResult.objects.get(task_id=str(exec_id)).status)
self.assertFalse(os.path.exists('/tmp/valid.gpkg'))
self.assertFalse(os.path.exists("/tmp/valid.gpkg"))
2 changes: 1 addition & 1 deletion importer/handlers/shapefile/tests.py
@@ -148,7 +148,7 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(

_open.assert_called_once()
_open.assert_called_with(
f"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
"/usr/bin/ogr2ogr --config PG_USE_COPY YES -f PostgreSQL PG:\" dbname='test_geonode_data' host="
+ os.getenv("DATABASE_HOST", "localhost")
+ " port=5432 user='geonode_data' password='geonode_data' \" \""
+ self.valid_shp.get("base_file")
Empty file.
9 changes: 9 additions & 0 deletions importer/handlers/sld/exceptions.py
@@ -0,0 +1,9 @@
from rest_framework.exceptions import APIException
from rest_framework import status


class InvalidSldException(APIException):
status_code = status.HTTP_400_BAD_REQUEST
default_detail = "The sld provided is invalid"
default_code = "invalid_sld"
category = "importer"
62 changes: 62 additions & 0 deletions importer/handlers/sld/handler.py
@@ -0,0 +1,62 @@
import logging

from geonode.resource.manager import resource_manager
from importer.handlers.common.metadata import MetadataFileHandler
from importer.handlers.sld.exceptions import InvalidSldException
from owslib.etree import etree as dlxml

logger = logging.getLogger(__name__)


class SLDFileHandler(MetadataFileHandler):
"""
Handler to import SLD files into GeoNode data db
It must provide the task_lists required to complete the upload
"""

@staticmethod
def can_handle(_data) -> bool:
"""
Return True or False depending on whether, given the info provided,
the handler is able to handle the file
"""
base = _data.get("base_file")
if not base:
return False
return (
base.endswith(".sld")
if isinstance(base, str)
else base.name.endswith(".sld")
)

@staticmethod
def is_valid(files, user):
"""
Define basic validation steps
"""
# calling base validation checks

try:
with open(files.get("base_file")) as _xml:
dlxml.fromstring(_xml.read().encode())
except Exception as err:
raise InvalidSldException(
f"Uploaded document is not SLD or is invalid: {str(err)}"
)
return True

def handle_metadata_resource(self, _exec, dataset, original_handler):
if original_handler.can_handle_sld_file:
original_handler.handle_sld_file(dataset, _exec)
else:
_path = _exec.input_params.get("files", {}).get(
"sld_file", _exec.input_params.get("base_file", {})
)
resource_manager.exec(
"set_style",
None,
instance=dataset,
sld_file=_exec.input_params.get("files", {}).get("sld_file", ""),
sld_uploaded=True if _path else False,
vals={"dirty_state": True},
)
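
Putting the pieces together, a minimal usage sketch of the handler above (the local file path is assumed):

```python
# Assumed local file: can_handle() keys off the .sld extension, while
# is_valid() parses the document as XML and raises InvalidSldException
# if the content is malformed.
from importer.handlers.sld.handler import SLDFileHandler
from importer.handlers.sld.exceptions import InvalidSldException

files = {"base_file": "/tmp/style.sld"}
assert SLDFileHandler.can_handle(files)
try:
    SLDFileHandler.is_valid(files, user=None)
except InvalidSldException as exc:
    print(f"rejected: {exc}")
```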