[Fixes #247] Introducing assets in geonode-importer (#249)
* [Fixes #247] Introducing assets in geonode-importer

---------

Co-authored-by: etj <[email protected]>
mattiagiupponi and etj authored Jun 28, 2024
1 parent e0215e4 commit 1f5c926
Showing 25 changed files with 382 additions and 101 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
@@ -0,0 +1,2 @@
[run]
relative_files = True
5 changes: 3 additions & 2 deletions .env_test
@@ -178,8 +178,9 @@ DEBUG=False

SECRET_KEY='myv-y4#7j-d*p-__@j#*3z@!y24fz8%^z2v6atuy4bo9vqr1_a'

STATIC_ROOT=/mnt/volumes/statics/static/
MEDIA_ROOT=/mnt/volumes/statics/uploaded/
STATIC_ROOT=/tmp/statics/static/
MEDIA_ROOT=/tmp/statics/uploaded/
ASSETS_ROOT=/tmp/statics/assets/
GEOIP_PATH=/mnt/volumes/statics/geoip.db

CACHE_BUSTING_STATIC_ENABLED=False
12 changes: 12 additions & 0 deletions .github/workflows/runtests.yml
@@ -20,6 +20,18 @@ jobs:
run: docker exec django4importer /bin/sh -c "sh /usr/src/importer/entrypoint_test.sh"
- name: Run geonode-importer tests
run: docker exec django4importer /bin/sh -c "sh /usr/src/importer/runtest.sh"
- name: Coverage comment
id: coverage_comment
uses: py-cov-action/python-coverage-comment-action@v3
with:
GITHUB_TOKEN: ${{ github.token }}

- name: Store Pull Request comment to be posted
uses: actions/upload-artifact@v4
if: steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
with:
name: python-coverage-comment-action
path: python-coverage-comment-action.txt
- name: Stop containers
if: always()
run: docker-compose -f "docker-compose-test.yaml" down
1 change: 1 addition & 0 deletions Dockerfile
@@ -1,6 +1,7 @@
FROM geonode/geonode-base:latest-ubuntu-22.04
RUN rm -rf /usr/src/geonode
RUN git clone https://github.com/GeoNode/geonode.git /usr/src/geonode
RUN cd /usr/src/geonode && git fetch --all && git checkout assets_master && cd -
RUN mkdir -p /usr/src/importer

RUN cd ..
66 changes: 63 additions & 3 deletions importer/api/tests.py
@@ -11,6 +11,9 @@

from importer.models import ResourceHandlerInfo
from importer.tests.utils import ImporterBaseTestSupport
from importer.orchestrator import orchestrator
from django.utils.module_loading import import_string
from geonode.assets.models import LocalAsset


class TestImporterViewSet(ImporterBaseTestSupport):
@@ -50,7 +53,10 @@ def test_raise_exception_if_file_is_not_a_handled(self):
def test_gpkg_raise_error_with_invalid_payload(self):
self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(name="test.gpkg", content=b"some-content"),
"base_file": SimpleUploadedFile(
name="test.gpkg",
content=b'{"type": "FeatureCollection", "content": "some-content"}',
),
"store_spatial_files": "invalid",
}
expected = {
@@ -70,7 +76,10 @@ def test_gpkg_task_is_called(self, patch_upload):

self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(name="test.gpkg", content=b"some-content"),
"base_file": SimpleUploadedFile(
name="test.gpkg",
content=b'{"type": "FeatureCollection", "content": "some-content"}',
),
"store_spatial_files": True,
}

@@ -85,7 +94,8 @@ def test_geojson_task_is_called(self, patch_upload):
self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(
name="test.geojson", content=b"some-content"
name="test.geojson",
content=b'{"type": "FeatureCollection", "content": "some-content"}',
),
"store_spatial_files": True,
}
@@ -153,3 +163,53 @@ def test_copy_ther_resource_if_file_handler_is_set(self, _orc):

self.assertEqual(200, response.status_code)
_orc.s.assert_called_once()

@patch("importer.api.views.import_orchestrator")
def test_asset_is_created_before_the_import_start(self, patch_upload):
patch_upload.apply_async.side_effect = MagicMock()

self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(
name="test.geojson",
content=b'{"type": "FeatureCollection", "content": "some-content"}',
),
"store_spatial_files": True,
}

response = self.client.post(self.url, data=payload)

self.assertEqual(201, response.status_code)

_exec = orchestrator.get_execution_object(response.json()["execution_id"])

asset_handler = import_string(_exec.input_params["asset_module_path"])
self.assertTrue(asset_handler.objects.filter(id=_exec.input_params["asset_id"]))

asset_handler.objects.filter(id=_exec.input_params["asset_id"]).delete()

@patch("importer.api.views.import_orchestrator")
@patch(
"importer.api.views.UploadLimitValidator.validate_parallelism_limit_per_user"
)
def test_asset_should_be_deleted_if_created_during_with_exception(
self, validate_parallelism_limit_per_user, patch_upload
):
patch_upload.apply_async.s.side_effect = MagicMock()
validate_parallelism_limit_per_user.side_effect = Exception("random exception")

self.client.force_login(get_user_model().objects.get(username="admin"))
payload = {
"base_file": SimpleUploadedFile(
name="test.geojson",
content=b'{"type": "FeatureCollection", "content": "some-content"}',
),
"store_spatial_files": True,
}

response = self.client.post(self.url, data=payload)

self.assertEqual(500, response.status_code)
self.assertFalse(LocalAsset.objects.exists())
52 changes: 46 additions & 6 deletions importer/api/views.py
@@ -20,7 +20,7 @@
from urllib.parse import urljoin
from django.conf import settings
from django.urls import reverse

from pathlib import Path
from geonode.resource.enumerator import ExecutionRequestAction
from django.utils.translation import gettext_lazy as _
from dynamic_rest.filters import DynamicFilterBackend, DynamicSortingFilter
@@ -46,6 +46,8 @@
from rest_framework.parsers import FileUploadParser, MultiPartParser
from rest_framework.permissions import IsAuthenticatedOrReadOnly
from rest_framework.response import Response
from geonode.assets.handlers import asset_handler_registry
from geonode.assets.local import LocalAssetHandler

logger = logging.getLogger(__name__)

@@ -91,6 +93,8 @@ def create(self, request, *args, **kwargs):
"""
_file = request.FILES.get("base_file") or request.data.get("base_file")
execution_id = None
asset_handler = LocalAssetHandler()
asset_dir = asset_handler._create_asset_dir()

serializer = self.get_serializer_class()
data = serializer(data=request.data)
@@ -107,27 +111,40 @@

if "zip_file" in _data or "kmz_file" in _data:
# if a zipfile is provided, we need to unzip it before searching for a handler
zipname = Path(_data["base_file"].name).stem
storage_manager = StorageManager(
remote_files={"base_file": _data.get("zip_file", _data.get("kmz_file"))}
)
# clone and unzip the base_file
storage_manager.clone_remote_files()
storage_manager.clone_remote_files(
cloning_directory=asset_dir, create_tempdir=False
)
# update the payload with the unzipped paths
_data.update(storage_manager.get_retrieved_paths())
_data.update(
{
**{"original_zip_name": zipname},
**storage_manager.get_retrieved_paths(),
}
)

handler = orchestrator.get_handler(_data)

if _file and handler:
asset = None
try:
# cloning data into a local folder
extracted_params, _data = handler.extract_params_from_data(_data)
if storage_manager is None:
# means that the storage manager is not initialized yet, so
# the file is not a zip
storage_manager = StorageManager(remote_files=_data)
storage_manager.clone_remote_files()
storage_manager.clone_remote_files(
cloning_directory=asset_dir, create_tempdir=False
)
# get filepath
files = storage_manager.get_retrieved_paths()
asset, files = self.generate_asset_and_retrieve_paths(
request, storage_manager, handler
)

upload_validator = UploadLimitValidator(request.user)
upload_validator.validate_parallelism_limit_per_user()
@@ -144,6 +161,10 @@ def create(self, request, *args, **kwargs):
input_params={
**{"files": files, "handler_module_path": str(handler)},
**extracted_params,
**{
"asset_id": asset.id,
"asset_module_path": f"{asset.__module__}.{asset.__class__.__name__}",
},
},
legacy_upload_name=_file.name,
action=action,
@@ -159,7 +180,12 @@ def create(self, request, *args, **kwargs):
except Exception as e:
# in case of any exception, it is better to delete the
# cloned files to keep the storage under control
if storage_manager is not None:
if asset:
try:
asset.delete()
except Exception as _exc:
logger.warning(_exc)
elif storage_manager is not None:
storage_manager.delete_retrieved_paths(force=True)
if execution_id:
orchestrator.set_as_failed(execution_id=str(execution_id), reason=e)
@@ -168,6 +194,20 @@

raise ImportException(detail="No handlers found for this dataset type")

def generate_asset_and_retrieve_paths(self, request, storage_manager, handler):
asset_handler = asset_handler_registry.get_default_handler()
_files = storage_manager.get_retrieved_paths()
asset = asset_handler.create(
title="Original",
owner=request.user,
description=None,
type=handler.id,
files=list(set(_files.values())),
clone_files=False,
)

return asset, _files


class ResourceImporter(DynamicModelViewSet):
authentication_classes = [
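The core idea of the views.py change above is that the API no longer hands raw file paths to the background tasks on their own: it stores a reference to the newly created asset (its primary key plus the dotted path of its model class) in the ExecutionRequest input parameters, and downstream code re-imports the class lazily. A condensed sketch of that round trip follows; the two helper names are hypothetical, only the dictionary keys and the import_string lookup come from the diff.

from django.utils.module_loading import import_string


def serialize_asset_reference(asset) -> dict:
    # Producing side (importer/api/views.py): the reference is stored as
    # JSON-serializable values inside the execution request's input_params.
    return {
        "asset_id": asset.id,
        "asset_module_path": f"{asset.__module__}.{asset.__class__.__name__}",
    }


def resolve_asset_reference(input_params: dict):
    # Consuming side (importer/celery_tasks.py): the model class is re-imported
    # from its dotted path and the instance is looked up by primary key;
    # .first() returns None instead of raising when the asset no longer exists.
    asset_model = import_string(input_params["asset_module_path"])
    return asset_model.objects.filter(id=input_params["asset_id"]).first()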
10 changes: 8 additions & 2 deletions importer/celery_tasks.py
@@ -329,6 +329,12 @@ def create_geonode_resource(

_files = _exec.input_params.get("files")

_asset = (
import_string(_exec.input_params.get("asset_module_path"))
.objects.filter(id=_exec.input_params.get("asset_id"))
.first()
)

handler = import_string(handler_module_path)()
_overwrite = _exec.input_params.get("overwrite_existing_layer")

@@ -337,14 +343,14 @@
layer_name=layer_name,
alternate=alternate,
execution_id=execution_id,
files=_files,
asset=_asset,
)
else:
resource = handler.create_geonode_resource(
layer_name=layer_name,
alternate=alternate,
execution_id=execution_id,
files=_files,
asset=_asset,
)

if _overwrite:
2 changes: 1 addition & 1 deletion importer/datastore.py
@@ -12,7 +12,7 @@ def __init__(
self,
files: list,
handler_module_path: str,
user: get_user_model(),
user: get_user_model(), # type: ignore
execution_id: str,
) -> None:
self.files = files
2 changes: 1 addition & 1 deletion importer/handlers/README.md
@@ -158,7 +158,7 @@ class BaseVectorFileHandler(BaseHandler):
return

def overwrite_geonode_resource(
self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None
self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None
):
"""
Base function to override the resource into geonode. Each handler can specify
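As the README fragment above shows, handler entry points now take an asset keyword in place of files. A concrete handler overriding create_geonode_resource or overwrite_geonode_resource is expected to follow the same contract; here is a minimal, hypothetical stub, where the class name and the import path of the base class are assumptions rather than part of this diff.

from geonode.layers.models import Dataset
from importer.handlers.common.vector import BaseVectorFileHandler  # assumed import path


class ExampleVectorHandler(BaseVectorFileHandler):
    def create_geonode_resource(
        self,
        layer_name: str,
        alternate: str,
        execution_id: str,
        resource_type: Dataset = Dataset,
        asset=None,
    ):
        # `asset` (e.g. a geonode.assets.models.LocalAsset) replaces the old
        # `files=` argument and points at the original uploaded files.
        return super().create_geonode_resource(
            layer_name=layer_name,
            alternate=alternate,
            execution_id=execution_id,
            resource_type=resource_type,
            asset=asset,
        )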
13 changes: 12 additions & 1 deletion importer/handlers/base.py
@@ -4,6 +4,7 @@

from geonode.resource.enumerator import ExecutionRequestAction as exa
from geonode.layers.models import Dataset
from importer.api.exception import ImportException
from importer.utils import ImporterRequestAction as ira
from django_celery_results.models import TaskResult
from django.db.models import Q
@@ -56,9 +57,18 @@ def get_task_list(cls, action) -> tuple:
def default_geometry_column_name(self):
return "geometry"

@property
def id(self):
pk = self.supported_file_extension_config.get("id", None)
if pk is None:
raise ImportException(
"PK must be defined, check that supported_file_extension_config had been correctly defined, it cannot be empty"
)
return pk

@property
def supported_file_extension_config(self):
return NotImplementedError
return {}

@property
def can_handle_xml_file(self) -> bool:
@@ -149,6 +159,7 @@ def perform_last_step(execution_id):
]
_exec.output_params.update({"resources": resource_output_params})
_exec.save()

return _exec

def fixup_name(self, name):
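The new id property reads the "id" key of supported_file_extension_config, and the view uses it as the asset type (type=handler.id in generate_asset_and_retrieve_paths), so every concrete handler must now declare that key. A sketch of what such a configuration might look like follows; the values are illustrative, not taken from the repository.

from importer.handlers.base import BaseHandler


class ExampleGeoJSONHandler(BaseHandler):
    @property
    def supported_file_extension_config(self):
        # "id" is the only key the new BaseHandler.id property strictly requires;
        # the remaining keys are illustrative.
        return {
            "id": "geojson",
            "label": "GeoJSON",
            "format": "vector",
            "ext": ["geojson", "json"],
        }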