Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete uploaded file in case of fail #228

Merged
merged 4 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion importer/celery_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ def rollback(self, *args, **kwargs):
find_key_recursively(kwargs, "error")
or "Some issue has occured, please check the logs"
)
orchestrator.set_as_failed(exec_id, reason=error)
orchestrator.set_as_failed(exec_id, reason=error, delete_file=False)
return exec_id, kwargs


Expand Down
1 change: 1 addition & 0 deletions importer/handlers/common/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ def rollback(
):
steps = self.ACTIONS.get(action_to_rollback)
if rollback_from_step not in steps:
logger.info(f"Step not found {rollback_from_step}, skipping")
return
step_index = steps.index(rollback_from_step)
# the start_import, start_copy etc.. dont do anything as step, is just the start
Expand Down
2 changes: 1 addition & 1 deletion importer/handlers/common/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def rollback(
):
steps = self.ACTIONS.get(action_to_rollback)
if rollback_from_step not in steps:
logger.info("Step not found, skipping")
logger.info(f"Step not found {rollback_from_step}, skipping")
return
step_index = steps.index(rollback_from_step)
# the start_import, start_copy etc.. dont do anything as step, is just the start
Expand Down
14 changes: 12 additions & 2 deletions importer/handlers/gpkg/tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from django.test import TestCase
import copy
import os
import shutil
from django.test import TestCase, override_settings
from importer.handlers.gpkg.exceptions import InvalidGeopackageException
from django.contrib.auth import get_user_model
from importer.handlers.gpkg.handler import GPKGFileHandler
Expand Down Expand Up @@ -110,13 +113,19 @@ def test_can_handle_should_return_false_for_other_files(self):
actual = self.handler.can_handle({"base_file": "random.file"})
self.assertFalse(actual)

@override_settings(MEDIA_ROOT="/tmp/")
def test_single_message_error_handler(self):
# lets copy the file to the temporary folder
# later will be removed
shutil.copy(self.valid_gpkg, '/tmp')
exec_id = orchestrator.create_execution_request(
user=get_user_model().objects.first(),
func_name="funct1",
step="step",
input_params={
"files": self.valid_files,
"files": {
"base_file": '/tmp/valid.gpkg'
},
"skip_existing_layer": True,
"handler_module_path": str(self.handler),
},
Expand All @@ -139,3 +148,4 @@ def test_single_message_error_handler(self):
)

self.assertEqual("FAILURE", TaskResult.objects.get(task_id=str(exec_id)).status)
self.assertFalse(os.path.exists('/tmp/valid.gpkg'))
15 changes: 13 additions & 2 deletions importer/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
from celery import states
from django.contrib.auth import get_user_model
from django.db.models import Q
from django.db.transaction import rollback
from django.utils import timezone
from django.utils.module_loading import import_string
from django_celery_results.models import TaskResult
from geonode.base.enumerations import STATE_INVALID, STATE_PROCESSED, STATE_RUNNING
from geonode.resource.models import ExecutionRequest
from geonode.upload.models import Upload
from geonode.storage.manager import storage_manager
from rest_framework import serializers

from importer.api.exception import ImportException
Expand Down Expand Up @@ -157,7 +157,7 @@ def perform_next_step(
self.set_as_failed(execution_id, reason=error_handler(e, execution_id))
raise e

def set_as_failed(self, execution_id, reason=None):
def set_as_failed(self, execution_id, reason=None, delete_file=True):
"""
Utility method to set the ExecutionRequest object to fail
"""
Expand All @@ -169,6 +169,17 @@ def set_as_failed(self, execution_id, reason=None):
log=reason,
legacy_status=STATE_INVALID,
)
# delete
if delete_file:
exec_obj = self.get_execution_object(execution_id)
_files = exec_obj.input_params.get("files")
# better to delete each single file since it can be a remote storage service
if _files:
logger.info(
"Execution failed, removing uploaded file to save disk space"
)
for _file in _files.values():
storage_manager.delete(_file)

def set_as_partially_failed(self, execution_id, reason=None):
"""
Expand Down
Empty file modified importer/tests/fixture/valid.gpkg
100644 → 100755
Empty file.
12 changes: 10 additions & 2 deletions importer/tests/unit/test_orchestrator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import uuid
from django.conf import settings
from django.contrib.auth import get_user_model
from geonode.tests.base import GeoNodeBaseTestSupport
from unittest.mock import patch
Expand Down Expand Up @@ -178,13 +180,19 @@ def test_perform_with_error_set_invalid_status(self, mock_celery):
self.assertIsNotNone(Upload.objects.get(metadata__icontains=_id))

def test_set_as_failed(self):
# creating the temporary file that will be deleted
fake_path = f"{settings.MEDIA_ROOT}/file.txt"
with open(fake_path, "w"):
pass

self.assertTrue(os.path.exists(fake_path))
# we need to create first the execution
_uuid = self.orchestrator.create_execution_request(
user=get_user_model().objects.first(),
func_name="name",
step="importer.create_geonode_resource", # adding the first step for the GPKG file
input_params={
"files": {"base_file": "/tmp/file.txt"},
"files": {"base_file": fake_path},
"store_spatial_files": True,
},
)
Expand All @@ -195,7 +203,7 @@ def test_set_as_failed(self):
req = ExecutionRequest.objects.get(exec_id=_uuid)
self.assertTrue(req.status, ExecutionRequest.STATUS_FAILED)
self.assertTrue(req.log, "automatic test")

self.assertFalse(os.path.exists(fake_path))
# check legacy execution status
legacy = Upload.objects.filter(metadata__contains=_uuid)
self.assertTrue(legacy.exists())
Expand Down
4 changes: 3 additions & 1 deletion importer/tests/unit/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def test_copy_geonode_resource(self, async_call):
@patch(
"importer.handlers.gpkg.handler.GPKGFileHandler._create_geonode_resource_rollback"
)
@override_settings(MEDIA_ROOT="/tmp/")
def test_rollback_works_as_expected_vector_step(
self,
_create_geonode_resource_rollback,
Expand Down Expand Up @@ -412,6 +413,7 @@ def test_rollback_works_as_expected_vector_step(
@patch(
"importer.handlers.geotiff.handler.GeoTiffFileHandler._create_geonode_resource_rollback"
)
@override_settings(MEDIA_ROOT="/tmp/")
def test_rollback_works_as_expected_raster(
self,
_create_geonode_resource_rollback,
Expand Down Expand Up @@ -444,7 +446,7 @@ def test_rollback_works_as_expected_raster(
step=conf[0], # step name
action="import",
input_params={
"files": {"base_file": "/filepath"},
"files": {"base_file": "/tmp/filepath"},
"overwrite_existing_layer": True,
"store_spatial_files": True,
"handler_module_path": "importer.handlers.geotiff.handler.GeoTiffFileHandler",
Expand Down