From 64d0797b98ffb90ce5569f587b6403b9373893cf Mon Sep 17 00:00:00 2001 From: Aatman Vaidya Date: Tue, 12 Mar 2024 16:43:48 +0530 Subject: [PATCH] fix: hash operator and worker --- .../{md5_hash.py => media_file_hash.py} | 16 ++++++++-------- ...{test_md5_hash.py => test_media_file_hash.py} | 12 ++++++------ .../Dockerfile.hash_worker} | 0 .../Dockerfile.hash_worker.graviton} | 0 src/worker/{md5hash => hash}/config.yml | 4 ++-- .../hash_payload_writer.py} | 4 ++-- .../md5hash_worker.py => hash/hash_worker.py} | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) rename src/core/operators/{md5_hash.py => media_file_hash.py} (58%) rename src/core/operators/{test_md5_hash.py => test_media_file_hash.py} (71%) rename src/worker/{md5hash/Dockerfile.md5hash_worker => hash/Dockerfile.hash_worker} (100%) rename src/worker/{md5hash/Dockerfile.md5hash_worker.graviton => hash/Dockerfile.hash_worker.graviton} (100%) rename src/worker/{md5hash => hash}/config.yml (79%) rename src/worker/{md5hash/md5hash_payload_writer.py => hash/hash_payload_writer.py} (89%) rename src/worker/{md5hash/md5hash_worker.py => hash/hash_worker.py} (93%) diff --git a/src/core/operators/md5_hash.py b/src/core/operators/media_file_hash.py similarity index 58% rename from src/core/operators/md5_hash.py rename to src/core/operators/media_file_hash.py index abd043a9..4fb5d01a 100644 --- a/src/core/operators/md5_hash.py +++ b/src/core/operators/media_file_hash.py @@ -1,21 +1,21 @@ - def initialize(param): global hashlib import hashlib + def run(media_path): file_path = media_path["path"] with open(file_path, "rb") as f: - file_hash = hashlib.md5() - # file_hash = hashlib.blake2b() + file_hash = hashlib.blake2b() while chunk := f.read(4092): file_hash.update(chunk) - + return file_hash.hexdigest() + # if __name__ == "__main__": -# media_file_path = r'sample_data/sample-cat-video.mp4' +# media_file_path = {"path": r"core/operators/sample_data/sample-cat-video.mp4"} # initialize(param={}) -# md5_hash 
= run(media_file_path) -# print(md5_hash) -# print(len(md5_hash)) \ No newline at end of file +# file_hash = run(media_file_path) +# print(file_hash) +# print(len(file_hash)) diff --git a/src/core/operators/test_md5_hash.py b/src/core/operators/test_media_file_hash.py similarity index 71% rename from src/core/operators/test_md5_hash.py rename to src/core/operators/test_media_file_hash.py index d2febae8..2e248619 100644 --- a/src/core/operators/test_md5_hash.py +++ b/src/core/operators/test_media_file_hash.py @@ -1,13 +1,13 @@ import unittest from unittest.case import skip -from core.operators import md5_hash +from core.operators import media_file_hash from core.models.media_factory import VideoFactory class Test(unittest.TestCase): @classmethod def setUpClass(cls): # initialize operator - md5_hash.initialize(param={}) + media_file_hash.initialize(param={}) @classmethod def tearDownClass(cls): @@ -16,12 +16,12 @@ def tearDownClass(cls): def test_sample_media_from_disk(self): media_file_path = VideoFactory.make_from_file_on_disk("core/operators/sample_data/sample-cat-video.mp4") - hash = md5_hash.run(media_file_path) - self.assertEqual(32, len(hash)) + hash = media_file_hash.run(media_file_path) + self.assertEqual(128, len(hash)) # @skip def test_sample_media_from_url(self): media_url = "https://tattle-media.s3.amazonaws.com/test-data/tattle-search/cat_vid_2mb.mp4" media_path = VideoFactory.make_from_url(media_url) - hash = md5_hash.run(media_path) - self.assertEqual(32, len(hash)) \ No newline at end of file + hash = media_file_hash.run(media_path) + self.assertEqual(128, len(hash)) \ No newline at end of file diff --git a/src/worker/md5hash/Dockerfile.md5hash_worker b/src/worker/hash/Dockerfile.hash_worker similarity index 100% rename from src/worker/md5hash/Dockerfile.md5hash_worker rename to src/worker/hash/Dockerfile.hash_worker diff --git a/src/worker/md5hash/Dockerfile.md5hash_worker.graviton b/src/worker/hash/Dockerfile.hash_worker.graviton similarity index 
100% rename from src/worker/md5hash/Dockerfile.md5hash_worker.graviton rename to src/worker/hash/Dockerfile.hash_worker.graviton diff --git a/src/worker/md5hash/config.yml b/src/worker/hash/config.yml similarity index 79% rename from src/worker/md5hash/config.yml rename to src/worker/hash/config.yml index 74354221..5ac5fe4e 100644 --- a/src/worker/md5hash/config.yml +++ b/src/worker/hash/config.yml @@ -10,6 +10,6 @@ queue : operators : label : "Operators" parameters : - - name : "md5 hash" - type : "md5_hash" + - name : "Media file Hash" + type : "media_file_hash" parameters: {} diff --git a/src/worker/md5hash/md5hash_payload_writer.py b/src/worker/hash/hash_payload_writer.py similarity index 89% rename from src/worker/md5hash/md5hash_payload_writer.py rename to src/worker/hash/hash_payload_writer.py index e4f70f86..7a2f3ec3 100644 --- a/src/worker/md5hash/md5hash_payload_writer.py +++ b/src/worker/hash/hash_payload_writer.py @@ -4,12 +4,12 @@ from time import sleep try: - feluda = Feluda("worker/md5hash/config.yml") + feluda = Feluda("worker/hash/config.yml") feluda.setup() count_queue = feluda.config.queue.parameters.queues[0]['name'] feluda.start_component(ComponentType.QUEUE) - for _ in range(25): + for _ in range(5): dummy_payload = { "id": str(12345), "path": 'https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/sample-cat-video.mp4' diff --git a/src/worker/md5hash/md5hash_worker.py b/src/worker/hash/hash_worker.py similarity index 93% rename from src/worker/md5hash/md5hash_worker.py rename to src/worker/hash/hash_worker.py index 789fa032..8ac16d97 100644 --- a/src/worker/md5hash/md5hash_worker.py +++ b/src/worker/hash/hash_worker.py @@ -1,6 +1,6 @@ from core.feluda import ComponentType, Feluda from core.logger import Logger -from core.operators import md5_hash +from core.operators import media_file_hash import json from datetime import datetime from core.models.media import MediaType @@ -47,7 +47,7 @@ def worker(ch, 
method, properties, body): video_path = VideoFactory.make_from_url(file_content['path']) try: log.info("Processing file") - hash = md5_hash.run(video_path) + hash = media_file_hash.run(video_path) log.info(hash) report = make_report_indexed(file_content, "indexed") feluda.queue.message(feluda.config.queue.parameters.queues[1]['name'], report) @@ -61,11 +61,11 @@ def worker(ch, method, properties, body): return worker try: - feluda = Feluda("worker/md5hash/config.yml") + feluda = Feluda("worker/hash/config.yml") feluda.setup() count_queue = feluda.config.queue.parameters.queues[0]['name'] feluda.start_component(ComponentType.QUEUE) - md5_hash.initialize(param=None) + media_file_hash.initialize(param=None) feluda.queue.listen(count_queue, indexer(feluda)) except Exception as e: print("Error Initializing Indexer", e)