Skip to content

Commit

Permalink
Merge pull request #163 from aatmanvaidya/hash-op
Browse files Browse the repository at this point in the history
fix: hash operator and worker
  • Loading branch information
duggalsu committed Mar 12, 2024
2 parents da40ef9 + 64d0797 commit d51aeb3
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@

def initialize(param):
    """Prepare the operator by importing its dependency into module scope.

    ``hashlib`` is imported here rather than at the top of the module and is
    bound as a module-level global, so ``run`` can use it after this function
    has been called.

    Args:
        param: Operator parameters. Unused by this operator; callers in this
            project pass ``{}`` or ``None``.
    """
    global hashlib
    import hashlib


def run(media_path):
    """Return the BLAKE2b hex digest of the file referenced by *media_path*.

    The file is streamed in fixed-size chunks so large media files are never
    loaded into memory in one piece.

    Relies on the module-level ``hashlib`` name that ``initialize`` binds, so
    ``initialize`` must be called before the first call to ``run``.

    Args:
        media_path: Mapping whose ``"path"`` entry is the location of the
            file on disk.

    Returns:
        The digest as a lowercase hexadecimal string (128 characters for
        BLAKE2b's default 64-byte digest).

    Raises:
        KeyError: If ``media_path`` has no ``"path"`` entry.
        OSError: If the file cannot be opened or read.
    """
    file_path = media_path["path"]
    # Only one hasher is created; the previous throwaway md5 object was dead
    # work and could fail on builds where md5 is disabled.
    file_hash = hashlib.blake2b()
    with open(file_path, "rb") as f:
        # The read size affects I/O granularity only, never the digest.
        while chunk := f.read(4096):
            file_hash.update(chunk)

    return file_hash.hexdigest()


# if __name__ == "__main__":
# media_file_path = r'sample_data/sample-cat-video.mp4'
# media_file_path = {"path": r"core/operators/sample_data/sample-cat-video.mp4"}
# initialize(param={})
# md5_hash = run(media_file_path)
# print(md5_hash)
# print(len(md5_hash))
# file_hash = run(media_file_path)
# print(file_hash)
# print(len(file_hash))
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import unittest
from unittest.case import skip
from core.operators import md5_hash
from core.operators import media_file_hash
from core.models.media_factory import VideoFactory

class Test(unittest.TestCase):
@classmethod
def setUpClass(cls):
# initialize operator
md5_hash.initialize(param={})
media_file_hash.initialize(param={})

@classmethod
def tearDownClass(cls):
Expand All @@ -16,12 +16,12 @@ def tearDownClass(cls):

def test_sample_media_from_disk(self):
media_file_path = VideoFactory.make_from_file_on_disk("core/operators/sample_data/sample-cat-video.mp4")
hash = md5_hash.run(media_file_path)
self.assertEqual(32, len(hash))
hash = media_file_hash.run(media_file_path)
self.assertEqual(128, len(hash))

# @skip
def test_sample_media_from_url(self):
media_url = "https://tattle-media.s3.amazonaws.com/test-data/tattle-search/cat_vid_2mb.mp4"
media_path = VideoFactory.make_from_url(media_url)
hash = md5_hash.run(media_path)
self.assertEqual(32, len(hash))
hash = media_file_hash.run(media_path)
self.assertEqual(128, len(hash))
File renamed without changes.
4 changes: 2 additions & 2 deletions src/worker/md5hash/config.yml → src/worker/hash/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ queue :
operators :
label : "Operators"
parameters :
- name : "md5 hash"
type : "md5_hash"
- name : "Media file Hash"
type : "media_file_hash"
parameters: {}
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
from time import sleep

try:
feluda = Feluda("worker/md5hash/config.yml")
feluda = Feluda("worker/hash/config.yml")
feluda.setup()
count_queue = feluda.config.queue.parameters.queues[0]['name']
feluda.start_component(ComponentType.QUEUE)

for _ in range(25):
for _ in range(5):
dummy_payload = {
"id": str(12345),
"path": 'https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/sample-cat-video.mp4'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from core.feluda import ComponentType, Feluda
from core.logger import Logger
from core.operators import md5_hash
from core.operators import media_file_hash
import json
from datetime import datetime
from core.models.media import MediaType
Expand Down Expand Up @@ -47,7 +47,7 @@ def worker(ch, method, properties, body):
video_path = VideoFactory.make_from_url(file_content['path'])
try:
log.info("Processing file")
hash = md5_hash.run(video_path)
hash = media_file_hash.run(video_path)
log.info(hash)
report = make_report_indexed(file_content, "indexed")
feluda.queue.message(feluda.config.queue.parameters.queues[1]['name'], report)
Expand All @@ -61,11 +61,11 @@ def worker(ch, method, properties, body):
return worker

try:
feluda = Feluda("worker/md5hash/config.yml")
feluda = Feluda("worker/hash/config.yml")
feluda.setup()
count_queue = feluda.config.queue.parameters.queues[0]['name']
feluda.start_component(ComponentType.QUEUE)
md5_hash.initialize(param=None)
media_file_hash.initialize(param=None)
feluda.queue.listen(count_queue, indexer(feluda))
except Exception as e:
print("Error Initializing Indexer", e)
Expand Down

0 comments on commit d51aeb3

Please sign in to comment.