Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added audio length (in seconds) to database. Added relative confidence value #148

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ mp3
*.mp3
.DS_Store
*.cnf
build/
.idea/
PyDejavu.egg-info/
2 changes: 1 addition & 1 deletion dejavu.cnf.SAMPLE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"database": {
"host": "127.0.0.1",
"user": "root",
"passwd": "",
"passwd": "",
"db": "dejavu"
}
}
31 changes: 22 additions & 9 deletions dejavu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class Dejavu(object):
MATCH_TIME = 'match_time'
OFFSET = 'offset'
OFFSET_SECS = 'offset_seconds'
AUDIO_LENGTH = 'audio_length'
RELATIVE_CONFIDENCE = 'relative_confidence'

def __init__(self, config):
super(Dejavu, self).__init__()
Expand Down Expand Up @@ -74,7 +76,7 @@ def fingerprint_directory(self, path, extensions, nprocesses=None):
# Loop till we have all of them
while True:
try:
song_name, hashes, file_hash = iterator.next()
song_name, hashes, file_hash, audio_length = iterator.next()
except multiprocessing.TimeoutError:
continue
except StopIteration:
Expand All @@ -84,12 +86,14 @@ def fingerprint_directory(self, path, extensions, nprocesses=None):
# Print traceback because we can't reraise it here
traceback.print_exc(file=sys.stdout)
else:
sid = self.db.insert_song(song_name, file_hash)
print("Inserting " + song_name + " in database")
sid = self.db.insert_song(song_name, file_hash, audio_length)

self.db.insert_hashes(sid, hashes)
self.db.set_song_fingerprinted(sid)
self.get_fingerprinted_songs()

print(song_name + " inserted in database")
pool.close()
pool.join()

Expand All @@ -101,22 +105,29 @@ def fingerprint_file(self, filepath, song_name=None):
if song_hash in self.songhashes_set:
print "%s already fingerprinted, continuing..." % song_name
else:
song_name, hashes, file_hash = _fingerprint_worker(
song_name, hashes, file_hash, audio_length = _fingerprint_worker(
filepath,
self.limit,
song_name=song_name
)
sid = self.db.insert_song(song_name, file_hash)
print("Inserting " + song_name + " in database")
sid = self.db.insert_song(song_name, file_hash, audio_length)

self.db.insert_hashes(sid, hashes)
self.db.set_song_fingerprinted(sid)
self.get_fingerprinted_songs()
print(song_name + " inserted in database")

def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS):
hashes = fingerprint.fingerprint(samples, Fs=Fs)
return self.db.return_matches(hashes)

def align_matches(self, matches):
mapper = {}
total_hashes = 0
for hash, offset in hashes:
mapper[hash.upper()[:fingerprint.FINGERPRINT_REDUCTION]] = offset
total_hashes += 1
return (self.db.return_matches(mapper), total_hashes)

def align_matches(self, matches, total_hashes):
"""
Finds hash matches that align in time with other matches and finds
consensus about which hashes are "true" signal from the audio.
Expand Down Expand Up @@ -157,6 +168,8 @@ def align_matches(self, matches):
Dejavu.SONG_ID : song_id,
Dejavu.SONG_NAME : songname,
Dejavu.CONFIDENCE : largest_count,
Dejavu.AUDIO_LENGTH : song.get(Database.AUDIO_LENGTH, None),
Dejavu.RELATIVE_CONFIDENCE : (largest_count*100)/float(total_hashes),
Dejavu.OFFSET : int(largest),
Dejavu.OFFSET_SECS : nseconds,
Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None),}
Expand All @@ -177,7 +190,7 @@ def _fingerprint_worker(filename, limit=None, song_name=None):

songname, extension = os.path.splitext(os.path.basename(filename))
song_name = song_name or songname
channels, Fs, file_hash = decoder.read(filename, limit)
channels, Fs, file_hash, audio_length = decoder.read(filename, limit)
result = set()
channel_amount = len(channels)

Expand All @@ -191,7 +204,7 @@ def _fingerprint_worker(filename, limit=None, song_name=None):
filename))
result |= set(hashes)

return song_name, result, file_hash
return song_name, result, file_hash, audio_length


def chunkify(lst, n):
Expand Down
1 change: 1 addition & 0 deletions dejavu/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class Database(object):
FIELD_SONGNAME = 'song_name'
FIELD_OFFSET = 'offset'
FIELD_HASH = 'hash'
AUDIO_LENGTH = 'audio_length'

# Name of your Database subclass, this is used in configuration
# to refer to your class
Expand Down
62 changes: 36 additions & 26 deletions dejavu/database_sql.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from __future__ import absolute_import
from itertools import izip_longest
import Queue
import math

import MySQLdb as mysql
from MySQLdb.cursors import DictCursor
import pymysql as mysql
from pymysql.cursors import DictCursor

from dejavu.database import Database
from dejavu.fingerprint import FINGERPRINT_REDUCTION

from multiprocessing import cpu_count

from itertools import chain

class SQLDatabase(Database):
"""
Expand Down Expand Up @@ -56,14 +61,14 @@ class SQLDatabase(Database):
# creates
CREATE_FINGERPRINTS_TABLE = """
CREATE TABLE IF NOT EXISTS `%s` (
`%s` binary(10) not null,
`%s` binary (%s) not null,
`%s` mediumint unsigned not null,
`%s` int unsigned not null,
INDEX (%s),
UNIQUE KEY `unique_constraint` (%s, %s, %s),
FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE
) ENGINE=INNODB;""" % (
FINGERPRINTS_TABLENAME, Database.FIELD_HASH,
FINGERPRINTS_TABLENAME, Database.FIELD_HASH, str(math.ceil(FINGERPRINT_REDUCTION/2.)),
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID
Expand All @@ -75,11 +80,12 @@ class SQLDatabase(Database):
`%s` varchar(250) not null,
`%s` tinyint default 0,
`%s` binary(20) not null,
`%s` float,
PRIMARY KEY (`%s`),
UNIQUE KEY `%s` (`%s`)
) ENGINE=INNODB;""" % (
SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED,
Database.FIELD_FILE_SHA1,
Database.FIELD_FILE_SHA1, Database.AUDIO_LENGTH,
Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID,
)

Expand All @@ -89,8 +95,8 @@ class SQLDatabase(Database):
(UNHEX(%%s), %%s, %%s);
""" % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)

INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % (
SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1)
INSERT_SONG = "INSERT INTO %s (%s, %s, %s) values (%%s, UNHEX(%%s), %%s);" % (
SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.AUDIO_LENGTH)

# selects
SELECT = """
Expand All @@ -107,8 +113,8 @@ class SQLDatabase(Database):
""" % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME)

SELECT_SONG = """
SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s;
""" % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID)
SELECT %s, HEX(%s) as %s, %s FROM %s WHERE %s = %%s;
""" % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, Database.AUDIO_LENGTH, SONGS_TABLENAME, Database.FIELD_SONG_ID)

SELECT_NUM_FINGERPRINTS = """
SELECT COUNT(*) as n FROM %s
Expand Down Expand Up @@ -234,12 +240,12 @@ def insert(self, hash, sid, offset):
with self.cursor() as cur:
cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset))

def insert_song(self, songname, file_hash):
def insert_song(self, songname, file_hash, audio_length):
"""
Inserts song in the database and returns the ID of the inserted record.
"""
with self.cursor() as cur:
cur.execute(self.INSERT_SONG, (songname, file_hash))
cur.execute(self.INSERT_SONG, (songname, file_hash, audio_length))
return cur.lastrowid

def query(self, hash):
Expand Down Expand Up @@ -272,34 +278,38 @@ def insert_hashes(self, sid, hashes):
for hash, offset in hashes:
values.append((hash, sid, offset))

base_query = "INSERT IGNORE INTO fingerprints (%s, %s, %s) values " % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)
with self.cursor() as cur:
values.sort(key=lambda tup: tup[0])
cur.execute("START TRANSACTION;")
for split_values in grouper(values, 1000):
cur.executemany(self.INSERT_FINGERPRINT, split_values)
values2tuple = tuple(chain.from_iterable(split_values))
query = base_query + ', '.join(['(UNHEX(%s), %s, %s)'] * len(split_values))
query += ";"
cur.execute(query, values2tuple)
cur.execute("COMMIT;")


def return_matches(self, hashes):
def return_matches(self, mapper):
"""
Return the (song_id, offset_diff) tuples associated with
a list of (sha1, sample_offset) values.
"""
# Create a dictionary of hash => offset pairs for later lookups
mapper = {}
for hash, offset in hashes:
mapper[hash.upper()] = offset

# Get an iterable of all the hashes we need
values = mapper.keys()

with self.cursor() as cur:
for split_values in grouper(values, 1000):
# Create our IN part of the query
query = self.SELECT_MULTIPLE
query = query % ', '.join(['UNHEX(%s)'] * len(split_values))
# Create our IN part of the query
query = self.SELECT_MULTIPLE
query = query % ', '.join(['UNHEX(%s)'] * len(values))

cur.execute(query, split_values)
cur.execute(query, values)

for hash, sid, offset in cur:
# (sid, db_offset - song_sampled_offset)
yield (sid, offset - mapper[hash])
for hash, sid, offset in cur:
# (sid, db_offset - song_sampled_offset)
yield (sid, offset - mapper[hash])

def __getstate__(self):
return (self._options,)
Expand Down Expand Up @@ -333,11 +343,11 @@ class Cursor(object):
cur.execute(query)
```
"""
_cache = Queue.Queue(maxsize=5)

def __init__(self, cursor_type=mysql.cursors.Cursor, **options):
super(Cursor, self).__init__()

self._cache = Queue.Queue(maxsize=cpu_count())
try:
conn = self._cache.get_nowait()
except Queue.Empty:
Expand All @@ -352,7 +362,7 @@ def __init__(self, cursor_type=mysql.cursors.Cursor, **options):

@classmethod
def clear_cache(cls):
cls._cache = Queue.Queue(maxsize=5)
cls._cache = Queue.Queue(maxsize=cpu_count())

def __enter__(self):
self.cursor = self.conn.cursor(self.cursor_type)
Expand Down
3 changes: 1 addition & 2 deletions dejavu/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ def read(filename, limit=None):
channels = []
for chn in audiofile:
channels.append(chn)

return channels, audiofile.frame_rate, unique_hash(filename)
return channels, audiofile.frame_rate, unique_hash(filename), float(len(audiofile))/1000.0


def path_to_songname(path):
Expand Down
22 changes: 7 additions & 15 deletions dejavu/fingerprint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import (generate_binary_structure,
iterate_structure, binary_erosion)
Expand Down Expand Up @@ -59,7 +58,11 @@
# Number of leading hexadecimal characters of the SHA1 hash to keep in the
# fingerprint. The fewer you keep, the less storage is needed, but the higher
# the potential for collisions and misclassifications when identifying songs.
FINGERPRINT_REDUCTION = 20
FINGERPRINT_REDUCTION = 40

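# A SHA1 hex digest has 40 hexadecimal characters: clamp the value to 40 and round odd values up to an even count so the hash maps to whole bytes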
FINGERPRINT_REDUCTION = (40 if FINGERPRINT_REDUCTION > 40 else FINGERPRINT_REDUCTION)
FINGERPRINT_REDUCTION = (FINGERPRINT_REDUCTION + 1 if FINGERPRINT_REDUCTION % 2 == 1 else FINGERPRINT_REDUCTION)

def fingerprint(channel_samples, Fs=DEFAULT_FS,
wsize=DEFAULT_WINDOW_SIZE,
Expand All @@ -83,13 +86,13 @@ def fingerprint(channel_samples, Fs=DEFAULT_FS,
arr2D[arr2D == -np.inf] = 0 # replace infs with zeros

# find local maxima
local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)
local_maxima = get_2D_peaks(arr2D, amp_min=amp_min)

# return hashes
return generate_hashes(local_maxima, fan_value=fan_value)


def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
def get_2D_peaks(arr2D, amp_min=DEFAULT_AMP_MIN):
# http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure
struct = generate_binary_structure(2, 1)
neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)
Expand All @@ -116,17 +119,6 @@ def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
frequency_idx = [x[1] for x in peaks_filtered]
time_idx = [x[0] for x in peaks_filtered]

if plot:
# scatter of the peaks
fig, ax = plt.subplots()
ax.imshow(arr2D)
ax.scatter(time_idx, frequency_idx)
ax.set_xlabel('Time')
ax.set_ylabel('Frequency')
ax.set_title("Spectrogram")
plt.gca().invert_yaxis()
plt.show()

return zip(frequency_idx, time_idx)


Expand Down
9 changes: 6 additions & 3 deletions dejavu/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ def __init__(self, dejavu):

def _recognize(self, *data):
matches = []
total_hashes = 0
for d in data:
matches.extend(self.dejavu.find_matches(d, Fs=self.Fs))
return self.dejavu.align_matches(matches)
extracted_matches = self.dejavu.find_matches(d, Fs=self.Fs)
total_hashes += extracted_matches[1]
matches.extend(extracted_matches[0])
return self.dejavu.align_matches(matches, total_hashes)

def recognize(self):
pass # base class does nothing
Expand All @@ -26,7 +29,7 @@ def __init__(self, dejavu):
super(FileRecognizer, self).__init__(dejavu)

def recognize_file(self, filename):
frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
frames, self.Fs, file_hash, audio_length = decoder.read(filename, self.dejavu.limit)

t = time.time()
match = self._recognize(*frames)
Expand Down
8 changes: 5 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
### BEGIN ###
pydub>=0.9.4
PyAudio>=0.2.7
numpy>=1.8.2
scipy>=0.12.1
matplotlib>=1.3.1
numpy==1.13.3
scipy==1.0.0
matplotlib==2.1.2
pymysql==0.9.3
### END ###