-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_dictionaries.m
103 lines (69 loc) · 3.01 KB
/
create_dictionaries.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
function [dictionary] = create_dictionaries(datasetDir,params,dataset,dictDir)
% CREATE_DICTIONARIES generates a vocabulary of visual words using K-means
%
%   DICTIONARY = CREATE_DICTIONARIES(DATASETDIR,PARAMS,DATASET,DICTDIR)
%   groups randomly selected descriptors from the training images of
%   DATASET into PARAMS.dictionarySize clusters using k-means and returns
%   the cluster centres.
%
%   Inputs:
%     datasetDir - root folder; the feature files of each class are read
%                  from fullfile(datasetDir, dataset(c).className).
%     params     - struct with the fields read by this function:
%                    .dictionarySize     number of clusters (visual words)
%                    .feat               extension of the feature files
%                    .numTrainImages     used to derive the per-image cap
%                    .kmeans.maxIter     passed to kmeans_bo
%                    .kmeans.maxNumFeats (optional) maximum number of
%                                        descriptors fed to k-means;
%                                        defaults to 100000.
%     dataset    - struct array with fields className, files and train_id.
%     dictDir    - folder where 'dictionary_<dictionarySize>.mat' is
%                  stored. If that file already exists it is loaded and
%                  returned instead of being rebuilt.
%
%   Output:
%     dictionary - the transposed output of kmeans_bo (one visual word per
%                  row, per the original author's note).
%
% Requirements: k-means code from Liefeng Bo
%               (http://homes.cs.washington.edu/~lfb/)
%
% See also FEATURE_EXTRACTION and BUILD_HISTOGRAMS
%
% Author: Jose Rivera-Rubio @ BICV group Imperial College London
%         http://www.bicv.org
%
% Date: May, 2014

    % Use the caller's limit when given; otherwise fall back to the default.
    % (Previously the field was read unconditionally, so omitting it raised
    % an error instead of applying the default.)
    if isfield(params.kmeans,'maxNumFeats')
        maxNumFeats = params.kmeans.maxNumFeats;
    else
        maxNumFeats = 100000;
    end
    maxNumFeatsImg = maxNumFeats/params.numTrainImages;
    featSuffix = params.feat;

    dictFname = sprintf('dictionary_%d.mat',params.dictionarySize);
    savePath = fullfile(dictDir,dictFname);

    if (exist(savePath,'file'))
        fprintf('File exists! Skipping %s \n',dictFname);
        % Load into a struct so no variable is created implicitly.
        cached = load(savePath,'dictionary');
        dictionary = cached.dictionary;
        return;
    end

    % Build vocabulary
    fprintf('\nBuilding vocabulary of visual words:\n');

    % Load all the descriptors of the training set and concatenate them
    % into a single array:
    allDescriptors = [];
    for c = 1:length(dataset)   % 'c' rather than 'cat' to avoid shadowing the builtin
        catPath = fullfile(datasetDir,dataset(c).className);
        % Positions of the training images of this class.
        % NOTE(review): assumes train_id is a mask aligned with
        % dataset(c).files - confirm against the code that builds DATASET.
        trainIdx = find(dataset(c).train_id);

        for t = 1:length(trainIdx)
            % Index through trainIdx so that only training images are used
            % (indexing with t would always pick the first files).
            [~,imgFname,~] = fileparts(dataset(c).files{trainIdx(t)});
            featFname = fullfile(catPath,[imgFname '.' featSuffix]);
            loaded = load(featFname,'features','-mat');
            data2add = loaded.features.data;
            numDesc = size(data2add,1);

            % Add a balanced amount of descriptors per image, sampling
            % without replacement so no descriptor is duplicated.
            if numDesc > maxNumFeatsImg
                p = randperm(numDesc,floor(maxNumFeatsImg));
                data2add = data2add(p,:);
            end
            allDescriptors = [allDescriptors ; data2add]; %#ok<AGROW>
        end % end for training images

        % Keep the accumulated set within maxNumFeats after every class,
        % again sampling without replacement.
        totalDesc = size(allDescriptors,1);
        if totalDesc > maxNumFeats
            fprintf('Reducing to %d descriptors\n', maxNumFeats);
            p = randperm(totalDesc,maxNumFeats);
            allDescriptors = allDescriptors(p,:);
        end
    end % for categories

    % Perform clustering
    fprintf('\nRunning K-means...\n');
    dictionary = kmeans_bo(double(allDescriptors),params.dictionarySize,...
        params.kmeans.maxIter); % BOVW Codebook
    dictionary = dictionary'; % Back to num_words x desc_dim size

    % Saving the dictionary
    fprintf('Saving BOVW dictionary...\n');
    if ~exist(dictDir,'dir')
        mkdir(dictDir); % only create when missing; avoids the "already exists" warning
    end
    save(savePath,'dictionary');
    fprintf('Done.\n');
end % end create dictionaries