Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#342 first try at adding specified layer loading #343

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ class Index {
return ids;
}

py::list getFirstLayer(int layer) {
    // Return the neighbor ids (internal ids) stored in the entry point's
    // link list at the requested level of the HNSW graph.
    //
    // NOTE(review): the original body returned the raw pointer produced by
    // get_linklist_at_level() (an hnswlib::linklistsizeint*) directly as a
    // py::list, which is not a valid conversion, and referenced
    // `appr_alg->entry`, which is not a member of hnswlib::HierarchicalNSW
    // (the entry point is `enterpoint_node_`).  Build an explicit Python
    // list instead.
    //
    // @param layer  graph level to inspect (0 = base layer).
    // @return       list of internal neighbor ids; empty if no links exist
    //               at this level.  WARNING: not thread-safe with addItems.
    py::list result;
    hnswlib::linklistsizeint *ll =
        appr_alg->get_linklist_at_level(appr_alg->enterpoint_node_, layer);
    if (ll == nullptr)
        return result;  // entry point has no link list at this level
    // The first word of the link list is the neighbor count; the neighbor
    // ids follow immediately after it.
    size_t degree = appr_alg->getListCount(ll);
    hnswlib::tableint *neighbors = (hnswlib::tableint *)(ll + 1);
    for (size_t i = 0; i < degree; i++)
        result.append(neighbors[i]);
    return result;
}


py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */

Expand Down
43 changes: 43 additions & 0 deletions python_bindings/tests/bindings_test_getdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,46 @@ def testGettingItems(self):
# After adding them, all labels should be retrievable
returned_items = p.get_items(labels)
self.assertSequenceEqual(data.tolist(), returned_items)


def testGettingItemsByLayer(self):
    """Build an index, then fetch the entry point's base-layer link list.

    NOTE(review): this method was originally also named ``testGettingItems``,
    which shadowed the identically named test defined just above it in the
    class body -- Python keeps only the last definition, so unittest would
    silently run just one of the two tests.  Renamed (keeping the ``test``
    prefix so discovery still picks it up) so both tests run.
    """
    print("\n**** Getting the data by layer ****\n")

    dim = 16
    num_elements = 10000

    # Generating sample data
    data = np.float32(np.random.random((num_elements, dim)))
    labels = np.arange(0, num_elements)

    # Declaring index
    p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip

    # Initiating index
    # max_elements - the maximum number of elements, should be known beforehand
    # (probably will be made optional in the future)
    #
    # ef_construction - controls index search speed/build speed tradeoff
    # M - is tightly connected with internal dimensionality of the data
    #     strongly affects the memory consumption
    p.init_index(max_elements=num_elements, ef_construction=100, M=16)

    # Controlling the recall by setting ef:
    # higher ef leads to better accuracy, but slower search
    p.set_ef(100)

    p.set_num_threads(4)  # by default using all available cores

    # Before adding anything, getting any labels should fail
    self.assertRaises(Exception, lambda: p.get_items(labels))

    print("Adding all elements (%d)" % (len(data)))
    p.add_items(data, labels)

    # After adding them, all labels should be retrievable
    returned_items = p.get_items(labels)
    self.assertSequenceEqual(data.tolist(), returned_items)

    # Exercise the new binding under test: base-layer (level 0) link list
    # of the index entry point.
    data = p.getFirstLayer(layer=0)
    print(data)
8 changes: 4 additions & 4 deletions sift_1b.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,22 +231,22 @@ inline bool exists_test(const std::string &name) {
void sift_test1B() {


int subset_size_milllions = 200;
int subset_size_millions = 200;
int efConstruction = 40;
int M = 16;


size_t vecsize = subset_size_milllions * 1000000;
size_t vecsize = subset_size_millions * 1000000;

size_t qsize = 10000;
size_t vecdim = 128;
char path_index[1024];
char path_gt[1024];
char *path_q = "../bigann/bigann_query.bvecs";
char *path_data = "../bigann/bigann_base.bvecs";
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M);
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_millions, efConstruction, M);

sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions);
sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_millions);


unsigned char *massb = new unsigned char[vecdim];
Expand Down