diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp
index 285b5185..8b80e477 100644
--- a/python_bindings/bindings.cpp
+++ b/python_bindings/bindings.cpp
@@ -288,6 +288,29 @@ class Index {
         return ids;
     }
 
+    py::list getFirstLayer(int layer) {
+        /* Returns, as a Python list, the ids stored in the entry point's link list at `layer`.
+         * NOTE(review): assumes get_linklist_at_level() yields hnswlib's raw link list
+         * (a size header followed by the neighbour ids) -- confirm against hnswalg.h.
+         * NOTE(review): no .def("getFirstLayer", ...) registration is visible in this
+         * patch; Python cannot call the method until one exists.
+         * NOTE(review): layers above the entry point's own level are not rejected here. */
+        if (layer < 0) {
+            throw py::value_error("layer must be non-negative");
+        }
+        py::list neighbors;
+        auto *link_list = appr_alg->get_linklist_at_level(appr_alg->entry, layer);
+        if (link_list == nullptr) {
+            return neighbors;
+        }
+        const size_t link_count = appr_alg->getListCount(link_list);
+        const auto *neighbor_ids = link_list + 1;  // presumably the ids start right after the size header
+        for (size_t i = 0; i < link_count; i++) {
+            neighbors.append(neighbor_ids[i]);
+        }
+        return neighbors;
+    }
+
     py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */
 
 
diff --git a/python_bindings/tests/bindings_test_getdata.py b/python_bindings/tests/bindings_test_getdata.py
index 2985c1dd..dca0921b 100644
--- a/python_bindings/tests/bindings_test_getdata.py
+++ b/python_bindings/tests/bindings_test_getdata.py
@@ -44,3 +44,49 @@ def testGettingItems(self):
         # After adding them, all labels should be retrievable
         returned_items = p.get_items(labels)
         self.assertSequenceEqual(data.tolist(), returned_items)
+
+
+    def testGettingFirstLayer(self):
+        """Build an index, then fetch the entry point's layer-0 links via getFirstLayer."""
+        print("\n**** Getting the data by layer ****\n")
+
+        dim = 16
+        num_elements = 10000
+
+        # Generating sample data
+        data = np.float32(np.random.random((num_elements, dim)))
+        labels = np.arange(0, num_elements)
+
+        # Declaring index
+        p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip
+
+        # Initiating index
+        # max_elements - the maximum number of elements, should be known beforehand
+        #        (probably will be made optional in the future)
+        #
+        # ef_construction - controls index search speed/build speed tradeoff
+        # M - is tightly connected with internal dimensionality of the data
+        #        strongly affects the memory consumption
+
+        p.init_index(max_elements=num_elements, ef_construction=100, M=16)
+
+        # Controlling the recall by setting ef:
+        # higher ef leads to better accuracy, but slower search
+        p.set_ef(100)
+
+        p.set_num_threads(4)  # by default using all available cores
+
+        # Before adding anything, getting any labels should fail
+        self.assertRaises(Exception, lambda: p.get_items(labels))
+
+        print("Adding all elements (%d)" % (len(data)))
+        p.add_items(data, labels)
+
+        # After adding them, all labels should be retrievable
+        returned_items = p.get_items(labels)
+        self.assertSequenceEqual(data.tolist(), returned_items)
+
+        # Use a separate name so the sample data above is not overwritten
+        first_layer = p.getFirstLayer(0)
+        self.assertIsInstance(first_layer, list)
+        print(first_layer)
diff --git a/sift_1b.cpp b/sift_1b.cpp
index 2739490c..d2f2d8be 100644
--- a/sift_1b.cpp
+++ b/sift_1b.cpp
@@ -231,12 +231,12 @@ inline bool exists_test(const std::string &name) {
 void sift_test1B() {
 
 
-    int subset_size_milllions = 200;
+    int subset_size_millions = 200;
     int efConstruction = 40;
     int M = 16;
 
 
-    size_t vecsize = subset_size_milllions * 1000000;
+    size_t vecsize = static_cast<size_t>(subset_size_millions) * 1000000;
 
     size_t qsize = 10000;
     size_t vecdim = 128;
@@ -244,9 +244,9 @@ void sift_test1B() {
     char path_gt[1024];
     char *path_q = "../bigann/bigann_query.bvecs";
     char *path_data = "../bigann/bigann_base.bvecs";
-    sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M);
+    sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_millions, efConstruction, M);
 
-    sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions);
+    sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_millions);
 
 
     unsigned char *massb = new unsigned char[vecdim];