Skip to content

Commit 3ae28cd

Browse files
committed
Revert "indexSize -> includes marked deleted"
This reverts commit 82758c2.
1 parent 45462d4 commit 3ae28cd

File tree

4 files changed

+37
-69
lines changed

4 files changed

+37
-69
lines changed

src/VecSim/algorithms/svs/svs.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
240240
}
241241

242242
int deleteVectorsImpl(const labelType *labels, size_t n) {
243-
if (indexLabelCount() == 0) {
243+
if (indexSize() == 0) {
244244
return 0;
245245
}
246246

@@ -280,7 +280,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
280280
return;
281281

282282
// SVS index instance should not be empty
283-
if (indexLabelCount() == 0) {
283+
if (indexSize() == 0) {
284284
this->impl_.reset();
285285
num_marked_deleted = 0;
286286
return;
@@ -321,7 +321,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
321321

322322
~SVSIndex() = default;
323323

324-
size_t indexSize() const override { return indexStorageSize(); }
324+
// Number of vectors reported by the SVS implementation: impl_->size(), or 0 when impl_ is unset.
// The unit tests updated in this commit expect this count to exclude vectors that are
// marked deleted (contrast with indexStorageSize()).
size_t indexSize() const override { return impl_ ? impl_->size() : 0; }
325325

326326
// Size of the underlying storage: impl_->view_data().size(), or 0 when impl_ is unset.
// The runGCAPI test in this commit expects this to still equal n after `threshold`
// vectors were marked deleted, i.e. marked-deleted entries are still counted here.
size_t indexStorageSize() const override { return impl_ ? impl_->view_data().size() : 0; }
327327

@@ -333,7 +333,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
333333
if constexpr (isMulti) {
334334
return impl_ ? impl_->labelcount() : 0;
335335
} else {
336-
return impl_ ? impl_->size() : 0;
336+
return indexSize();
337337
}
338338
}
339339

@@ -515,7 +515,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
515515
VecSimQueryParams *queryParams) const override {
516516
auto rep = new VecSimQueryReply(this->allocator);
517517
this->lastMode = STANDARD_KNN;
518-
if (k == 0 || this->indexLabelCount() == 0) {
518+
if (k == 0 || this->indexSize() == 0) {
519519
return rep;
520520
}
521521

@@ -560,7 +560,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
560560
VecSimQueryParams *queryParams) const override {
561561
auto rep = new VecSimQueryReply(this->allocator);
562562
this->lastMode = RANGE_QUERY;
563-
if (radius == 0 || this->indexLabelCount() == 0) {
563+
if (radius == 0 || this->indexSize() == 0) {
564564
return rep;
565565
}
566566

@@ -633,7 +633,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
633633
// take ownership of the blob copy and pass it to the batch iterator.
634634
auto *queryBlobCopyPtr = queryBlobCopy.release();
635635
// Ownership of queryBlobCopy moves to VecSimBatchIterator that will free it at the end.
636-
if (indexLabelCount() == 0) {
636+
if (indexSize() == 0) {
637637
return new (this->getAllocator())
638638
NullSVS_BatchIterator(queryBlobCopyPtr, queryParams, this->getAllocator());
639639
} else {
@@ -643,7 +643,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
643643
}
644644

645645
bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override {
646-
size_t index_size = this->indexLabelCount();
646+
size_t index_size = this->indexSize();
647647

648648
// Calculate the ratio of the subset size to the total index size.
649649
double subsetRatio = (index_size == 0) ? 0.f : static_cast<double>(subsetSize) / index_size;

tests/unit/test_svs.cpp

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -264,15 +264,13 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) {
264264
// Delete almost all vectors
265265
const size_t keep_num = 1;
266266
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
267-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
268-
ASSERT_EQ(index->indexLabelCount(), keep_num);
267+
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
269268
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);
270269

271270
// Delete rest of the vectors
272271
// num_marked_deleted should reset.
273272
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
274273
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
275-
ASSERT_EQ(index->indexLabelCount(), 0);
276274
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
277275
VecSimIndex_Free(index);
278276
}
@@ -451,18 +449,14 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector) {
451449
for (size_t i = 0; i < n - 1; i++) {
452450
VecSimIndex_DeleteVector(index, i);
453451
}
454-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
455-
ASSERT_EQ(index->indexLabelCount(), 1);
456-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
452+
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
457453

458454
// Reinsert the same vectors under the same ids.
459455
for (size_t i = 0; i < n; i++) {
460456
// i / 10 is in integer (take the "floor" value).
461457
GenerateAndAddVector<TEST_DATA_T>(index, dim, i, i / 10);
462458
}
463-
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
464-
ASSERT_EQ(index->indexLabelCount(), n);
465-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
459+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
466460

467461
// Run the same query again.
468462
runTopKSearchTest(index, query, k, verify_res);
@@ -515,18 +509,14 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector_different_id) {
515509
for (size_t i = 0; i < n - 1; i++) {
516510
VecSimIndex_DeleteVector(index, i);
517511
}
518-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
519-
ASSERT_EQ(index->indexLabelCount(), 1);
520-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
512+
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
521513

522514
// Reinsert the same vectors under different ids than before.
523515
for (size_t i = 0; i < n; i++) {
524516
GenerateAndAddVector<TEST_DATA_T>(index, dim, i + 10,
525517
i / 10); // i / 10 is in integer (take the "floor" value).
526518
}
527-
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
528-
ASSERT_EQ(index->indexLabelCount(), n);
529-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
519+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
530520

531521
// Run the same query again.
532522
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
@@ -928,8 +918,7 @@ TYPED_TEST(SVSTest, test_delete_vector) {
928918

929919
// Here the shift should happen.
930920
VecSimIndex_DeleteVector(index, 1);
931-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
932-
ASSERT_EQ(index->indexLabelCount(), n - 1);
921+
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
933922

934923
TEST_DATA_T query[] = {0.0, 0.0};
935924
auto verify_res = [&](size_t id, double score, size_t index) {
@@ -3033,8 +3022,7 @@ TYPED_TEST(SVSTest, logging_runtime_params) {
30333022
index->addVector(v[i].data(), ids[i]);
30343023
}
30353024
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 10);
3036-
ASSERT_EQ(VecSimIndex_IndexSize(index), n + 10);
3037-
ASSERT_EQ(index->indexLabelCount(), n);
3025+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
30383026

30393027
float query[] = {50, 50, 50, 50};
30403028
auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); };

tests/unit/test_svs_multi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ TYPED_TEST(SVSMultiTest, test_dynamic_svs_info_iterator) {
536536
VecSimIndex_DeleteVector(index, 0);
537537
info = VecSimIndex_DebugInfo(index);
538538
infoIter = VecSimIndex_DebugInfoIterator(index);
539-
ASSERT_EQ(4, info.commonInfo.indexSize);
539+
ASSERT_EQ(2, info.commonInfo.indexSize);
540540
ASSERT_EQ(1, info.commonInfo.indexLabelCount);
541541
compareSVSIndexInfoToIterator(info, infoIter);
542542
VecSimDebugInfoIterator_Free(infoIter);

tests/unit/test_svs_tiered.cpp

Lines changed: 21 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
894894
VecSimIndex_DeleteVector(svs_index, i);
895895
}
896896
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
897-
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
897+
ASSERT_EQ(svs_index->indexSize(), n / 2);
898898
k = n * 2 / 3;
899899
cur_memory_usage = allocator->getAllocationSize();
900900
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
@@ -909,7 +909,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
909909
VecSimIndex_DeleteVector(flat_index, i);
910910
}
911911
ASSERT_EQ(flat_index->indexSize(), n / 6);
912-
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
912+
ASSERT_EQ(svs_index->indexSize(), n / 2);
913913
k = n / 4;
914914
cur_memory_usage = allocator->getAllocationSize();
915915
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
@@ -923,7 +923,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
923923
GenerateAndAddVector<TEST_DATA_T>(flat_index, dim, i, i);
924924
}
925925
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
926-
ASSERT_EQ(svs_index->indexLabelCount(), 0);
926+
ASSERT_EQ(svs_index->indexSize(), 0);
927927
k = n / 3;
928928
cur_memory_usage = allocator->getAllocationSize();
929929
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
@@ -1133,33 +1133,27 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) {
11331133

11341134
// Override a vector while in the backend
11351135
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 1);
1136-
ASSERT_EQ(tiered_index->indexSize(), n + 1);
1137-
ASSERT_EQ(tiered_index->indexLabelCount(), n);
1138-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 1);
1136+
ASSERT_EQ(tiered_index->indexSize(), n);
11391137
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
11401138
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);
11411139

1142-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
1140+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
11431141
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 1);
11441142

11451143
// Delete the overridden vector
11461144
VecSimIndex_DeleteVector(tiered_index, 1);
1147-
ASSERT_EQ(tiered_index->indexSize(), n);
1145+
ASSERT_EQ(tiered_index->indexSize(), n - 1);
11481146
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
11491147
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);
1150-
ASSERT_EQ(tiered_index->indexLabelCount(), n - 1);
1151-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 1);
1152-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
1148+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
11531149
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
11541150

1155-
// Delete another arbitrary vector
1151+
// Delete another arbitrary vector
11561152
VecSimIndex_DeleteVector(tiered_index, 0);
1157-
ASSERT_EQ(tiered_index->indexSize(), n);
1158-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
1159-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 2);
1160-
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
1153+
ASSERT_EQ(tiered_index->indexSize(), n - 2);
11611154
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 2);
11621155
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 2);
1156+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 2);
11631157
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
11641158

11651159
// Empty Index
@@ -1173,8 +1167,6 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) {
11731167
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
11741168
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 0);
11751169
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
1176-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), 0);
1177-
ASSERT_EQ(tiered_index->indexLabelCount(), 0);
11781170
}
11791171

11801172
TYPED_TEST(SVSTieredIndexTestBasic, deleteVectorMulti) {
@@ -2930,19 +2922,15 @@ TYPED_TEST(SVSTieredIndexTest, writeInPlaceMode) {
29302922
TEST_DATA_T overwritten_vec[] = {1, 1, 1, 1};
29312923
tiered_index->addVector(overwritten_vec, vec_label);
29322924
expected_marked_deleted++;
2933-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 3);
2934-
ASSERT_EQ(tiered_index->indexSize(), 3);
2935-
ASSERT_EQ(tiered_index->indexLabelCount(), 2);
2925+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 2);
29362926
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
29372927
ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(vec_label, overwritten_vec), 0);
29382928
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), expected_marked_deleted);
29392929
}
2940-
// Validate that the vector is marked as deleted.
2930+
// Validate that the vector is removed in place.
29412931
tiered_index->deleteVector(vec_label);
29422932
expected_marked_deleted++;
2943-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), TypeParam::isMulti() ? 2 : 3);
2944-
ASSERT_EQ(tiered_index->indexLabelCount(), 1);
2945-
2933+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 1);
29462934
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), expected_marked_deleted);
29472935
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, expected_marked_deleted);
29482936
}
@@ -3040,7 +3028,7 @@ TYPED_TEST(SVSTieredIndexTest, switchWriteModes) {
30403028
mock_thread_pool.thread_pool_join();
30413029
// Verify that vectors were moved to SVS as expected
30423030
auto sz_f = tiered_index->GetFlatIndex()->indexSize();
3043-
auto sz_b = tiered_index->GetBackendIndex()->indexLabelCount();
3031+
auto sz_b = tiered_index->GetBackendIndex()->indexSize();
30443032
EXPECT_LE(sz_f, this->getUpdateThreshold());
30453033
if (TypeParam::isMulti()) {
30463034
ASSERT_EQ(tiered_index->indexLabelCount(), 2 * n_labels);
@@ -3119,10 +3107,8 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCAPI) {
31193107
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), threshold);
31203108
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, threshold);
31213109

3122-
ASSERT_EQ(tiered_index->indexSize(), n);
3123-
ASSERT_EQ(tiered_index->indexLabelCount(), n - threshold);
3124-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
3125-
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - threshold);
3110+
ASSERT_EQ(tiered_index->indexSize(), n - threshold);
3111+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - threshold);
31263112
ASSERT_EQ(tiered_index->GetSVSIndex()->indexStorageSize(), n);
31273113
auto size_before_gc = tiered_index->getAllocationSize();
31283114

@@ -3191,7 +3177,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, switchDeleteModes) {
31913177
mock_thread_pool.thread_pool_join();
31923178
// Verify that vectors were moved to SVS as expected
31933179
auto sz_f = tiered_index->GetFlatIndex()->indexSize();
3194-
auto sz_b = tiered_index->GetBackendIndex()->indexLabelCount();
3180+
auto sz_b = tiered_index->GetBackendIndex()->indexSize();
31953181
EXPECT_LE(sz_f, update_threshold);
31963182
EXPECT_EQ(sz_f + sz_b, n);
31973183
}
@@ -3284,17 +3270,14 @@ TYPED_TEST(SVSTieredIndexTestBasic, testSwapJournalSingle) {
32843270
// For single-value index, following vectors should be in the index:
32853271
// 0:deleted, 1: 10, 2: deleted, 3:3, ..., n-2:deleted n-1: 10(n-1), n+1: n+1;
32863272
// total: n-2 vectors and labels
3273+
ASSERT_EQ(tiered_index->indexSize(), n - 2);
32873274
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
3288-
EXPECT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 5);
32893275

3290-
// We added 3 vectors to the flat index and removed 5 vectors from the backend index.
32913276
// Backend index: 0:deleted, 1:deleted, 2:deleted, 3:3, ..., n-2:deleted, n-1:deleted;
32923277
// total: n-5
3293-
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
3294-
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 5);
3278+
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 5);
32953279
// Frontend index: 1:10, n-1:10(n-1), n+1:n+1
32963280
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 3);
3297-
ASSERT_EQ(tiered_index->indexSize(), n + tiered_index->GetFlatIndex()->indexSize());
32983281

32993282
double abs_err = 1e-2; // Allow a larger relative error for quantization.
33003283
TEST_DATA_T expected_vector[dim];
@@ -3418,16 +3401,13 @@ TYPED_TEST(SVSTieredIndexTestBasic, testSwapJournalMulti) {
34183401
// For multi-value index, following vectors should be in the index:
34193402
// 0: deleted, 1: (1,10), 2: deleted, 3:3, ..., n-2: deleted n-1: 10(n-1), n+1: n+1;
34203403
// total: n-2 labels, n-1 vectors
3404+
ASSERT_EQ(tiered_index->indexSize(), n - 1);
34213405
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
3422-
EXPECT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 4);
34233406

3424-
// We added 3 vectors to the flat index and removed 4 vectors from the backend index.
34253407
// Backend index: 0:deleted, 1:1, 2:deleted, 3:3, ..., n-2:deleted, n-1:deleted; total: n-4
3426-
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
3427-
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 4);
3408+
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 4);
34283409
// Frontend index: 1:10, n-1:10(n-1), n+1:n+1
34293410
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 3);
3430-
ASSERT_EQ(tiered_index->indexSize(), n + tiered_index->GetFlatIndex()->indexSize());
34313411

34323412
double abs_err = 1e-2; // Allow a larger relative error for quantization.
34333413
TEST_DATA_T expected_vector[dim];

0 commit comments

Comments
 (0)