SPTAG icon indicating copy to clipboard operation
SPTAG copied to clipboard

The maxcheck parameter has no effect when searching.

Open LLLjun opened this issue 3 years ago • 2 comments

I ran the code following the example, but while searching I found that the MaxCheck parameter doesn't adjust the recall as described.

[1] [query] [maxcheck] [avg] [99%] [95%] [recall] [qps] [mem]
[1] 0-10000 16384 0.0028 0.0090 0.0064 0.8103 2886.7361 0GB
[1] 0-10000 8192 0.0024 0.0086 0.0056 0.8103 3368.8591 0GB
[1] 0-10000 4096 0.0015 0.0058 0.0033 0.8103 5320.0259 0GB
[1] 0-10000 2048 0.0015 0.0060 0.0035 0.8103 5267.8604 0GB
[1] 0-10000 1024 0.0016 0.0055 0.0036 0.8103 5104.4990 0GB
[1] 0-10000 512 0.0014 0.0050 0.0032 0.8103 5527.0239 0GB
[1] 0-10000 256 0.0016 0.0054 0.0037 0.8103 4964.4180 0GB

LLLjun avatar Oct 31 '22 13:10 LLLjun

I also encountered the same issue: QPS and recall didn't change as expected. Here is the code I tested, using the default configuration for the sift1m dataset:

# Synthetic benchmark data: uniform random float32 vectors in [0, 1).
vector_number = 100000
vector_dimension = 1000

# A fixed seed makes every run build and query the same data, so differences
# in timing between runs come from the parameters rather than the dataset.
rng = np.random.default_rng(0)

# Sampling float32 directly avoids the float64 temporary that
# np.random.rand(...).astype(np.float32) allocates before the cast.
x = rng.random((vector_number, vector_dimension), dtype=np.float32)
q = rng.random((1000, vector_dimension), dtype=np.float32)

# Metadata blob: one newline-terminated decimal id per vector, in order.
# A single join builds it in one pass instead of growing a string with +=.
m = ''.join(f"{i}\n" for i in range(vector_number))

# Instantiate the index object; the arguments are the strings 'SPANN' and
# 'Float' followed by the vector dimension used for the generated data.
index = SPTAG.AnnIndex('SPANN', 'Float', vector_dimension)

# Build-time settings grouped by configuration section. Sections, and the
# entries inside each section, are applied in exactly the order listed here.
_build_settings = (
    ("Base", (
        ("IndexAlgoType", "BKT"),
        ("IndexDirectory", "spann_index"),
        ("DistCalcMethod", "L2"),
    )),
    ("SelectHead", (
        ("isExecute", "true"),
        ("NumberOfThreads", '64'),
        ("Ratio", "0.16"),
        # Left disabled in the original script: ("Count", "200")
        ("TreeNumber", "1"),
        ("BKTKmeansK", "32"),
        ("BKTLeafSize", "8"),
        ("SaveBKT", "false"),
        ("SplitFactor", "6"),
        ("SplitThreshold", "100"),
        ("BKTLambdaFactor", "-1"),
        ("SamplesNumber", "1000"),
        ("SelectThreshold", "50"),
    )),
    ("BuildHead", (
        ("isExecute", "true"),
        ("NeighborhoodSize", "32"),
        ("TPTNumber", "32"),
        ("TPTLeafSize", "2000"),
        ("MaxCheck", "8192"),
        ("MaxCheckForRefineGraph", "8192"),
        ("RefineIterations", "3"),
        ("NumberOfThreads", "64"),
        ("BKTLambdaFactor", "-1"),
    )),
    ("BuildSSDIndex", (
        ("isExecute", "true"),
        ("BuildSsdIndex", "true"),
        ("InternalResultNum", "64"),
        ("ReplicaCount", "8"),
        ("PostingPageLimit", "12"),
        ("NumberOfThreads", "64"),
        ("MaxCheck", "8192"),
    )),
)

# One SetBuildParam(name, value, section) call per table entry.
for _section, _entries in _build_settings:
    for _name, _value in _entries:
        index.SetBuildParam(_name, _value, _section)



# Remove any index directory left over from a previous run. Attempting the
# delete and tolerating a missing directory closes the gap between an
# existence check and the removal; any other failure still propagates.
try:
    shutil.rmtree("spann_index")
except FileNotFoundError:
    pass
    

# Build the index from the generated vectors and their metadata, and report
# how long the build call took.
print("Build.............................")
# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
st = time.perf_counter()
# NOTE(review): the two trailing False flags are passed exactly as in the
# original script; their meaning is defined by the SPTAG wrapper - confirm.
index.BuildWithMetaData(x, m, vector_number, False, False)
et = time.perf_counter()
build_time = et - st
print("Build time : ", build_time)

# Parameter sweep: for every (MaxCheck, SearchPostingPageLimit) pair, apply
# the search settings, run all queries once, and print the elapsed time.
maxcheck = [100, 200, 400, 1000, 2000]
searchPostingPageLimit = [1, 5, 10, 40, 100]

# Descriptive loop names keep the sweep values distinct from the metadata
# string `m` that was passed to the build step.
for max_check in maxcheck:
    for page_limit in searchPostingPageLimit:

        # All settings are re-applied for every combination, in the same
        # order as the original script.
        # NOTE(review): the surrounding thread reports that changing MaxCheck
        # here did not change recall or QPS; whether "SearchSSDIndex" is the
        # section that the search path reads should be confirmed against SPTAG.
        index.SetSearchParam("isExecute", "true", "SearchSSDIndex")
        index.SetSearchParam("BuildSsdIndex", "false", "SearchSSDIndex")
        index.SetSearchParam("InternalResultNum", "32", "SearchSSDIndex")
        index.SetSearchParam("NumberOfThreads", "4", "SearchSSDIndex")
        index.SetSearchParam("HashTableExponent", "4", "SearchSSDIndex")
        # NOTE(review): ResultNum is "10" while each search below asks for 3
        # results - confirm which value is intended.
        index.SetSearchParam("ResultNum", "10", "SearchSSDIndex")
        index.SetSearchParam("MaxCheck", str(max_check), "SearchSSDIndex")
        index.SetSearchParam("MaxDistRatio", "10000", "SearchSSDIndex")
        index.SetSearchParam("SearchPostingPageLimit", str(page_limit), "SearchSSDIndex")

        # perf_counter is the monotonic clock intended for interval timing.
        # The measured span includes the tqdm progress-bar overhead.
        st = time.perf_counter()
        for query in tqdm(q):
            # Search the k=3 nearest vectors for this query vector.
            result = index.SearchWithMetaData(query, 3)
        et = time.perf_counter()
        search_time = et - st
        print(f"{max_check}/{page_limit}   Search time : ", search_time)

marxqiu avatar Nov 03 '22 08:11 marxqiu

Hi, I encountered the same issue. Have you figured out the reason? Thanks in advance!

JingyuanHe1222 avatar Oct 23 '23 02:10 JingyuanHe1222