Why is the total query time the same when using GPU-optimized Faiss or CPU-based Faiss for index construction and search?

Question

I currently have a dataset of 5 million samples and I would like to retrieve the 20 most similar samples from this dataset for each of these samples. To test the efficiency of this process, I have written the GPU version of Faiss index and CPU version of Faiss index. But when run on a V100 machine, both of these code segments take approximately 25 minutes to execute. Why is it that the query time is the same when using either the GPU or the CPU version of the index?

the GPU version of code is as listed

import numpy as np
import faiss
import time
from tqdm import tqdm
import math

def geshihua(total_sec):
    h=total_sec//3600
    total_sec-=h*3600
    minutes = total_sec//60
    total_sec-=minutes*60
    return h,minutes,total_sec

start = int(time.time())
start_time = time.strftime('%Y-%m-%d %H:%M:%s',time.localtime())
print("start on",start_time)

# produce 5000000 vectors
total_samples_num=5000000
dataset = np.random.random((total_samples_num, 128)).astype('float32')

# definite faiss index
res = faiss.StandardGpuResources()
dim,measure=128,faiss.METRIC_L2
param = 'IVF100,PQ16'
index = faiss.index_factory(dim,param,measure)
gpu_index = faiss.index_cpu_to_gpu(res,0,index)
print(gpu_index.is_trained)
# train the faiss
gpu_index.train(dataset)
# add data to dataset
gpu_index.add(dataset)


retri_num = 20
total_indices=np.random.rand(0,retri_num)
bs=1000
for i in tqdm(range(int(math.ceil(total_samples_num/bs)))):
# for i in tqdm(range(int(total_samples_num/bs))):
    queryset=dataset[i*bs:min((i+1)*bs,total_samples_num-i*bs)]
    # use faiss index to get 20 the most similar vector
    distances, indices = gpu_index.search(queryset, retri_num)
    total_indices=np.vstack((total_indices,indices))
    end_time = time.strftime('%Y-%m-%d %H:%M:%s',time.localtime())

end=int(time.time())
total_sec=end-start
h,minutes,total_sec=geshihua(total_sec)
print(f"total running time:{h}h {minutes}m {total_sec}s")

Next, I used the CPU version of Faiss to build an index with the following code:

import numpy as np
import faiss
import time
from tqdm import tqdm
import math

def geshihua(total_sec):
    h=total_sec//3600
    total_sec-=h*3600
    minutes = total_sec//60
    total_sec-=minutes*60
    return h,minutes,total_sec

start = int(time.time())
start_time = time.strftime('%Y-%m-%d %H:%M:%s',time.localtime())
print("start on",start_time)

# produce 5000000 vectors
total_samples_num=5000000
dataset = np.random.random((total_samples_num, 128)).astype('float32')

# definite faiss index
res = faiss.StandardGpuResources()
dim,measure=128,faiss.METRIC_L2
param = 'IVF100,PQ16'
index = faiss.index_factory(dim,param,measure)
# index = faiss.index_cpu_to_gpu(res,0,index)
print(index.is_trained)
# train the faiss
index.train(dataset)
# add data to dataset
index.add(dataset)

retri_num = 20
total_indices=np.random.rand(0,retri_num)
bs=1000
for i in tqdm(range(int(math.ceil(total_samples_num/bs)))):
# for i in tqdm(range(int(total_samples_num/bs))):
    queryset=dataset[i*bs:min((i+1)*bs,total_samples_num-i*bs)]
    # use faiss index to get 20 the most similar vector
    distances, indices = index.search(queryset, retri_num)
    total_indices=np.vstack((total_indices,indices))
    end_time = time.strftime('%Y-%m-%d %H:%M:%s',time.localtime())
    # print(f"finish {i*bs}~{(i+1)*bs} on {end_time}")

end=int(time.time())
total_sec=end-start
h,minutes,total_sec=geshihua(total_sec)
print(f"total running time:{h}h {minutes}m {total_sec}s")

I am unsure as to why the training time is exactly the same when using both the GPU and CPU versions of the index.Is there anything wrong in my code?

Why is the total query time the same when using GPU-optimized Faiss or CPU-based Faiss for index construction and search?

0 Answers0