faiss windows 安装使用

import numpy as np
d = 64                                           # 向量维度
nb = 100000                                      # index向量库的数据量
nq = 10000                                       # 待检索query的数目
np.random.seed(1234)             
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.                # index向量库的向量
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.                # 待检索的query向量

第二步，构建索引，这里我们选用暴力检索的方法FlatL2，L2代表构建的index采用的相似度度量方法为L2范数，即欧氏距离：

import faiss          
index = faiss.IndexFlatL2(d)             
print(index.is_trained)         # 输出为True，代表该类index不需要训练，只需要add向量进去即可
index.add(xb)                   # 将向量库中的向量加入到index中
print(index.ntotal)             # 输出index中包含的向量总数，为100000

第三步，检索TopK相似query：

k = 4                     # topK的K值
D, I = index.search(xq, k)# xq为待检索向量，返回的I为每个待检索query最相似TopK的索引list，D为其对应的距离
print(I[:5])
print(D[-5:])

打印输出为：

>>> 
[[  0 393 363  78] 
 [  1 555 277 364] 
 [  2 304 101  13] 
 [  3 173  18 182] 
 [  4 288 370 531]]  
[[ 0.          7.17517328  7.2076292   7.25116253]  
 [ 0.          6.32356453  6.6845808   6.79994535]  
 [ 0.          5.79640865  6.39173603  7.28151226]  
 [ 0.          7.27790546  7.52798653  7.66284657]  
 [ 0.          6.76380348  7.29512024  7.36881447]]

使用：

import torch
from PIL import Image
from transformers import AutoProcessor, CLIPModel, AutoImageProcessor, AutoModel
import faiss
import os
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

#Load CLIP model and processor
processor_clip = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
model_clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)

#Load DINOv2 model and processor
processor_dino = AutoImageProcessor.from_pretrained('facebook/dinov2-base')
model_dino = AutoModel.from_pretrained('facebook/dinov2-base').to(device)

#Retrieve all filenames
images = []
for root, dirs, files in os.walk('./val2017/'):
    for file in files:
        if file.endswith('jpg'):
            images.append(root  + '/'+ file)


#Define a function that normalizes embeddings and add them to the index
def add_vector_to_index(embedding, index):
    #convert embedding to numpy
    vector = embedding.detach().cpu().numpy()
    #Convert to float32 numpy
    vector = np.float32(vector)
    #Normalize vector: important to avoid wrong results when searching
    faiss.normalize_L2(vector)
    #Add to index
    index.add(vector)

def extract_features_clip(image):
    with torch.no_grad():
        inputs = processor_clip(images=image, return_tensors="pt").to(device)
        image_features = model_clip.get_image_features(**inputs)
        return image_features

def extract_features_dino(image):
    with torch.no_grad():
        inputs = processor_dino(images=image, return_tensors="pt").to(device)
        outputs = model_dino(**inputs)
        image_features = outputs.last_hidden_state
        return image_features.mean(dim=1)

#Create 2 indexes.
index_clip = faiss.IndexFlatL2(512)
index_dino = faiss.IndexFlatL2(768)

#Iterate over the dataset to extract features X2 and store features in indexes
for image_path in images:
    img = Image.open(image_path).convert('RGB')
    clip_features = extract_features_clip(img)
    add_vector_to_index(clip_features,index_clip)
    dino_features = extract_features_dino(img)
    add_vector_to_index(dino_features,index_dino)

#store the indexes locally
faiss.write_index(index_clip,"clip.index")
faiss.write_index(index_dino,"dino.index")

文章来源:https://blog.csdn.net/jacke121/article/details/135018438
本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若内容造成侵权/违法违规/事实不符，请联系我的编程经验分享网邮箱：veading@qq.com进行投诉反馈，一经查实，立即删除！

faiss windows 安装使用

安装 测试成功：

pip安装 测试失败：

入门使用：

2. Faiss简单上手

使用：

安装测试成功：

pip安装测试失败：