# VDB | Milvus #milvus 基本操作

第三方库

from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType

需要的基本参数

class MilvusManager:
    """Hold the settings needed to work with one Milvus collection.

    Constructing the manager only stores values; it performs no network I/O.
    """

    def __init__(self, host, port, collection_name):
        """Store the server address and the target collection name.

        Args:
            host: Milvus server host.
            port: Milvus server port.
            collection_name: Name of the collection this manager works with.
        """
        self.host = host
        self.port = port
        self.collection_name = collection_name
        # No connection handle is held at construction time.
        self.connection = None
        # Defined up front so the attribute exists before a collection is
        # created and assigned to it.
        self.collection = None

连接

def connect(self):
    """Open a Milvus connection using ``self.host`` and ``self.port``.

    The port is passed through ``int`` first, so a numeric string is accepted.
    """
    address = {"host": self.host, "port": int(self.port)}
    connections.connect(**address)

创建collection

def create_collection(self):
    """Build the question/answer schema and construct the collection.

    Fields:
        id: INT64 primary key with ``auto_id=True``.
        question: FLOAT_VECTOR with ``dim=1536``.
        text: VARCHAR with ``max_length=30000``.

    Returns:
        The ``Collection`` object, which is also stored on ``self.collection``.
    """
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="question", dtype=DataType.FLOAT_VECTOR, dim=1536, is_primary=False),
        FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=30000, is_primary=False),
    ]
    schema = CollectionSchema(fields=fields, description="question_answer")
    # pymilvus takes the consistency setting from the ``consistency_level``
    # keyword; a ``consistency=`` keyword is not the documented option, so the
    # intended strong consistency would not have been applied.
    self.collection = Collection(
        name=self.collection_name,
        schema=schema,
        using="default",
        consistency_level="Strong",
    )
    return self.collection

创建index

def build_index(self, collection):
    """Create an IVF_FLAT index with the IP metric on the ``question`` field.

    Args:
        collection: Collection on which ``create_index`` is called.
    """
    index_params = {
        "metric_type": "IP",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024},
    }
    collection.create_index(field_name="question", index_params=index_params)

准备索引参数

# Example index settings: IVF_FLAT index type, L2 metric, nlist of 1024.
index_params = dict(
    metric_type="L2",
    index_type="IVF_FLAT",
    params=dict(nlist=1024),
)

向量索引是元数据的组织单位，用于加速向量相似性搜索。如果没有基于向量构建的索引，Milvus 将默认执行暴力搜索。
当前版本的 Milvus 仅支持在向量字段上建立索引。未来的版本将支持标量字段上的索引。
默认情况下，Milvus 不会对少于 1,024 行的 segment 建立索引。要更改此参数，请在 milvus.yaml 中配置 rootCoord.minSegmentSizeToEnableIndex。

Parameter	Description	Options
metric_type	用于衡量向量相似性的指标类型。	对于浮点向量： `L2` （欧几里得距离） `IP` （内积）对于二进制向量： `JACCARD` (Jaccard distance) `TANIMOTO` (Tanimoto distance) `HAMMING` (Hamming distance) `SUPERSTRUCTURE` (Superstructure)
index_type	用于加速向量搜索的索引类型	对于浮点向量: `FLAT` (FLAT) `IVF_FLAT` (IVF_FLAT) `IVF_SQ8` (IVF_SQ8) `IVF_PQ` (IVF_PQ) `HNSW` (HNSW) `ANNOY` (ANNOY) `RHNSW_FLAT` (RHNSW_FLAT) `RHNSW_PQ` (RHNSW_PQ) `RHNSW_SQ` (RHNSW_SQ) 对于二进制向量: `BIN_FLAT` (BIN_FLAT) `BIN_IVF_FLAT` (BIN_IVF_FLAT)
params	特定于索引的构建参数。	有关详细信息，请参阅向量索引。

构建索引

通过指定向量字段名称和索引参数来构建索引。

from pymilvus import Collection
# Open the existing "book" collection, then index its "book_intro" field
# using the index_params prepared earlier.
collection = Collection("book")
collection.create_index(
    field_name="book_intro",
    index_params=index_params,
)

Status(code=0, message='')

Parameter	Description
field_name	要在其上建立索引的向量字段的名称。
index_params	要生成的索引的参数。

load

def load_collection(self, collection):
    """Call ``load()`` on the given collection.

    Args:
        collection: Collection to load; the result of ``load()`` is discarded.
    """
    collection.load()

Milvus 中的所有搜索和查询操作都在内存中执行。在执行向量相似性搜索之前，将集合加载到内存中。

清空

def drop_collection(self, collection):
    """Call ``drop()`` on the given collection.

    Args:
        collection: Collection to drop; the result of ``drop()`` is discarded.
    """
    collection.drop()

关闭连接

def close(self):
    """Release the Milvus connection used by this manager.

    Closes ``self.connection`` when one has been assigned, then disconnects
    the ``"default"`` alias in the pymilvus connection registry.
    """
    if self.connection:
        self.connection.close()
        self.connection = None
    # __init__ leaves self.connection as None and connect() never assigns it,
    # so the check above alone would never close anything. connect() does not
    # pass an alias, so the connection lives under pymilvus's "default" alias;
    # disconnect that alias explicitly.
    connections.disconnect("default")

插入数据

通过指定 partition_name，您可以选择将数据插入到哪个分区。

from pymilvus import Collection
# Open the existing "book" collection.
collection = Collection("book")      # Get an existing collection.
# NOTE: `data` is not defined in this snippet; it must be prepared beforehand.
# The value returned by insert() is kept in `mr`.
mr = collection.insert(data)

Parameter	Description
data	要插入到 Milvus 中的数据。
partition_name （可选）	要在其中插入数据的分区的名称。

注意：向量维度最大为 32,768。插入向量的维度必须与创建 collection 时为该向量字段指定的维度（dim）一致。

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.rhkb.cn/news/279340.html

如若内容造成侵权/违法违规/事实不符，请联系长河编程网进行投诉反馈email:809451989@qq.com，一经查实，立即删除！