Spring Boot 整合 Milvus


Milvus 基本概念


一、Maven 依赖【注意版本,防止依赖冲突】

1
<!-- Milvus Java SDK. The SLF4J API and the Log4j SLF4J binding it pulls in are
     excluded to avoid dependency conflicts with the host application. -->
<dependency>
    <groupId>io.milvus</groupId>
    <artifactId>milvus-sdk-java</artifactId>
    <version>2.2.9</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
        </exclusion>
    </exclusions>
</dependency>

二、MilvusServiceClient 引入

凡是需要与 Milvus 交互的地方,都要调用 MilvusServiceClient。我这里的做法是把它定义成一个 Bean,在需要用到的地方通过依赖注入获取。

1.引入配置类

代码如下(示例):
首先yml

1
# Connection settings bound through @Value("${milvus.config.*}"); the values are redacted placeholders.
milvus:
  config:
    ipAddr: *****
    port: ****
1
import io.milvus.client.MilvusServiceClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Scope;

/**
 * Spring configuration that exposes a {@link MilvusServiceClient} bean obtained from a
 * {@code MilvusRestClientFactory} bean.
 *
 * <p>The connection address and port are read from the {@code milvus.config.ipAddr} and
 * {@code milvus.config.port} properties.
 */
@Configuration
public class MilvusConfiguration {

    /** Milvus server address, bound from {@code milvus.config.ipAddr}. */
    @Value("${milvus.config.ipAddr}")
    private String ipAddr;

    /** Milvus server port, bound from {@code milvus.config.port}. */
    @Value("${milvus.config.port}")
    private Integer port;

    /**
     * Exposes the Milvus client as a singleton bean.
     *
     * @return the client handed out by the factory bean
     */
    @Bean
    @Scope("singleton")
    public MilvusServiceClient getMilvusClient() {
        MilvusRestClientFactory clientFactory = getMilvusFactory();
        return clientFactory.getMilvusClient();
    }

    /**
     * Registers the client factory bean; Spring calls its {@code init} method after creation
     * and its {@code close} method on shutdown.
     *
     * @return a factory built from the configured address and port
     */
    @Bean(initMethod = "init", destroyMethod = "close")
    public MilvusRestClientFactory getMilvusFactory() {
        // NOTE(review): MilvusRestClientFactory is not shown in this article; presumably a project helper.
        return MilvusRestClientFactory.build(ipAddr, port);
    }
}

或者采用下面的写法(注意此时读取的配置项是 milvus.host 和 milvus.port,例如 @Value("${milvus.port}")),原理一致:

1
@Configuration public class MilvusConfig { @Value("${milvus.host}") private String host; //milvus所在服务器地址 @Value("${milvus.port}") private Integer port; //milvus端口 @Bean public MilvusServiceClient milvusServiceClient() { ConnectParam connectParam = ConnectParam.newBuilder() .withHost(host) .withPort(port) .build(); return new MilvusServiceClient(connectParam); } }

2.常用方法

1.判断集合是否已经存在
代码如下(示例):

1
// Build the request first, then ask Milvus whether the collection exists.
// "表名" is a placeholder meaning "table name"; substitute the real collection name.
HasCollectionParam hasCollectionParam = HasCollectionParam.newBuilder()
        .withCollectionName("表名")
        .build();
R<Boolean> response = milvusServiceClient.hasCollection(hasCollectionParam);

2.创建集合 + 创建索引 + 把集合加载到内存中
代码如下(示例):

1
// One name for create, index and load, so the collection that is created is also the one
// that gets loaded. "qalog" matches the name used by the data-insertion example.
final String collectionName = "qalog";

// Primary key: caller-supplied 64-bit id (auto-ID disabled).
FieldType id = FieldType.newBuilder()
        .withName("id")
        .withDataType(DataType.Int64)
        .withPrimaryKey(true)
        .withAutoID(false)
        .withDescription("id")
        .build();

FieldType user_id = FieldType.newBuilder()
        .withName("user_id")
        .withDataType(DataType.VarChar)
        .withMaxLength(1000)
        .withDescription("user_id")
        .build();

FieldType question = FieldType.newBuilder()
        .withName("question")
        .withDataType(DataType.VarChar)
        .withMaxLength(10000)
        .withDescription("question")
        .build();

FieldType answer = FieldType.newBuilder()
        .withName("answer")
        .withDataType(DataType.VarChar)
        .withMaxLength(10000)
        .withDescription("answer")
        .build();

// Embedding of the question text; 1536 float dimensions.
FieldType question_vector = FieldType.newBuilder()
        .withName("question_vector")
        .withDescription("question 向量")
        .withDataType(DataType.FloatVector)
        .withDimension(1536)
        .build();

CreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder()
        .withCollectionName(collectionName)
        .addFieldType(id)
        .addFieldType(user_id)
        .addFieldType(question)
        .addFieldType(answer)
        .addFieldType(question_vector)
        .build();

// The SDK reports failures through the returned R object, so check it
// before moving on to steps that depend on the collection existing.
R<RpcStatus> createResponse = milvusServiceClient.createCollection(createCollectionReq);
if (createResponse.getStatus() != R.Status.Success.getCode()) {
    throw new IllegalStateException(
            "Failed to create collection " + collectionName, createResponse.getException());
}

// HNSW index on the vector field with L2 distance. HNSW is tuned with M and
// efConstruction (nlist is an IVF-family parameter and does not apply here).
R<RpcStatus> indexResponse = milvusServiceClient.createIndex(CreateIndexParam.newBuilder()
        .withFieldName("question_vector")
        .withCollectionName(collectionName)
        .withIndexType(IndexType.HNSW)
        .withMetricType(MetricType.L2)
        .withExtraParam("{\"M\":8,\"efConstruction\":64}")
        .build());
if (indexResponse.getStatus() != R.Status.Success.getCode()) {
    throw new IllegalStateException(
            "Failed to create index on " + collectionName, indexResponse.getException());
}

// Load the collection into memory; searches depend on this step.
// response1 is left for the caller to inspect.
R<RpcStatus> response1 = milvusServiceClient.loadCollection(LoadCollectionParam.newBuilder()
        .withCollectionName(collectionName)
        .build());

3.数据插入
代码如下(示例):

1
LambdaQueryWrapper<QaLog> lambdaQueryWrapper = new LambdaQueryWrapper<>(); lambdaQueryWrapper.eq(QaLog::getStatus,"未审核"); //todo 正则 优化 List<QaLog> qaLogList = qaLogService.list(lambdaQueryWrapper); int total = qaLogList.size(); for (int i = 0; i < total/100; i ++) { List<InsertParam.Field> fields = new ArrayList<>(); fields.add(new InsertParam.Field("id", Collections.singletonList(Long.valueOf(qaLogList.get(i).getLogId())))); fields.add(new InsertParam.Field("user_id", Collections.singletonList(qaLogList.get(i).getQuestion()))); fields.add(new InsertParam.Field("question", Collections.singletonList(qaLogList.get(i).getQuestion()))); fields.add(new InsertParam.Field("answer", Collections.singletonList(qaLogList.get(i).getAnswer()))); fields.add(new InsertParam.Field("question_vector", embeddingsUtils.getEmbeddings(qaLogList.get(i).getQuestion()))); InsertParam insertParam = InsertParam.newBuilder() .withCollectionName("qalog") .withFields(fields) .build(); R<MutationResult> insert = milvusServiceClient.insert(insertParam);