瀏覽代碼

Merge pull request #30 from PeinYu/main

feat:更新bge-large-zh-v1.5向量模型
ageerle 2 月之前
父節點
當前提交
9062d47b99

+ 63 - 0
ruoyi-modules/ruoyi-knowledge/src/main/java/org/ruoyi/knowledge/chain/vectorizer/BgeLargeVectorization.java

@@ -0,0 +1,63 @@
+package org.ruoyi.knowledge.chain.vectorizer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.google.gson.Gson;
+import io.github.ollama4j.OllamaAPI;
+import io.github.ollama4j.models.embeddings.OllamaEmbeddingsRequestModel;
+import jakarta.annotation.Resource;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
+import org.ruoyi.knowledge.service.IKnowledgeInfoService;
+import org.springframework.context.annotation.Lazy;
+import org.springframework.stereotype.Component;
+
+/**
+ * {@link Vectorization} implementation that requests embeddings from an Ollama server.
+ *
+ * <p>The embedding model name is read from the knowledge base record identified by
+ * {@code kid} (see {@link KnowledgeInfoVo#getVectorModel()}).
+ */
+@Component
+@Slf4j
+@RequiredArgsConstructor
+public class BgeLargeVectorization implements Vectorization {
+
+    /** Base URL of the Ollama server that embedding requests are sent to. */
+    String host = "http://localhost:11434/";
+
+    @Lazy
+    @Resource
+    private IKnowledgeInfoService knowledgeInfoService;
+
+    /**
+     * Embeds every chunk separately and returns one vector per chunk, in input order.
+     *
+     * <p>Each chunk is sent as its own prompt. Sending the JSON serialization of the whole
+     * list as a single prompt would yield only one vector for the entire batch, so the
+     * result could not be matched back to the individual chunks.
+     *
+     * @param chunkList text chunks to embed; {@code null} or empty yields an empty result
+     * @param kid       knowledge base id as a decimal string
+     * @return a list with exactly {@code chunkList.size()} vectors
+     * @throws IllegalArgumentException if no knowledge base exists for {@code kid}
+     * @throws RuntimeException         if an embedding request fails
+     */
+    @Override
+    public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
+        if (chunkList == null || chunkList.isEmpty()) {
+            return new ArrayList<>();
+        }
+        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
+        if (knowledgeInfoVo == null) {
+            throw new IllegalArgumentException("Knowledge base not found, kid=" + kid);
+        }
+        String model = knowledgeInfoVo.getVectorModel();
+        OllamaAPI ollamaAPI = new OllamaAPI(host);
+        List<List<Double>> vectorList = new ArrayList<>(chunkList.size());
+        for (String chunk : chunkList) {
+            vectorList.add(embed(ollamaAPI, model, chunk));
+        }
+        return vectorList;
+    }
+
+    /**
+     * Embeds a single chunk by delegating to {@link #batchVectorization(List, String)}.
+     *
+     * @param chunk text to embed
+     * @param kid   knowledge base id as a decimal string
+     * @return the embedding vector of {@code chunk}
+     */
+    @Override
+    public List<Double> singleVectorization(String chunk, String kid) {
+        List<String> chunkList = new ArrayList<>();
+        chunkList.add(chunk);
+        List<List<Double>> vectorList = batchVectorization(chunkList, kid);
+        return vectorList.get(0);
+    }
+
+    /**
+     * Requests the embedding of one text from Ollama.
+     *
+     * <p>Any failure is rethrown as a {@link RuntimeException} that keeps the original cause.
+     * If the failure is an {@link InterruptedException}, the thread's interrupt flag is
+     * restored first so callers can still observe the interruption.
+     */
+    private static List<Double> embed(OllamaAPI ollamaAPI, String model, String text) {
+        try {
+            return ollamaAPI.generateEmbeddings(new OllamaEmbeddingsRequestModel(model, text));
+        } catch (Exception e) {
+            if (e instanceof InterruptedException) {
+                Thread.currentThread().interrupt();
+            }
+            throw new RuntimeException("Ollama embedding request failed, model=" + model, e);
+        }
+    }
+
+    /**
+     * Manual smoke test: embeds two sample sentences and prints each resulting vector.
+     * Needs an Ollama server reachable at {@code http://localhost:11434/}.
+     */
+    public static void main(String[] args) {
+        OllamaAPI ollamaAPI = new OllamaAPI("http://localhost:11434/");
+        List<String> chunkList = Arrays.asList("天很蓝", "海很深");
+        for (String chunk : chunkList) {
+            List<Double> doubleVector = embed(ollamaAPI, "quentinz/bge-large-zh-v1.5", chunk);
+            System.out.println("=== " + doubleVector + " 1===");
+        }
+    }
+}

+ 25 - 3
ruoyi-modules/ruoyi-knowledge/src/main/java/org/ruoyi/knowledge/chain/vectorizer/VectorizationFactory.java

@@ -1,6 +1,11 @@
 package org.ruoyi.knowledge.chain.vectorizer;
 
+import cn.hutool.core.util.StrUtil;
+import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
+import org.ruoyi.knowledge.service.IKnowledgeInfoService;
+import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Component;
 
 /**
@@ -13,11 +18,28 @@ public class VectorizationFactory {
 
     private final OpenAiVectorization openAiVectorization;
 
-    public VectorizationFactory(OpenAiVectorization openAiVectorization) {
+    private final BgeLargeVectorization bgeLargeVectorization;
+
+    @Lazy
+    @Resource
+    private IKnowledgeInfoService knowledgeInfoService;
+
+    public VectorizationFactory(OpenAiVectorization openAiVectorization,BgeLargeVectorization bgeLargeVectorization) {
         this.openAiVectorization = openAiVectorization;
+        this.bgeLargeVectorization = bgeLargeVectorization;
     }
 
-    public Vectorization getEmbedding(){
-        return openAiVectorization;
+    /**
+     * Picks the vectorizer to use for the given knowledge base.
+     *
+     * <p>When {@code kid} is non-empty the knowledge base is looked up and its configured
+     * vector model is used. An empty {@code kid}, a missing record, or an empty model name
+     * falls back to {@code text-embedding-3-small}.
+     *
+     * @param kid knowledge base id as a decimal string; may be empty
+     * @return the BGE vectorizer when the model is {@code quentinz/bge-large-zh-v1.5},
+     *         otherwise the OpenAI vectorizer
+     */
+    public Vectorization getEmbedding(String kid){
+        final String fallbackModel = "text-embedding-3-small";
+        KnowledgeInfoVo info = StrUtil.isEmpty(kid)
+                ? null
+                : knowledgeInfoService.queryById(Long.valueOf(kid));
+        String configuredModel = (info == null) ? null : info.getVectorModel();
+        String effectiveModel = StrUtil.isEmpty(configuredModel) ? fallbackModel : configuredModel;
+        if ("quentinz/bge-large-zh-v1.5".equals(effectiveModel)) {
+            return bgeLargeVectorization;
+        }
+        return openAiVectorization;
     }
 }

+ 6 - 2
ruoyi-modules/ruoyi-knowledge/src/main/java/org/ruoyi/knowledge/chain/vectorizer/VectorizationWrapper.java

@@ -1,7 +1,11 @@
 package org.ruoyi.knowledge.chain.vectorizer;
 
+import jakarta.annotation.Resource;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
+import org.ruoyi.knowledge.service.IKnowledgeInfoService;
+import org.springframework.context.annotation.Lazy;
 import org.springframework.context.annotation.Primary;
 import org.springframework.stereotype.Component;
 
@@ -16,13 +20,13 @@ public class VectorizationWrapper implements Vectorization{
     private final VectorizationFactory vectorizationFactory;
     @Override
     public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
-        Vectorization embedding = vectorizationFactory.getEmbedding();
+        Vectorization embedding = vectorizationFactory.getEmbedding(kid);
         return embedding.batchVectorization(chunkList, kid);
     }
 
     @Override
     public List<Double> singleVectorization(String chunk, String kid) {
-        Vectorization embedding = vectorizationFactory.getEmbedding();
+        Vectorization embedding = vectorizationFactory.getEmbedding(kid);
         return embedding.singleVectorization(chunk, kid);
     }
 }