Преглед на файлове

修改向量库的删除和上传文档存储的顺序

weiyu преди 5 месеца
родител
ревизия
3d655b2f23
променени са 2 файла, в които са добавени 31 реда и са изтрити 17 реда
  1. +17 -15
      rag/documents_process.py
  2. +14 -2
      rag/vector_db/milvus_vector.py

+ 17 - 15
rag/documents_process.py

@@ -318,30 +318,31 @@ class ProcessDocuments():
             logger.info(f"存储到milvus的文本数据:{docs}")
             if flag == "upload":
                 # 插入到milvus库中
-                insert_milvus_flag, insert_milvus_str = self.milvus_client._insert_data(docs)
+                insert_slice_flag, insert_mysql_info = self.mysql_client.insert_to_slice(docs, self.knowledge_id, doc_id)
                 
-                if insert_milvus_flag:
+                if insert_slice_flag:
                     # 插入到mysql的slice info数据库中
-                    insert_slice_flag, insert_mysql_info = self.mysql_client.insert_to_slice(docs, self.knowledge_id, doc_id)
+                    insert_img_flag, insert_mysql_info =  self.mysql_client.insert_to_image_url(flag_img_info, self.knowledge_id, doc_id)
                 else:
-                    insert_slice_flag = False
+                    insert_img_flag = False
                     parse_file_status = False
 
-                if insert_slice_flag:
+                if insert_img_flag:
+                    insert_milvus_flag, insert_milvus_str = self.milvus_client._insert_data(docs)
                     # 插入mysql中的bm_media_replacement表中
-                    insert_img_flag, insert_mysql_info =  self.mysql_client.insert_to_image_url(flag_img_info, self.knowledge_id, doc_id)
                 else:
                     # self.milvus_client._delete_by_doc_id(doc_id=doc_id)
-                    insert_img_flag = False
+                    insert_milvus_flag = False
 
                     # return resp
                     parse_file_status = False
 
-                if insert_img_flag:
+                if insert_milvus_flag:
                     parse_file_status = True
                 
                 else:
-                    self.milvus_client._delete_by_doc_id(doc_id=doc_id)
+                    self.mysql_client.delete_to_slice(doc_id=doc_id)
+                    # self.milvus_client._delete_by_doc_id(doc_id=doc_id)
                     self.mysql_client.delete_image_url(doc_id=doc_id)
                     # resp = {"code": 500, "message": insert_mysql_info}
                     parse_file_status = False
@@ -354,29 +355,30 @@ class ProcessDocuments():
                 self.mysql_client.delete_to_slice(doc_id=doc_id)
 
                 insert_milvus_start_time = time.time()
-                insert_milvus_flag, insert_milvus_str = self.milvus_client._insert_data(docs)
+                insert_slice_flag, insert_mysql_info = self.mysql_client.insert_to_slice(docs, self.knowledge_id, doc_id)
                 # insert_milvus_flag, insert_milvus_str = self.milvus_client._batch_insert_data(docs,text_lists)
                 insert_milvus_end_time = time.time()
                 logger.info(f"插入milvus数据库耗时:{insert_milvus_end_time - insert_milvus_start_time}")
 
-                if insert_milvus_flag:
+                if insert_slice_flag:
                     # 插入到mysql的slice info数据库中
                     insert_mysql_start_time = time.time()
-                    insert_slice_flag, insert_mysql_info = self.mysql_client.insert_to_slice(docs, self.knowledge_id, doc_id)
+                    insert_milvus_flag, insert_milvus_str = self.milvus_client._insert_data(docs)
                     insert_mysql_end_time = time.time()
                     logger.info(f"插入mysql数据库耗时:{insert_mysql_end_time - insert_mysql_start_time}")
                 else:
                     # resp = {"code": 500, "message": insert_milvus_str}
                     # return resp
-                    insert_slice_flag = False
+                    insert_milvus_flag = False
                     parse_file_status = False
                 
-                if insert_slice_flag:
+                if insert_milvus_flag:
                     # resp = {"code": 200, "message": "切片修改成功"}
                     parse_file_status = True
                 
                 else:
-                    self.milvus_client._delete_by_doc_id(doc_id=doc_id)
+                    self.mysql_client.delete_to_slice(doc_id=doc_id)
+                    # self.milvus_client._delete_by_doc_id(doc_id=doc_id)
                     # resp = {"code":500, "message": insert_mysql_info}
                     parse_file_status = False
 

+ 14 - 2
rag/vector_db/milvus_vector.py

@@ -406,6 +406,8 @@ class HybridRetriever:
         expr_delete = f"pk in {primary_keys}"  # 构造删除表达式
         try:
             delete_res = self.client.delete(collection_name=self.collection_name, filter=expr_delete)
+            self.client.flush(collection_name=self.collection_name)
+            self.client.compact(collection_name=self.collection_name)
             logger.info(f"Deleted data with chunk_id: {delete_res}")
             return "delete_success", chunk_len
         except Exception as e:
@@ -429,14 +431,24 @@ class HybridRetriever:
         logger.info(f"获取到的主键信息:{primary_keys}")
         
         # 执行删除操作
-        expr_delete = f"pk in {primary_keys}"  # 构造删除表达式
         try:
-            delete_res = self.client.delete(collection_name=self.collection_name, filter=expr_delete)
+            delete_res = self.client.delete(collection_name=self.collection_name, ids=primary_keys)
+            self.client.flush(collection_name=self.collection_name)
+            self.client.compact(collection_name=self.collection_name)
             logger.info(f"Deleted data with doc_id: {delete_res}")
             return "delete_success"
         except Exception as e:
             logger.error(f"删除数据失败:{e}")
             return "delete_error"
+        # # 执行删除操作
+        # expr_delete = f"pk in {primary_keys}"  # 构造删除表达式
+        # try:
+        #     delete_res = self.client.delete(collection_name=self.collection_name, filter=expr_delete)
+        #     logger.info(f"Deleted data with doc_id: {delete_res}")
+        #     return "delete_success"
+        # except Exception as e:
+        #     logger.error(f"删除数据失败:{e}")
+        #     return "delete_error"
         
     
 # 测试