| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
import os

# PYTORCH_CUDA_ALLOC_CONF is read when PyTorch's CUDA caching allocator is
# initialized, so it must be in the environment BEFORE the first CUDA call.
# The original code set it after `torch.cuda.is_available()`, at which point
# it could already be too late to take effect.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch
from pymilvus import model
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Use the first GPU when CUDA is usable, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Loading the model via sentence-transformers is disabled; the live setup
# calls remote OpenAI-compatible embedding endpoints instead (see below).
# embedding_path = r"/opt/vllm/models/BAAI/bge-m3"  # production path
# Base URL of the vLLM server exposing the BGE-M3 embedding model
# (OpenAI-compatible /v1 API).
bge_m3_base_url = r"http://10.1.14.16:8787/v1"
# Model name as registered on that server.
# NOTE(review): "bge_me_model" looks like a typo for "bge_m3_model", but the
# name is module-level and may be imported elsewhere — confirm before renaming.
bge_me_model = "bge-m3"
# Base URL of the vLLM server exposing the Qwen3 embedding model.
qwen_ed_base_url = r"http://10.1.14.16:8788/v1"
qwen_ed_model = "Qwen3-Embedding"
# embedding_path = r"G:/work/code/models/multilingual-e5-large-instruct/"  # local dev path
# sentence_transformer_ef = model.dense.SentenceTransformerEmbeddingFunction(model_name=embedding_path,device=device)
# Dense embedding function that calls the OpenAI-compatible /v1/embeddings
# endpoint of the local vLLM server hosting bge-m3.
bge_m3_ef = model.dense.OpenAIEmbeddingFunction(
    model_name=bge_me_model,  # model name as served by the endpoint
    api_key='YOUR_API_KEY',  # placeholder — presumably the local server does not validate keys; confirm
    # dimensions=512,  # optionally truncate the embedding dimensionality
    base_url=bge_m3_base_url
)
# HACK: pymilvus only knows the output dims of official OpenAI models, so we
# patch its private metadata table to report bge-m3's 1024-dim vectors.
# NOTE(review): relies on the private `_openai_model_meta_info` attribute —
# may break on a pymilvus upgrade.
bge_m3_ef._openai_model_meta_info[bge_me_model]["dim"] = 1024
# Dense embedding function that calls the OpenAI-compatible /v1/embeddings
# endpoint of the local vLLM server hosting Qwen3-Embedding.
qwen_ed_ef = model.dense.OpenAIEmbeddingFunction(
    model_name=qwen_ed_model,  # model name as served by the endpoint
    api_key='YOUR_API_KEY',  # placeholder — presumably the local server does not validate keys; confirm
    # dimensions=512,  # optionally truncate the embedding dimensionality
    base_url=qwen_ed_base_url
)
# HACK: same private-attribute patch as bge_m3_ef above — pymilvus has no
# built-in dim metadata for this non-OpenAI model name, so set 1024 manually.
qwen_ed_ef._openai_model_meta_info[qwen_ed_model]["dim"] = 1024
- # sentence_transformer_ef = model.hybrid.BGEM3EmbeddingFunction(model_name=embedding_path,device=device,use_fp16=False)
- # embedding_path_qwen = r"/opt/vllm/models/Qwen/Qwen3-Embedding-0.6B"
- # sentence_transformer_qwen = model.dense.SentenceTransformerEmbeddingFunction(model_name=embedding_path_qwen,device=device,
- # model_kwargs={
- # "attn_implementation": "flash_attention_2", # 加速推理
- # "device_map": "auto", # 自动设备分配
- # "torch_dtype": torch.float16
- # },
- # tokenizer_kwargs={
- # "padding_side": "left" # 左侧填充
- # },
-
- # )
- # from transformers import AutoTokenizer, AutoModel
- # import torch
- # class QwenEmbedding:
- # def __init__(self, model_path, device="cuda"):
- # self.tokenizer = AutoTokenizer.from_pretrained(model_path)
- # self.model = AutoModel.from_pretrained(
- # model_path,
- # trust_remote_code=True,
- # torch_dtype=torch.float32
- # ).to(device)
- # self.device = device
- # @torch.no_grad()
- # def __call__(self, texts):
- # inputs = self.tokenizer(
- # texts, padding=True, truncation=True, return_tensors="pt"
- # ).to(self.device)
- # outputs = self.model(**inputs)
- # # 一般取 last_hidden_state 的 CLS 或 mean-pool
- # emb = outputs.last_hidden_state.mean(dim=1)
- # return emb.cpu().numpy()
- # sentence_transformer_qwen = QwenEmbedding(embedding_path_qwen, "cuda:1")
- # # rerank模型
- # bce_rerank_model_path = r"/opt/vllm/models/BAAI/bge-reranker-v2-m3" # 线上路径
- # # bce_rerank_model_path = r"G:/work/code/models/bce-reranker-base_v1" # 本地路径
- # bce_rerank_tokenizer = AutoTokenizer.from_pretrained(bce_rerank_model_path)
- # bce_rerank_base_model = AutoModelForSequenceClassification.from_pretrained(bce_rerank_model_path).to(device)
- # # rerank模型
- # qwen_rerank_model_path = r"/opt/vllm/models/Qwen/Qwen3-Reranker-0.6B" # 线上路径
- # # bce_rerank_model_path = r"G:/work/code/models/bce-reranker-base_v1" # 本地路径
- # qwen_rerank_tokenizer = AutoTokenizer.from_pretrained(bce_rerank_model_path)
- # qwen_rerank_base_model = AutoModelForSequenceClassification.from_pretrained(bce_rerank_model_path).to(device)
# Reranker service endpoints (model servers on the same host as the
# embedding endpoints above; note these URLs carry no /v1 suffix).
rerank_bge_url = "http://10.1.14.16:8791"
rerank_bge_model = "bge-reranker-v2-m3"
rerank_qwen_url = "http://10.1.14.16:8790"
rerank_qwen_model = "Qwen3-Reranker-0.6B"
# Tokenizer for the Qwen3-30B instruct model served locally.
# (The redundant bottom-of-file `from transformers import AutoTokenizer`
# was removed — AutoTokenizer is already imported at the top of the file.)
tokenizer = AutoTokenizer.from_pretrained(
    "/opt/vllm/models/Qwen/Qwen3-30B-A3B-Instruct-2507",
    trust_remote_code=True,  # commonly needed when the model ships a custom tokenizer
)
|