|
|
@@ -0,0 +1,320 @@
|
|
|
+# 工程公司RAG部署指南
|
|
|
+
|
|
|
+## mineru部署:
|
|
|
+
|
|
|
+conda create -n mineru python=3.10
|
|
|
+conda activate mineru
|
|
|
+pip install uv
|
|
|
+uv pip install "mineru[all]"
|
|
|
+
|
|
|
+设置国内模型下载源(使用ModelScope):export MINERU_MODEL_SOURCE=modelscope
|
|
|
+启动并自动下载模型:mineru-vllm-server --port 9999
|
|
|
+
|
|
|
+如需使用其它端口,请修改./rag/document_load/pdf_load.py中server_url的默认值
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+## RAG虚拟环境安装
|
|
|
+
|
|
|
+conda create -n app python=3.11
|
|
|
+conda activate app
|
|
|
+pip install -r requirements.txt
|
|
|
+
|
|
|
+torch需单独安装(三个包的版本须相互匹配,并与下方依赖列表中的CUDA 12.8组件一致):pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128
|
|
|
+
|
|
|
+将下面的内容保存为requirements.txt文件(需在执行上述 pip install -r requirements.txt 之前完成):
|
|
|
+accelerate==1.10.1
|
|
|
+aiofiles==24.1.0
|
|
|
+aiohappyeyeballs==2.6.1
|
|
|
+aiohttp==3.11.18
|
|
|
+aiosignal==1.3.2
|
|
|
+aiosqlite==0.21.0
|
|
|
+albucore==0.0.23
|
|
|
+albumentations==2.0.5
|
|
|
+annotated-types==0.7.0
|
|
|
+antlr4-python3-runtime==4.9.3
|
|
|
+anyio==4.9.0
|
|
|
+argon2-cffi==23.1.0
|
|
|
+argon2-cffi-bindings==21.2.0
|
|
|
+astor==0.8.1
|
|
|
+attrs==25.3.0
|
|
|
+beautifulsoup4==4.14.2
|
|
|
+blake3==1.0.8
|
|
|
+boto3==1.38.1
|
|
|
+botocore==1.38.1
|
|
|
+Brotli==1.1.0
|
|
|
+cachetools==6.2.1
|
|
|
+cbor2==5.7.0
|
|
|
+certifi==2025.1.31
|
|
|
+cffi==1.17.1
|
|
|
+chardet==5.2.0
|
|
|
+charset-normalizer==3.4.1
|
|
|
+click==8.2.0
|
|
|
+cloudpickle==3.1.1
|
|
|
+colorama==0.4.6
|
|
|
+coloredlogs==15.0.1
|
|
|
+colorlog==6.9.0
|
|
|
+compressed-tensors==0.11.0
|
|
|
+contourpy==1.3.2
|
|
|
+Crawl4AI==0.6.3
|
|
|
+cryptography==44.0.2
|
|
|
+cssselect==1.3.0
|
|
|
+cupy-cuda12x==13.6.0
|
|
|
+cycler==0.12.1
|
|
|
+dataclasses-json==0.6.7
|
|
|
+depyf==0.19.0
|
|
|
+dill==0.4.0
|
|
|
+diskcache==5.6.3
|
|
|
+distro==1.9.0
|
|
|
+dnspython==2.8.0
|
|
|
+doclayout_yolo==0.0.4
|
|
|
+edge-tts==7.0.1
|
|
|
+einops==0.8.1
|
|
|
+email-validator==2.3.0
|
|
|
+fake-http-header==0.3.5
|
|
|
+fake-useragent==2.2.0
|
|
|
+fast-langdetect==0.2.5
|
|
|
+fastapi==0.115.12
|
|
|
+fastapi-cli==0.0.13
|
|
|
+fastapi-cloud-cli==0.3.1
|
|
|
+fastrlock==0.8.3
|
|
|
+fasttext-predict==0.9.2.4
|
|
|
+ffmpy==0.6.3
|
|
|
+filelock==3.18.0
|
|
|
+flatbuffers==25.2.10
|
|
|
+fonttools==4.57.0
|
|
|
+frozendict==2.4.6
|
|
|
+frozenlist==1.6.0
|
|
|
+fsspec==2025.3.2
|
|
|
+ftfy==6.3.1
|
|
|
+gguf==0.17.1
|
|
|
+gradio==5.49.1
|
|
|
+gradio_client==1.13.3
|
|
|
+gradio_pdf==0.0.22
|
|
|
+greenlet==3.2.1
|
|
|
+groovy==0.1.2
|
|
|
+grpcio==1.67.1
|
|
|
+h11==0.14.0
|
|
|
+hf-xet==1.1.10
|
|
|
+httpcore==1.0.8
|
|
|
+httptools==0.7.1
|
|
|
+httpx==0.28.1
|
|
|
+httpx-sse==0.4.0
|
|
|
+huggingface-hub==0.35.3
|
|
|
+humanfriendly==10.0
|
|
|
+humanize==4.12.3
|
|
|
+idna==3.10
|
|
|
+imageio==2.37.0
|
|
|
+importlib_metadata==8.7.0
|
|
|
+interegular==0.3.3
|
|
|
+Jinja2==3.1.6
|
|
|
+jiter==0.9.0
|
|
|
+jmespath==1.0.1
|
|
|
+joblib==1.4.2
|
|
|
+json_repair==0.52.1
|
|
|
+jsonpatch==1.33
|
|
|
+jsonpointer==3.0.0
|
|
|
+jsonschema==4.24.0
|
|
|
+jsonschema-specifications==2025.4.1
|
|
|
+kiwisolver==1.4.8
|
|
|
+langchain==0.3.22
|
|
|
+langchain-community==0.3.18
|
|
|
+langchain-core==0.3.56
|
|
|
+langchain-text-splitters==0.3.8
|
|
|
+langsmith==0.3.34
|
|
|
+lark==1.2.2
|
|
|
+lazy_loader==0.4
|
|
|
+litellm==1.72.0
|
|
|
+llguidance==0.7.30
|
|
|
+llvmlite==0.44.0
|
|
|
+lm-format-enforcer==0.11.3
|
|
|
+loguru==0.7.3
|
|
|
+lxml==5.4.0
|
|
|
+magic-pdf==1.3.10
|
|
|
+magika==0.6.2
|
|
|
+Markdown==3.8
|
|
|
+markdown-it-py==3.0.0
|
|
|
+MarkupSafe==3.0.2
|
|
|
+marshmallow==3.26.1
|
|
|
+matplotlib==3.10.1
|
|
|
+mdurl==0.1.2
|
|
|
+milvus-lite==2.4.12
|
|
|
+milvus-model==0.2.12
|
|
|
+milvus_cli==1.0.2
|
|
|
+mineru==2.5.4
|
|
|
+mineru_vl_utils==0.1.13
|
|
|
+minio==7.2.15
|
|
|
+mistral_common==1.8.5
|
|
|
+modelscope==1.31.0
|
|
|
+mpmath==1.3.0
|
|
|
+msgpack==1.1.2
|
|
|
+msgspec==0.19.0
|
|
|
+multidict==6.4.3
|
|
|
+mypy_extensions==1.1.0
|
|
|
+mysql-connector-python==9.3.0
|
|
|
+nest-asyncio==1.6.0
|
|
|
+networkx==3.4.2
|
|
|
+ninja==1.13.0
|
|
|
+nltk==3.9.1
|
|
|
+numba==0.61.2
|
|
|
+numpy==1.26.4
|
|
|
+nvidia-cublas-cu12==12.8.4.1
|
|
|
+nvidia-cuda-cupti-cu12==12.8.90
|
|
|
+nvidia-cuda-nvrtc-cu12==12.8.93
|
|
|
+nvidia-cuda-runtime-cu12==12.8.90
|
|
|
+nvidia-cudnn-cu12==9.10.2.21
|
|
|
+nvidia-cufft-cu12==11.3.3.83
|
|
|
+nvidia-cufile-cu12==1.13.1.3
|
|
|
+nvidia-curand-cu12==10.3.9.90
|
|
|
+nvidia-cusolver-cu12==11.7.3.90
|
|
|
+nvidia-cusparse-cu12==12.5.8.93
|
|
|
+nvidia-cusparselt-cu12==0.7.1
|
|
|
+nvidia-nccl-cu12==2.27.3
|
|
|
+nvidia-nvjitlink-cu12==12.8.93
|
|
|
+nvidia-nvtx-cu12==12.8.90
|
|
|
+ollama==0.4.8
|
|
|
+omegaconf==2.3.0
|
|
|
+onnxruntime==1.21.1
|
|
|
+openai==1.109.1
|
|
|
+openai-harmony==0.0.4
|
|
|
+opencv-python==4.11.0.86
|
|
|
+opencv-python-headless==4.11.0.86
|
|
|
+orjson==3.10.16
|
|
|
+outlines_core==0.2.11
|
|
|
+packaging==24.2
|
|
|
+pandas==2.2.3
|
|
|
+partial-json-parser==0.2.1.1.post6
|
|
|
+pdfminer.six==20250506
|
|
|
+pdfplumber==0.11.6
|
|
|
+pdftext==0.6.3
|
|
|
+pillow==11.3.0
|
|
|
+playwright==1.52.0
|
|
|
+prometheus-fastapi-instrumentator==7.1.0
|
|
|
+prometheus_client==0.23.1
|
|
|
+propcache==0.3.1
|
|
|
+protobuf==6.30.2
|
|
|
+psutil==7.0.0
|
|
|
+py-cpuinfo==9.0.0
|
|
|
+pybase64==1.4.2
|
|
|
+pyclipper==1.3.0.post6
|
|
|
+pycountry==24.6.1
|
|
|
+pycparser==2.22
|
|
|
+pycryptodome==3.22.0
|
|
|
+pydantic==2.11.10
|
|
|
+pydantic-extra-types==2.10.6
|
|
|
+pydantic-settings==2.9.1
|
|
|
+pydantic_core==2.33.2
|
|
|
+pydub==0.25.1
|
|
|
+pyee==13.0.0
|
|
|
+Pygments==2.19.1
|
|
|
+pymilvus==2.5.4
|
|
|
+PyMuPDF==1.24.14
|
|
|
+pyOpenSSL==25.1.0
|
|
|
+pyparsing==3.2.3
|
|
|
+pypdf==6.1.1
|
|
|
+pypdfium2==4.30.0
|
|
|
+pyperclip==1.9.0
|
|
|
+python-dateutil==2.9.0.post0
|
|
|
+python-docx==1.1.2
|
|
|
+python-dotenv==1.1.0
|
|
|
+python-json-logger==4.0.0
|
|
|
+python-multipart==0.0.20
|
|
|
+pytz==2025.2
|
|
|
+PyYAML==6.0.2
|
|
|
+pyzmq==27.1.0
|
|
|
+rank-bm25==0.2.2
|
|
|
+rapid-table==1.0.5
|
|
|
+ray==2.50.1
|
|
|
+redis==5.2.1
|
|
|
+referencing==0.36.2
|
|
|
+regex==2024.11.6
|
|
|
+reportlab==4.4.4
|
|
|
+requests==2.32.3
|
|
|
+requests-toolbelt==1.0.0
|
|
|
+rich==14.0.0
|
|
|
+rich-toolkit==0.15.1
|
|
|
+rignore==0.7.1
|
|
|
+robust-downloader==0.0.2
|
|
|
+rpds-py==0.25.1
|
|
|
+ruff==0.14.1
|
|
|
+s3transfer==0.12.0
|
|
|
+safehttpx==0.1.6
|
|
|
+safetensors==0.5.3
|
|
|
+scikit-image==0.25.2
|
|
|
+scikit-learn==1.6.1
|
|
|
+scipy==1.15.2
|
|
|
+seaborn==0.13.2
|
|
|
+semantic-version==2.10.0
|
|
|
+sentence-transformers==4.1.0
|
|
|
+sentencepiece==0.2.1
|
|
|
+sentry-sdk==2.42.0
|
|
|
+setproctitle==1.3.7
|
|
|
+shapely==2.1.0
|
|
|
+shellingham==1.5.4
|
|
|
+simsimd==6.2.1
|
|
|
+six==1.17.0
|
|
|
+sniffio==1.3.1
|
|
|
+snowballstemmer==2.2.0
|
|
|
+soundfile==0.13.1
|
|
|
+soupsieve==2.7
|
|
|
+soxr==1.0.0
|
|
|
+SQLAlchemy==2.0.40
|
|
|
+srt==3.5.3
|
|
|
+sse-starlette==2.3.3
|
|
|
+starlette==0.46.2
|
|
|
+stringzilla==3.12.5
|
|
|
+sympy==1.13.3
|
|
|
+tabulate==0.8.9
|
|
|
+tenacity==9.1.2
|
|
|
+tf-playwright-stealth==1.1.2
|
|
|
+thop==0.1.1.post2209072238
|
|
|
+threadpoolctl==3.6.0
|
|
|
+tifffile==2025.10.16
|
|
|
+tiktoken==0.9.0
|
|
|
+tokenizers==0.22.1
|
|
|
+tomlkit==0.13.3
|
|
|
+tqdm==4.67.1
|
|
|
+transformers==4.57.1
|
|
|
+triton==3.4.0
|
|
|
+typer==0.19.2
|
|
|
+typing-inspect==0.9.0
|
|
|
+typing-inspection==0.4.0
|
|
|
+typing_extensions==4.13.2
|
|
|
+tzdata==2025.2
|
|
|
+ujson==5.10.0
|
|
|
+ultralytics==8.3.115
|
|
|
+ultralytics-thop==2.0.14
|
|
|
+urllib3==2.4.0
|
|
|
+uv==0.9.3
|
|
|
+uvicorn==0.34.2
|
|
|
+uvloop==0.22.1
|
|
|
+vllm==0.10.2
|
|
|
+watchfiles==1.1.1
|
|
|
+wcwidth==0.2.13
|
|
|
+websockets==15.0.1
|
|
|
+xformers==0.0.32.post1
|
|
|
+xgrammar==0.1.23
|
|
|
+xxhash==3.5.0
|
|
|
+yarl==1.20.0
|
|
|
+zipp==3.22.0
|
|
|
+zstandard==0.23.0
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+## 其它:
|
|
|
+
|
|
|
+嵌入模型:https://www.modelscope.cn/models/intfloat/multilingual-e5-large-instruct
|
|
|
+重排序模型:https://www.modelscope.cn/models/maidalun/bce-reranker-base_v1
|
|
|
+下载后,将项目目录下./rag/load_model.py中的模型路径修改为本地模型的实际存放路径
|
|
|
+
|
|
|
+修改./config中的相应配置
|
|
|
+
|
|
|
+不同运行环境可用的GPU不同,请根据实际情况在./rag_server.py顶部通过os.environ["CUDA_VISIBLE_DEVICES"]指定要使用的GPU
|
|
|
+同时将./rag/load_model.py中device = "cuda:1" if torch.cuda.is_available() else "cpu"里的cuda:1改为实际使用的cuda:x(x为GPU编号)
|
|
|
+
|
|
|
+
|
|
|
+运行rag_server.py启动服务,默认监听6666端口
|