import asyncio
from pathlib import Path
from markdown import markdown
from weasyprint import HTML
class AsyncMdToPdf:
    """Convert Markdown files to PDF without blocking the asyncio event loop.

    Markdown is turned into HTML with ``markdown`` and the HTML is rendered
    to PDF with WeasyPrint. Both libraries are synchronous, so every
    conversion runs in a worker thread via ``asyncio.to_thread``.
    """

    def __init__(self, output_dir: str = "./tmp_file/pdf_output"):
        """Create the converter and make sure the output directory exists.

        Args:
            output_dir: Directory that receives the generated PDF files.
                Missing parent directories are created.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Wrapper placed around the rendered Markdown; ``{html_body}`` is
        # substituted with ``str.format``.
        self._html_template = """
{html_body}
"""

    def _render_pdf(self, md_path: Path, pdf_path: Path) -> None:
        """Synchronously render one Markdown file to ``pdf_path``.

        Intended to be called from a worker thread: reading the file,
        parsing Markdown, parsing HTML and writing the PDF are all blocking.
        """
        md_text = md_path.read_text(encoding="utf-8")
        html_body = markdown(md_text, extensions=["fenced_code", "tables", "toc"])
        html = self._html_template.format(html_body=html_body)
        # Relative image/link URLs in the document are resolved against the
        # Markdown file's own directory, not the process working directory,
        # so assets stored next to the .md file are found wherever the
        # script is launched from.
        base_url = str(md_path.parent.resolve())
        HTML(string=html, base_url=base_url).write_pdf(str(pdf_path))

    async def convert_md_to_pdf(self, md_path: str) -> str:
        """Convert a single Markdown file to PDF.

        The PDF is written to ``output_dir`` under the Markdown file's stem,
        so two inputs sharing a stem write to the same output path.

        Args:
            md_path: Path of the Markdown file to convert.

        Returns:
            Path of the generated PDF, as a string.

        Raises:
            FileNotFoundError: If ``md_path`` does not exist.
        """
        source = Path(md_path)
        if not source.exists():
            raise FileNotFoundError(f"Markdown 文件不存在: {source}")
        pdf_path = self.output_dir / f"{source.stem}.pdf"
        # One thread hop covers the whole blocking pipeline, keeping the
        # event loop free while Markdown and WeasyPrint do their work.
        await asyncio.to_thread(self._render_pdf, source, pdf_path)
        return str(pdf_path)

    async def convert_multiple(self, md_paths: list[str], concurrency: int = 3) -> list[str]:
        """Convert several Markdown files concurrently.

        Args:
            md_paths: Paths of the Markdown files to convert.
            concurrency: Maximum number of conversions running at once;
                must be at least 1.

        Returns:
            Generated PDF paths, in the same order as ``md_paths``.

        Raises:
            ValueError: If ``concurrency`` is smaller than 1.
            FileNotFoundError: If any input file does not exist.
        """
        # A semaphore initialised with 0 would never let any task start, so
        # the call would hang forever; reject that up front.
        if concurrency < 1:
            raise ValueError(f"concurrency must be >= 1, got {concurrency}")
        sem = asyncio.Semaphore(concurrency)

        async def sem_task(path: str) -> str:
            async with sem:
                return await self.convert_md_to_pdf(path)

        tasks = [sem_task(p) for p in md_paths]
        return await asyncio.gather(*tasks)
# ====== 使用示例 ======
async def main():
    """Usage example: convert a fixed list of Markdown files and print the PDF paths."""
    sources = [
        "/work/code/ceshi/deepseek_temporary/tmp_file/paddleocr_parsed/2025版《见证取样送检指南》/2025版《见证取样送检指南》.md",
        # More Markdown file paths can be added here.
    ]
    pdf_converter = AsyncMdToPdf(output_dir="./tmp_file/pdf_output")
    generated = await pdf_converter.convert_multiple(sources)
    print("生成 PDF 完成:", generated)
if __name__ == "__main__":
    # Run the example only when executed as a script, not when imported.
    asyncio.run(main())