import asyncio
from pathlib import Path

from markdown import markdown
from weasyprint import HTML


class AsyncMdToPdf:
    """Convert Markdown files to PDF from asyncio code.

    Rendering is synchronous library work, so each conversion is pushed to a
    worker thread to keep the event loop responsive.
    """

    def __init__(self, output_dir: str = "./tmp_file/pdf_output"):
        """Prepare the converter.

        Args:
            output_dir: Directory that receives the generated PDFs. It is
                created (including missing parents) if it does not exist.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Filled in with str.format(), so any literal brace added to this
        # template later must be doubled ("{{" / "}}").
        self._html_template = """
{html_body}
"""

    def _render_pdf(self, md_path: Path, pdf_path: Path) -> None:
        """Read *md_path*, render it to HTML and write the PDF to *pdf_path*.

        Fully synchronous; meant to be run in a worker thread.
        """
        md_text = md_path.read_text(encoding="utf-8")
        html_body = markdown(md_text, extensions=["fenced_code", "tables", "toc"])
        html = self._html_template.format(html_body=html_body)
        # NOTE(review): base_url="." presumably makes relative resource URLs
        # (e.g. images) resolve against the process working directory rather
        # than the Markdown file's folder - confirm this is intended.
        HTML(string=html, base_url=".").write_pdf(str(pdf_path))

    async def convert_md_to_pdf(self, md_path: str) -> str:
        """Convert a single Markdown file to PDF.

        The PDF is written to ``output_dir`` and named after the Markdown
        file's stem, so two inputs sharing a stem write to the same PDF path.

        Args:
            md_path: Path of the Markdown file to convert.

        Returns:
            The path of the generated PDF, as a string.

        Raises:
            FileNotFoundError: If *md_path* does not exist.
        """
        source = Path(md_path)
        if not source.exists():
            raise FileNotFoundError(f"Markdown 文件不存在: {source}")

        pdf_path = self.output_dir / f"{source.stem}.pdf"

        # Markdown rendering and HTML parsing are blocking work just like the
        # final PDF write, so the whole pipeline runs off the event loop.
        await asyncio.to_thread(self._render_pdf, source, pdf_path)
        return str(pdf_path)

    async def convert_multiple(self, md_paths: list[str], concurrency: int = 3) -> list[str]:
        """Convert several Markdown files concurrently.

        Args:
            md_paths: Paths of the Markdown files to convert.
            concurrency: Maximum number of conversions in flight at once;
                must be at least 1.

        Returns:
            The generated PDF paths, in the same order as *md_paths*.

        Raises:
            ValueError: If *concurrency* is less than 1.
            FileNotFoundError: If any of the inputs does not exist.
        """
        # Semaphore(0) would never admit a task and the call would hang forever.
        if concurrency < 1:
            raise ValueError(f"concurrency must be >= 1, got {concurrency}")

        sem = asyncio.Semaphore(concurrency)

        async def sem_task(path: str) -> str:
            async with sem:
                return await self.convert_md_to_pdf(path)

        tasks = [sem_task(p) for p in md_paths]
        return await asyncio.gather(*tasks)


# ====== Usage example ======
async def main():
    """Convert the sample Markdown file(s) and print the resulting PDF paths."""
    converter = AsyncMdToPdf(output_dir="./tmp_file/pdf_output")
    md_files = [
        "/work/code/ceshi/deepseek_temporary/tmp_file/paddleocr_parsed/2025版《见证取样送检指南》/2025版《见证取样送检指南》.md",
        # Add more Markdown file paths here.
    ]
    pdf_paths = await converter.convert_multiple(md_files)
    print("生成 PDF 完成:", pdf_paths)


if __name__ == "__main__":
    asyncio.run(main())