import os

from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_images


class MinerUParseImage:
    """Run the magic_pdf OCR pipeline on an image and return its content list."""

    # NOTE: a constructor taking ``knowledge_id`` is currently commented out:
    # def __init__(self, knowledge_id):
    #     self.knowledge_id = knowledge_id

    async def extract_text(self, file_path):
        """Analyze the image at ``file_path`` in OCR mode and return its content list.

        The method makes sure ``./tmp_file/images`` exists, hands that directory
        to a ``FileBasedDataWriter``, loads ``file_path`` with
        ``read_local_images`` and processes only the first dataset returned.

        Although declared ``async``, the body contains no ``await``: all work
        runs synchronously once the coroutine is awaited.

        Args:
            file_path: Path forwarded unchanged to ``read_local_images``.

        Returns:
            Whatever ``get_content_list`` yields for the OCR pipeline result,
            called with the image directory name ``"images"``.

        Raises:
            IndexError: If ``read_local_images`` returns an empty sequence.
        """
        output_dir = "./tmp_file/images"
        os.makedirs(output_dir, exist_ok=True)
        writer = FileBasedDataWriter(output_dir)

        # Only the last path component ("images") is passed to
        # ``get_content_list``; presumably it is used to build relative image
        # references in the output -- confirm against the magic_pdf docs.
        image_dir_name = os.path.basename(output_dir)

        # Only the first dataset is used; any further ones are ignored.
        datasets = read_local_images(file_path)
        first_dataset = datasets[0]

        ocr_pipeline = first_dataset.apply(doc_analyze, ocr=True).pipe_ocr_mode(writer)
        return ocr_pipeline.get_content_list(image_dir_name)