| 1234567891011121314151617181920212223242526 |
- import os
- from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
- from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
- from magic_pdf.data.read_api import read_local_images
class MinerUParseImage:
    """Run the MinerU (magic_pdf) OCR pipeline on a local image."""

    async def extract_text(self, file_path):
        """Return the OCR content list extracted from ``file_path``.

        The method is declared ``async`` for its callers, but it contains no
        ``await``: all of the work below runs synchronously once the
        coroutine is awaited.

        Args:
            file_path: Location handed to ``read_local_images``. Only the
                first dataset it yields is processed.

        Returns:
            Whatever ``pipe_result.get_content_list`` produces for the image
            (presumably a list of content entries -- confirm against the
            magic_pdf documentation).

        Raises:
            IndexError: If ``read_local_images`` yields no dataset for
                ``file_path``.
        """
        local_image_dir = "./tmp_file/images"
        # The content list refers to extracted images by the directory's
        # base name ("images"), not by the full local path.
        image_dir = os.path.basename(local_image_dir)

        os.makedirs(local_image_dir, exist_ok=True)
        image_writer = FileBasedDataWriter(local_image_dir)

        datasets = read_local_images(file_path)
        if not datasets:
            # Same exception type a bare ``[0]`` would raise, but with a
            # message that names the offending path.
            raise IndexError(f"no image could be read from {file_path!r}")
        ds = datasets[0]

        infer_result = ds.apply(doc_analyze, ocr=True)
        pipe_result = infer_result.pipe_ocr_mode(image_writer)
        return pipe_result.get_content_list(image_dir)
|