| 1234567891011121314151617181920212223242526 |
- # import os
- # from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
- # from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
- # from magic_pdf.data.read_api import read_local_images
- # class MinerUParseImage():
- # # def __init__(self, knowledge_id):
- # # self.knowledge_id = knowledge_id
- # async def extract_text(self, file_path):
- # local_image_dir = "./tmp_file/images"
- # image_dir = str(os.path.basename(local_image_dir))
- # os.makedirs(local_image_dir, exist_ok=True)
- # image_writer = FileBasedDataWriter(local_image_dir)
- # ds = read_local_images(file_path)[0] #
- # infer_result = ds.apply(doc_analyze, ocr=True)
- # pipe_result = infer_result.pipe_ocr_mode(image_writer)
- # content_list_content = pipe_result.get_content_list(image_dir)
- # return content_list_content
|