"""Split Excel worksheets into row blocks grouped by vertically merged cells."""


def md_safe(val):
    """Return *val* with line breaks replaced so it fits on one Markdown line.

    Strings get every CRLF / LF replaced by ``<br>``; any non-string value is
    returned unchanged.

    NOTE(review): in the received source the replacement literal was a raw
    line break inside the quotes (a syntax error). ``<br>`` is the presumed
    original text, lost in an HTML round-trip -- confirm.
    """
    if isinstance(val, str):
        # CRLF must be handled first so it yields one <br>, not two.
        return val.replace("\r\n", "<br>").replace("\n", "<br>")
    return val


def _is_blank(value):
    """Return True for ``None`` or a string that is empty / whitespace only."""
    return value is None or (isinstance(value, str) and not value.strip())


def parse_sheet(ws):
    """Parse one worksheet into a list of row blocks.

    Rows tied together by vertically merged cells form one block; every other
    row is a block of its own. Inside a block each merged cell is filled with
    the value of its range's top-left cell, rows that are entirely blank are
    dropped, and rows identical to an earlier row of the same block are
    dropped as well.

    Args:
        ws: Worksheet-like object. Only ``ws.merged_cells.ranges`` (items with
            ``min_row`` / ``max_row`` / ``min_col`` / ``max_col``),
            ``ws.cell(row, col).value``, ``ws.max_row`` and ``ws[row]``
            (the cells of a row, starting at column 1) are used.

    Returns:
        A list of dicts ``{'start': int, 'end': int, 'content': list}`` where
        ``start`` / ``end`` are 1-based inclusive row numbers and ``content``
        is a list of row value lists. ``content`` is empty when every row of
        the block is blank.
    """
    merged_ranges = list(ws.merged_cells.ranges)

    # (row, col) -> value of the merged range's top-left cell.
    fill_map = {}
    for rng in merged_ranges:
        anchor_value = ws.cell(rng.min_row, rng.min_col).value
        for r in range(rng.min_row, rng.max_row + 1):
            for c in range(rng.min_col, rng.max_col + 1):
                fill_map[(r, c)] = anchor_value

    # Only merges spanning more than one row can glue rows into a block.
    row_spans = [
        (rng.min_row, rng.max_row)
        for rng in merged_ranges
        if rng.max_row > rng.min_row
    ]

    blocks = []
    row = 1
    while row <= ws.max_row:
        # Grow the block until no merge that touches it reaches past its end.
        # Checking only the merges that cover the first row would cut a merge
        # that starts inside the block and ends later in two.
        block_end = row
        while True:
            reach = max(
                (end for start, end in row_spans
                 if start <= block_end and end >= row),
                default=block_end,
            )
            if reach <= block_end:
                break
            block_end = reach

        content = []
        seen = set()
        for r in range(row, block_end + 1):
            values = [
                md_safe(fill_map.get((r, c), cell.value))
                for c, cell in enumerate(ws[r], 1)
            ]
            if all(_is_blank(v) for v in values):
                continue
            key = tuple(values)
            if key not in seen:
                seen.add(key)
                content.append(values)

        blocks.append({'start': row, 'end': block_end, 'content': content})
        row = block_end + 1

    return blocks


def parse_excel(file_path):
    """Parse every worksheet of an Excel file.

    Args:
        file_path: Path of the workbook, passed to ``openpyxl.load_workbook``.

    Returns:
        A dict ``{sheet title: list of blocks}`` in workbook order; see
        ``parse_sheet`` for the block layout.
    """
    # Imported here, next to its only use, so md_safe / parse_sheet remain
    # importable in environments where openpyxl is not installed.
    from openpyxl import load_workbook

    wb = load_workbook(file_path)
    try:
        # wb.worksheets skips chartsheets, which have no cell grid to parse.
        return {ws.title: parse_sheet(ws) for ws in wb.worksheets}
    finally:
        # Close the workbook even when a sheet fails to parse.
        wb.close()


def _format_block(index, block):
    """Return the console line for one block (row range only if multi-row)."""
    if block['start'] != block['end']:
        return f"块{index}: 第{block['start']}-{block['end']}行 值:{block['content']}"
    return f"块{index}: 第{block['start']}行 值:{block['content']}"


if __name__ == "__main__":
    FILE_PATH = r"\ceshi\新建 XLSX 工作表.xlsx"
    for sheet, blocks in parse_excel(FILE_PATH).items():
        print(f"\n=== {sheet} ===")
        for i, b in enumerate(blocks, 1):
            print(_format_block(i, b))

# Example output format
"""
块6: 第6行 值:[[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]]
块7: 第7-18行 值:[[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [8, 9, 10, 11, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [9, 10, 11, 12, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [10, 11, 12, 13, 11, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [11, 12, 13, 14, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 30, 31, 32, 33], [12, 13, 14, 15, 11, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 28, 31, 32, 33, 34], [13, 14, 15, 16, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 28, 32, 33, 34, 35], [14, 15, 16, 17, 11, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 28, 33, 34, 35, 36], [15, 16, 17, 18, 11, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 28, 34, 35, 36, 37], [16, 17, 18, 19, 11, 21, 22, 23, 24, 25, 25, 27, 28, 29, 30, 31, 32, 33, 28, 35, 36, 37, 38], [17, 18, 19, 20, 11, 22, 23, 24, 25, 26, 25, 28, 29, 30, 31, 32, 33, 34, 28, 36, 37, 38, 39], [18, 19, 20, 21, 11, 23, 24, 25, 26, 27, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]]
"""