| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- from openpyxl import load_workbook
- def md_safe(val):
- if isinstance(val, str):
- return val.replace("\r\n", "<br>").replace("\n", "<br>")
- return val
- def parse_sheet(ws):
- """解析单个工作表,返回文本块列表"""
- # 构建合并单元格填充映射 {(row, col): value}
- fill_map = {}
- for r in ws.merged_cells.ranges:
- val = ws.cell(r.min_row, r.min_col).value
- for row in range(r.min_row, r.max_row + 1):
- for col in range(r.min_col, r.max_col + 1):
- fill_map[(row, col)] = val
-
- # 获取跨行合并单元格
- merged = [(r.min_row, r.max_row) for r in ws.merged_cells.ranges if r.max_row > r.min_row]
-
- blocks = []
- row = 1
- while row <= ws.max_row:
- covering = [end for start, end in merged if start <= row <= end]
- block_end = max(covering) if covering else row
-
- # 提取内容(合并单元格填充值)
- content = []
- seen = set()
- for r in range(row, block_end + 1):
- # row_data = [fill_map.get((r, c), cell.value) for c, cell in enumerate(ws[r], 1)]
- row_data = [
- md_safe(fill_map.get((r, c), cell.value))
- for c, cell in enumerate(ws[r], 1)
- ]
- # 过滤空行:所有单元格都是 None 或空字符串
- if all(cell is None or (isinstance(cell, str) and not cell.strip()) for cell in row_data):
- continue
- row_tuple = tuple(row_data)
- if row_tuple not in seen:
- seen.add(row_tuple)
- content.append(row_data)
-
- blocks.append({'start': row, 'end': block_end, 'content': content})
- row = block_end + 1
-
- return blocks
- def parse_excel(file_path):
- """解析Excel所有工作表,返回 {表名: [块列表]} """
- wb = load_workbook(file_path)
- result = {name: parse_sheet(wb[name]) for name in wb.sheetnames}
- wb.close()
- return result
- if __name__ == "__main__":
- FILE_PATH = r"\ceshi\新建 XLSX 工作表.xlsx"
-
- for sheet, blocks in parse_excel(FILE_PATH).items():
- print(f"\n=== {sheet} ===")
- for i, b in enumerate(blocks, 1):
- print(f"块{i}: 第{b['start']}-{b['end']}行 值:{b['content']}" if b['start'] != b['end'] else f"块{i}: 第{b['start']}行 值:{b['content']}")
- # 格式示例
- """
- 块6: 第6行 值:[[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]]
- 块7: 第7-18行 值:[[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [8, 9, 10, 11, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [9, 10, 11, 12, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [10, 11, 12, 13, 11, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [11, 12, 13, 14, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 30, 31, 32, 33], [12, 13, 14, 15, 11, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 28, 31, 32, 33, 34], [13, 14, 15, 16, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 28, 32, 33, 34, 35], [14, 15, 16, 17, 11, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 28, 33, 34, 35, 36], [15, 16, 17, 18, 11, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 28, 34, 35, 36, 37], [16, 17, 18, 19, 11, 21, 22, 23, 24, 25, 25, 27, 28, 29, 30, 31, 32, 33, 28, 35, 36, 37, 38], [17, 18, 19, 20, 11, 22, 23, 24, 25, 26, 25, 28, 29, 30, 31, 32, 33, 34, 28, 36, 37, 38, 39], [18, 19, 20, 21, 11, 23, 24, 25, 26, 27, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]]
- """
|