from openpyxl import load_workbook
def md_safe(val):
    """Return *val* with line breaks replaced so it fits on one Markdown line.

    Args:
        val: A cell value of any type.

    Returns:
        For ``str`` input, the same text with every ``\\r\\n`` and ``\\n``
        replaced by ``<br>``; any other value is returned unchanged.
    """
    if isinstance(val, str):
        # NOTE(review): the replacement text was lost in the source (the
        # literal contained a raw line break, which is a syntax error).
        # "<br>" is restored based on the function's name -- confirm.
        # "\r\n" is handled first so a Windows line ending yields one break.
        return val.replace("\r\n", "<br>").replace("\n", "<br>")
    return val
def parse_sheet(ws):
    """Parse one worksheet into a list of row blocks.

    Rows tied together by vertically merged cells are grouped into a single
    block; every other row forms a block of its own. A cell covered by a
    merged range is reported with the value of the range's top-left cell.

    Args:
        ws: Worksheet-like object exposing ``merged_cells.ranges`` (items
            with ``min_row``/``max_row``/``min_col``/``max_col``),
            ``cell(row, col)``, ``max_row`` and row access via ``ws[row]``.

    Returns:
        A list of dicts ``{'start': int, 'end': int, 'content': list}``.
        ``start``/``end`` are 1-based inclusive row numbers; ``content``
        holds the block's rows as lists of cell values (each passed through
        ``md_safe``), skipping blank rows and rows identical to an earlier
        row of the same block.
    """
    # Fill map {(row, col): value} for every cell inside a merged range,
    # plus the (first_row, last_row) span of each range taller than one row.
    fill_map = {}
    row_spans = []
    for rng in ws.merged_cells.ranges:
        anchor_value = ws.cell(rng.min_row, rng.min_col).value
        for r in range(rng.min_row, rng.max_row + 1):
            for c in range(rng.min_col, rng.max_col + 1):
                fill_map[(r, c)] = anchor_value
        if rng.max_row > rng.min_row:
            row_spans.append((rng.min_row, rng.max_row))

    blocks = []
    # Read once: openpyxl recomputes max_row on every access.
    last_row = ws.max_row
    row = 1
    while row <= last_row:
        # Grow the block until no vertical merge straddles its last row.
        # Checking only the first row would split a merge that starts on a
        # later row of the block and runs past the block's end.
        block_end = row
        grew = True
        while grew:
            grew = False
            for start, end in row_spans:
                if start <= block_end < end:
                    block_end = end
                    grew = True

        # Collect the block's rows, substituting merged-cell fill values.
        content = []
        seen = set()
        for r in range(row, block_end + 1):
            # Assumes ws[r] yields the row's cells starting at column 1.
            row_data = [
                md_safe(fill_map.get((r, c), cell.value))
                for c, cell in enumerate(ws[r], 1)
            ]
            # Skip blank rows: every value is None or whitespace-only text.
            if all(
                v is None or (isinstance(v, str) and not v.strip())
                for v in row_data
            ):
                continue
            row_key = tuple(row_data)
            if row_key not in seen:
                seen.add(row_key)
                content.append(row_data)

        blocks.append({'start': row, 'end': block_end, 'content': content})
        row = block_end + 1
    return blocks
def parse_excel(file_path):
    """Parse every worksheet of an Excel workbook.

    Args:
        file_path: Path of the workbook, passed to ``load_workbook``.

    Returns:
        A dict mapping each sheet name to the block list produced by
        ``parse_sheet`` for that sheet, in ``wb.sheetnames`` order.
    """
    wb = load_workbook(file_path)
    try:
        return {name: parse_sheet(wb[name]) for name in wb.sheetnames}
    finally:
        # Close the workbook even when parsing a sheet raises.
        wb.close()
if __name__ == "__main__":
    # Manual run: parse the sample workbook and print each block per sheet.
    FILE_PATH = r"\ceshi\新建 XLSX 工作表.xlsx"
    for sheet, blocks in parse_excel(FILE_PATH).items():
        print(f"\n=== {sheet} ===")
        for i, b in enumerate(blocks, 1):
            # Multi-row blocks print a "start-end" range; single rows print
            # just the row number.
            if b['start'] != b['end']:
                print(f"块{i}: 第{b['start']}-{b['end']}行 值:{b['content']}")
            else:
                print(f"块{i}: 第{b['start']}行 值:{b['content']}")
# Example of the printed output format (sample data; the string below is not executed logic):
"""
块6: 第6行 值:[[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]]
块7: 第7-18行 值:[[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [8, 9, 10, 11, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [9, 10, 11, 12, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [10, 11, 12, 13, 11, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [11, 12, 13, 14, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 30, 31, 32, 33], [12, 13, 14, 15, 11, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 28, 31, 32, 33, 34], [13, 14, 15, 16, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 28, 32, 33, 34, 35], [14, 15, 16, 17, 11, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 28, 33, 34, 35, 36], [15, 16, 17, 18, 11, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 28, 34, 35, 36, 37], [16, 17, 18, 19, 11, 21, 22, 23, 24, 25, 25, 27, 28, 29, 30, 31, 32, 33, 28, 35, 36, 37, 38], [17, 18, 19, 20, 11, 22, 23, 24, 25, 26, 25, 28, 29, 30, 31, 32, 33, 34, 28, 36, 37, 38, 39], [18, 19, 20, 21, 11, 23, 24, 25, 26, 27, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]]
"""