python docx通过关键字标注字体以及颜色大小等

相关文章推荐

大气的铁板烧 · bootstrap导航菜单 - CSDN文库· 10 月前 ·

性感的野马 · Java 实例 – ...· 1 年前 ·

任性的墨镜 · pyqt5 qlistwidget ...· 1 年前 ·

年轻有为的橡皮擦 · 运行php ...· 1 年前 ·

# Highlight keyword matches inside a .docx file using python-docx.
#
# Paragraph runs containing ``word`` are split so that only the keyword itself
# is recoloured red; in tables, every row whose first cell equals
# ``table_text`` gets the text of its remaining cells recoloured red.
from docx.document import Document as dc
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from docx.shared import RGBColor  # font colour
from docx import Document
from docx.shared import Pt  # font size
from docx.oxml.ns import qn  # qualified name for the East-Asian font attribute
import pandas as pd

FILE_PATH = r"D:\xxxx\xxxx\xxxx\xxxx.docx"

# Keyword to highlight inside paragraph text.
word = '段落关键字'
# Keyword matched against the first cell of each table row.
table_text = '表格关键字'

# Font applied to every rewritten run, and the colour used for highlights.
_HIGHLIGHT_FONT_NAME = '楷体'
_HIGHLIGHT_COLOR = RGBColor(255, 0, 0)


def iter_block_items(parent):
    """Yield the paragraphs and tables directly under *parent*, in order.

    :param parent: a python-docx document object or a table ``_Cell``.
    :return: generator of ``Paragraph`` and ``Table`` objects, one per child
        element of the parent that is a paragraph or a table.
    :raises ValueError: if *parent* is neither a document nor a ``_Cell``.
    """
    if isinstance(parent, dc):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("[TypeError] Document in insuitable type.")
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)


def table2list(table):
    """Return the cell texts of *table* as a list of rows (list of lists)."""
    return [[cell.text for cell in row.cells] for row in table.rows]


def set_run(run, font_size, bold, color, name):
    """Apply basic character formatting to a run.

    :param run: the run object to format.
    :param font_size: font size to assign to ``run.font.size``.
    :param bold: value to assign to ``run.bold``.
    :param color: value to assign to ``run.font.color.rgb``.
    :param name: font name.
    :return: None
    """
    run.font.size = font_size
    run.bold = bold
    run.font.color.rgb = color
    run.font.name = name
    # The East-Asian font attribute has to be set as well, otherwise the
    # font name is not applied to Chinese text.
    s = run._element
    s.rPr.rFonts.set(qn('w:eastAsia'), name)


def _add_run_after(paragraph, anchor, text):
    """Create a run holding *text* and place it directly after *anchor*.

    ``paragraph.add_run`` always appends at the end of the paragraph, which
    would reorder the text whenever *anchor* is not the last run. Calling
    ``addnext`` on the anchor's XML element moves the freshly created run
    element so that it follows the anchor instead.
    """
    new_run = paragraph.add_run(text)
    anchor._element.addnext(new_run._element)
    return new_run


def _highlight_keyword_in_paragraph(paragraph, verbose=False):
    """Recolour every occurrence of ``word`` inside *paragraph* red.

    Each run containing the keyword is emptied and replaced, in place, by a
    sequence of new runs: the text around the keyword keeps the original
    size/bold/colour, the keyword itself is coloured red. All new runs use
    the highlight font. A keyword split across several runs is not detected,
    because each run's text is examined on its own.

    :param paragraph: the paragraph to process.
    :param verbose: when true, print the text and style name of each
        matching run before rewriting it.
    """
    # ``paragraph.runs`` builds a fresh list, so inserting runs while
    # looping over it does not disturb the iteration.
    for r in paragraph.runs:
        if word not in r.text:
            continue
        if verbose:
            print(r.text)
            print(r.style.name)
        font_size = r.font.size
        bold = r.bold
        color = r.font.color.rgb
        pieces = r.text.split(word)
        # Empty the original run; its content is re-created right behind it.
        r.text = ''
        anchor = r
        for index, text in enumerate(pieces):
            if index:
                # A keyword occurrence sits between consecutive pieces.
                anchor = _add_run_after(paragraph, anchor, word)
                set_run(anchor, font_size, bold, color, _HIGHLIGHT_FONT_NAME)
                anchor.font.color.rgb = _HIGHLIGHT_COLOR
            if text:
                anchor = _add_run_after(paragraph, anchor, text)
                set_run(anchor, font_size, bold, color, _HIGHLIGHT_FONT_NAME)


def _highlight_table_rows(table):
    """Recolour red the rows of *table* whose first cell equals ``table_text``.

    The header row (row 0) and the first column are skipped. Only the first
    paragraph of each remaining cell is rewritten; each run's text is
    stripped of surrounding whitespace.
    """
    pd_block = pd.DataFrame(table2list(table))
    n_rows, n_cols = pd_block.shape
    for row in range(1, n_rows):
        if table_text != pd_block.iloc[row, 0]:
            continue
        for col in range(1, n_cols):
            first_paragraph = table.cell(row, col).paragraphs[0]
            for r in first_paragraph.runs:
                font_size = r.font.size
                bold = r.bold
                color = r.font.color.rgb
                data = r.text.strip()
                # Empty the original run and rewrite its text right behind it.
                r.text = ''
                run = _add_run_after(first_paragraph, r, data)
                set_run(run, font_size, bold, color, _HIGHLIGHT_FONT_NAME)
                run.font.color.rgb = _HIGHLIGHT_COLOR


def paragraphs_utils(obj):
    """Highlight ``word`` in every body paragraph of *obj* and save a copy.

    :param obj: an opened python-docx document.
    """
    for p in obj.paragraphs:
        _highlight_keyword_in_paragraph(p)
    obj.save('标注后的文档.docx')


def table_utils(obj):
    """Highlight the matching rows of every table in *obj* and save a copy.

    :param obj: an opened python-docx document.
    """
    for t in obj.tables:
        _highlight_table_rows(t)
    obj.save('标注后的表格.docx')


if __name__ == "__main__":
    # ``Document`` is the factory imported from ``docx``; the bare name
    # ``docx`` itself is never imported in this file.
    obj = Document(FILE_PATH)
    # Walk paragraphs and tables in document order and mark both kinds.
    for block in iter_block_items(obj):
        if isinstance(block, Paragraph):
            _highlight_keyword_in_paragraph(block, verbose=True)
        else:
            _highlight_table_rows(block)
    obj.save('段落与表格标注后的文档.docx')

匹配关键字回写docx替换颜色