from docx import Document
from openpyxl import Workbook
from docx.shared import Cm #Cm模块,用于设定图片尺寸大小
# In python-docx a Word file is a Document, each block of text in it is a
# Paragraph, and each stretch of a paragraph that shares the same formatting
# (color, font, weight, italics, ...) is a Run.
doc = Document(r"Test.docx")

# Dump the list of paragraph objects, then the text of every paragraph.
print(doc.paragraphs)
for para in doc.paragraphs:
    print(para.text)

# For the first and the fourth paragraph, show the list of runs followed by
# the text of each individual run.
for index in (0, 3):
    paragraph = doc.paragraphs[index]
    runs = paragraph.runs
    print(runs)
    for run in runs:
        print(run.text)
# Sample rows (header row first) written into the two tables added below.
list1 = [["name","sex","Provin"],["violet1","女","日本省"],["violet2","女","日本省"],["violet3","女","日本省"],["violet4","女","日本省"]]
list2 = [["name","sex","Provin"],["violet5","女","日本省"],["violet6","女","日本省"],["violet7","女","日本省"]]


def _add_table(document, rows):
    """Append a table to *document* sized to fit *rows* and fill it in.

    The row and column counts are taken from the data itself, so the table
    always matches what is written into it instead of relying on separately
    maintained dimensions.

    Args:
        document: The python-docx document to append the table to.
        rows: A sequence of rows, each a sequence of cell values. Every
            value is written as ``str(value)``.

    Returns:
        The table object returned by ``document.add_table``.
    """
    # default=0 keeps max() from raising when rows is empty.
    col_count = max(map(len, rows), default=0)
    table = document.add_table(rows=len(rows), cols=col_count)
    for table_row, values in zip(table.rows, rows):
        # zip stops at the shorter side, so a short data row simply leaves
        # the remaining cells of that table row untouched.
        for cell, value in zip(table_row.cells, values):
            cell.text = str(value)
    return table


# Write new content into the Word document.
paragraph1 = doc.add_paragraph("新加段落1")
paragraph2 = doc.add_paragraph("新加段落2")

# One paragraph made of three runs: bold, plain, italic.
paragraph3 = doc.add_paragraph()
paragraph3.add_run("加粗文字块").bold = True
paragraph3.add_run(",普通文字块, ")
paragraph3.add_run("斜体文字块").italic = True

doc.add_page_break()  # Start the tables on a new page.

# Optional picture insertion, left disabled because the path is specific to
# one machine:
#doc.add_picture(r"E:\PycharmProjects\SpiderTest\violet.png",width=Cm(5),height=Cm(5))

# Two tables separated by a divider paragraph.
table1 = _add_table(doc, list1)
doc.add_paragraph("-----------------------------------------------------------")
table2 = _add_table(doc, list2)

doc.save(r"Test2.docx")
# Export a table from the Word document into an Excel workbook.
# doc.tables[0] is the first table in the document. NOTE(review): if Test.docx
# already contained a table, that one is exported rather than the tables this
# script appended - confirm which table is intended.
t0 = doc.tables[0]
workbook = Workbook()
sheet = workbook.active
for table_row in t0.rows:
    # Collect into a fresh name rather than rebinding list1, which holds the
    # sample rows defined earlier in this script.
    row_values = []
    for cell in table_row.cells:
        text = cell.text
        print("元素:" + text)
        row_values.append(text)
    print("表格每一行list:", row_values)
    # Each Word table row becomes one worksheet row.
    sheet.append(row_values)
workbook.save(filename=r"TestByWord.xlsx")