Word自动化(C# + Python)(持续更新中...) - 开发者社区

Word自动化(C# + Python)(持续更新中...)

using NPOI.XWPF.UserModel;
using System.IO;
using System.Text;
namespace getWord
{
    /// <summary>
    /// Console tool that extracts the non-blank paragraphs of a .docx file
    /// (read with NPOI) and writes them to a GB2312-encoded text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads two lines from standard input: the path of the Word document
        /// to read, then the path of the text file to write. Each paragraph
        /// that is not blank is written followed by a single '\n'.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string inputPath = System.Console.ReadLine();
            string outputPath = System.Console.ReadLine();

            // StringBuilder avoids re-copying the accumulated text on every paragraph.
            StringBuilder text = new StringBuilder();

            // Dispose the input stream even if parsing throws, so the file handle is released.
            using (Stream stream = File.OpenRead(inputPath))
            {
                XWPFDocument doc = new XWPFDocument(stream);
                foreach (var para in doc.Paragraphs)
                {
                    string paragraphText = para.ParagraphText;

                    // Skip paragraphs that are empty or contain only whitespace.
                    if (!string.IsNullOrWhiteSpace(paragraphText))
                    {
                        text.Append(paragraphText).Append('\n');
                    }
                }
            }

            // NOTE(review): on .NET Core / .NET 5+ the "gb2312" code page is only available
            // after registering CodePagesEncodingProvider; on .NET Framework it is built in.
            using (StreamWriter writer = new StreamWriter(outputPath, false, Encoding.GetEncoding("gb2312")))
            {
                writer.Write(text.ToString());
            }
        }
    }
}

pip3 install python-docx

import docx
# Load the Word document whose text should be extracted.
doc = docx.Document('./t.docx')

# Text of every body paragraph, one paragraph per line.
doc_text = ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)

# Text of every table cell, visited table by table, row by row, cell by cell,
# one cell per line.
doc_table_text = ''.join(
    cell.text + '\n'
    for table in doc.tables
    for row in table.rows
    for cell in row.cells
)

# Write with an explicit encoding: the platform default may be unable to encode
# some characters of the document and would make the output machine-dependent.
with open('./tt.txt', 'w', encoding='utf-8') as f:
    f.write(doc_text)
    f.write(doc_table_text)
# doc.save ('./tt.docx')

using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.util;
using System.IO;
using System.Text;
namespace getPDFCon
{
    /// <summary>
    /// Console tool that extracts the text of a PDF file (read with PDFBox)
    /// and writes it to a GB2312-encoded text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads two lines from standard input: the path of the PDF to read,
        /// then the path of the text file to write.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string inputPath = System.Console.ReadLine();
            string outputPath = System.Console.ReadLine();

            string text;
            PDDocument doc = PDDocument.load(inputPath);
            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                text = pdfStripper.getText(doc);
            }
            finally
            {
                // PDDocument is a Java-style resource (not IDisposable), so close it
                // explicitly to release the underlying file even if extraction fails.
                doc.close();
            }

            // Dispose the writer even if Write throws, so the output is flushed and closed.
            using (StreamWriter writer = new StreamWriter(outputPath, false, Encoding.GetEncoding("gb2312")))
            {
                writer.Write(text);
            }
        }
    }
}

# Configure the document's 'Normal' style: SimSun (宋体) at 9 pt.
normal_style = doc.styles['Normal']
normal_font = normal_style.font
normal_font.name = u'宋体'
normal_font.size = Pt(9)
# Also write the font name into the style's w:eastAsia attribute explicitly,
# the slot that applies to East Asian (e.g. Chinese) text.
normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

# Append a centred paragraph containing a single bold, 14 pt run.
p = doc.add_paragraph()
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
title_run = p.add_run('标题')
font = title_run.font
font.size = Pt(14)
font.bold = True

# Names used by this snippet, imported here so it runs on its own.
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt

# Start from a new, empty document.
doc = Document()

# First paragraph: centred, with a bold 14 pt run.
p = doc.add_paragraph()
font = p.add_run('标题1').font
font.bold = True
font.size = Pt(14)
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Second paragraph: 20 pt text. The size is set on the paragraph's own run;
# setting it through p2.style would modify the paragraph style that p2 shares
# with other paragraphs, changing their size as well.
p2 = doc.add_paragraph()
run2 = p2.add_run('标题2')
run2.font.size = Pt(20)

Word自动化(C# + Python)(持续更新中...)

Word自动化(C# + Python)(持续更新中...)

前言

读取Word内容

NPOI

NPOI安装

NPOI提取Word内容

用Costura.Fody打包DLL

python-docx

读取PDF内容

python-docx自动生成Word

全局字体

内容字体