C# itextsharp替换PDF中的某个图片_itextsharp pdfname.resources_走错路的程序员的博客

相关文章推荐

读研的红薯 · Visual Studio 中的 ...· 8 月前 ·

咆哮的馒头 · 元素隐式具有 “any“ 类型，因为类型为 ...· 1 年前 ·

任性的斑马 · jqgrid sharepoint ...· 1 年前 ·

千杯不醉的紫菜 · python - 'QPixmap' ...· 1 年前 ·

因业务需要把pdf中的logo图片换成其它图片.
百度了很久也没找到相关的解决方案. 后来总算是解决了.
总结起来一句话, itextsharp的文档太少了, 尤其是几个基础类的相关文档. PDF文档格式本身也非常难理解, 简直是狗屎般的格式. 用Spire.PDF很好用也很简单, 就是得花钱.

业务需求还需要合并多个pdf, 搞了好久实在是不知道如何合并的同时再去掉logo.
就改成分两步走. 先合并成一个文件后, 输出到MemoryStream,然后再去掉logo…

废话不多说上代码.

/// <summary>
/// Replaces the logo image on every page of D:\a.pdf with D:\cc.png and
/// writes the result to D:\Marge.pdf.
/// </summary>
private void Main()
{
    // Read the PDF whose logo is to be replaced; this could equally be a Stream held in memory.
    PdfReader reader2 = new PdfReader("D:\\a.pdf");
    try
    {
        // Output file stream.
        using (FileStream fs = new FileStream("D:\\Marge.pdf", FileMode.Create, FileAccess.Write, FileShare.None))
        using (PdfStamper stamper = new PdfStamper(reader2, fs))
        {
            int pageCount2 = reader2.NumberOfPages;

            // PDF page numbers are 1-based.
            for (int i = 1; i <= pageCount2; i++)
            {
                PdfDictionary page = reader2.GetPageN(i);
                PdfObject obj = FindImageInPDFDictionary(page, DistinguishImageIsLogo);
                if (obj == null)
                {
                    continue; // no logo on this page
                }

                // Remove the old image (per the original author's note this only drops the association).
                PdfReader.KillIndirect(obj);

                // A new image object must be created for every page; otherwise only
                // the first page ends up with the image (original author's note).
                iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance("D:\\cc.png");
                iTextSharp.text.Image maskImage = img.ImageMask;
                if (maskImage != null)
                {
                    stamper.Writer.AddDirectImageSimple(maskImage);
                }

                // Write the new image under the indirect reference of the old one.
                stamper.Writer.AddDirectImageSimple(img, (PRIndirectReference)obj);
            }
        }
    }
    finally
    {
        // The stamper does not own the reader, so release it explicitly.
        reader2.Close();
    }
}

/// <summary>
/// Searches the XObject resources of a page (or form) dictionary for the first
/// image accepted by <paramref name="distinguishMethod"/>.
/// </summary>
/// <param name="pg">Dictionary whose /Resources entry is inspected.</param>
/// <param name="distinguishMethod">Predicate that decides whether an image dictionary is the wanted one.</param>
/// <returns>The indirect reference object of the matching image, or <c>null</c> when none is found.</returns>
private static PdfObject FindImageInPDFDictionary(PdfDictionary pg, DistinguishImage distinguishMethod)
{
    if (pg == null)
    {
        return null;
    }

    // A page or form without resources / XObjects simply contains no images.
    PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
    if (res == null)
    {
        return null;
    }

    PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
    if (xobj == null)
    {
        return null;
    }

    foreach (PdfName name in xobj.Keys)
    {
        Console.WriteLine(name.ToString());

        PdfObject obj = xobj.Get(name);
        if (obj == null || !obj.IsIndirect())
        {
            continue;
        }

        PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
        if (tg == null)
        {
            continue;
        }

        PdfName type = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)) as PdfName;

        if (PdfName.IMAGE.Equals(type))
        {
            // Image directly in this resource dictionary.
            if (distinguishMethod(tg))
            {
                return obj;
            }
            // Not the wanted image: keep looking.
        }
        else if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
        {
            // Image nested inside a form or group: only stop when the nested
            // search actually finds something, otherwise try the remaining XObjects.
            PdfObject nested = FindImageInPDFDictionary(tg, distinguishMethod);
            if (nested != null)
            {
                return nested;
            }
        }
    }

    return null;
}

/// <summary>
/// Delegate used to recognise an image.
/// </summary>
/// <param name="imgObject">Dictionary of the image XObject being examined.</param>
/// <returns><c>true</c> when the image is the one being looked for.</returns>
delegate bool DistinguishImage(PdfDictionary imgObject);

/// <summary>
/// Decides whether an image is the logo.
/// </summary>
/// <param name="imgObject">Dictionary of the image XObject being examined.</param>
/// <returns><c>true</c> when width, height and stream length all match the logo's values.</returns>
private static bool DistinguishImageIsLogo(PdfDictionary imgObject)
{
    if (imgObject == null)
    {
        return false;
    }

    int width = ReadIntEntry(imgObject, PdfName.WIDTH);
    int height = ReadIntEntry(imgObject, PdfName.HEIGHT);
    int length = ReadIntEntry(imgObject, PdfName.LENGTH);

    // These three values are enough to identify the logo. Matching by name, or
    // comparing two image objects directly, would work as well.
    return width == 270 && height == 111 && length == 11878;
}

/// <summary>
/// Reads an integer entry from a dictionary, resolving an indirect reference first.
/// Returns 0 when the entry is missing or is not an integer.
/// </summary>
private static int ReadIntEntry(PdfDictionary dict, PdfName key)
{
    PdfObject value = PdfReader.GetPdfObject(dict.Get(key));
    if (value == null)
    {
        return 0;
    }

    int parsed;
    return int.TryParse(value.ToString(), out parsed) ? parsed : 0;
}

以上是处理文件的方式,处理MemoryStream流, 需要用到特殊的自定义的内存流,防止程序自动关闭了流.

/// <summary>
/// A <see cref="System.IO.MemoryStream"/> whose <see cref="Close"/> can be turned
/// into a no-op, so that code which closes its output stream does not make the
/// buffered data unreadable.
/// </summary>
public class PdfMemoryStream : System.IO.MemoryStream
{
    /// <summary>
    /// Creates a stream over <paramref name="bytes"/>; closing is allowed by default.
    /// </summary>
    /// <param name="bytes">Buffer passed to the base <see cref="System.IO.MemoryStream"/> constructor.</param>
    public PdfMemoryStream(byte[] bytes) : base(bytes)
    {
        AllowClose = true;
    }

    /// <summary>
    /// Creates an empty stream; closing is allowed by default.
    /// </summary>
    public PdfMemoryStream()
    {
        AllowClose = true;
    }

    /// <summary>
    /// When <c>false</c>, calls to <see cref="Close"/> are ignored.
    /// </summary>
    public bool AllowClose { get; set; }

    /// <summary>
    /// Closes the stream only while <see cref="AllowClose"/> is <c>true</c>.
    /// </summary>
    public override void Close()
    {
        if (AllowClose)
        {
            base.Close();
        }
    }
}

然后把Main方法重构了一下如下

        /// <summary>
        /// Replaces an image inside a PDF.
        /// </summary>
        /// <param name="src">Stream containing the PDF file.</param>
        /// <param name="distinguishMethod">Predicate that recognises the image to be replaced.</param>
        /// <param name="replaceToImg">Image that takes the place of the recognised one.</param>
        /// <returns>A memory stream, positioned at 0, holding the stamped PDF.</returns>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public static MemoryStream ReplaceImage(Stream src, DistinguishImage distinguishMethod, System.Drawing.Image replaceToImg)
        {
            if (src == null)
            {
                throw new ArgumentNullException("src");
            }
            if (distinguishMethod == null)
            {
                throw new ArgumentNullException("distinguishMethod");
            }
            if (replaceToImg == null)
            {
                throw new ArgumentNullException("replaceToImg");
            }

            PdfReader reader2 = new PdfReader(src);

            // Closing is disabled so the stream remains usable after the stamper is disposed.
            PdfMemoryStream outMemoryStream = new PdfMemoryStream();
            outMemoryStream.AllowClose = false;

            try
            {
                using (PdfStamper stamper = new PdfStamper(reader2, outMemoryStream))
                {
                    int pageCount2 = reader2.NumberOfPages;

                    // PDF page numbers are 1-based.
                    for (int i = 1; i <= pageCount2; i++)
                    {
                        var page = reader2.GetPageN(i);
                        PdfObject obj = FindImageInPDFDictionary(page, distinguishMethod);
                        if (obj == null)
                        {
                            continue; // nothing to replace on this page
                        }

                        // Remove the old image.
                        PdfReader.KillIndirect(obj);

                        // A fresh iTextSharp image is built for every page.
                        // NOTE(review): the last argument is passed as `true` exactly as before;
                        // it looks like the forceBW flag of this overload - confirm that a
                        // black-and-white replacement is intended.
                        iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(replaceToImg, BaseColor.WHITE, true);
                        iTextSharp.text.Image maskImage = img.ImageMask;
                        if (maskImage != null)
                        {
                            stamper.Writer.AddDirectImageSimple(maskImage);
                        }

                        // Always write the replacement, even when it has no mask; otherwise
                        // the killed reference would be left without any image behind it.
                        stamper.Writer.AddDirectImageSimple(img, (PRIndirectReference)obj);
                    }
                }
            }
            finally
            {
                // The stamper does not own the reader, so release it explicitly.
                reader2.Close();
            }

            // Rewind so the caller can read the result from the beginning.
            outMemoryStream.Position = 0;
            return outMemoryStream;
        }
参考链接
 https://yq.aliyun.com/articles/565318 用程序自动替换PDF文件中的图像
 https://blog.csdn.net/java2000_net/article/details/3734534 iText使用入门:编辑,增加,导入,水印,合并PDF的例子
 https://zhuchengzzcc.iteye.com/blog/1603671 iText 操作Pdf 简单整理
 https://blog.csdn.net/sand_clock/article/details/77505181 ITEXT PDF文件的拆分与合并
                                    前段时间，为了解析PDF，花了不少时间去学习PDFbox和itext，这两个都是处理PDF的开源库，有java和C#的。作为一个刚开始学习这两个开源库的，感觉百度上的资源还是太少了。我做的是一个关于PDF的处理，在百度上找了半天都没找到答案，最后去itext的官网和Stack Overflow上找到了答案。最后比较了一下，pdfbox和itext相对而言，itext的功能要强不少，本人对比过ite
                                    虽然Bruno以包含“第2层”的PDF开头解决了这个问题，但请允许我先说明：在PDF签名外观中使用这些“签名层”并不属于PDF规范，规范实际上根本不知道这些层！因此，如果您尝试解析特定图层，则可能找不到这样的“图层”，或者更糟糕的是，找到一个看起来像该图层、却包含错误数据的东西(一个名为n2的XObject)。尽管如此，无论您是从第2层查找文本还是从签名外观中查找文本...
                                    对PDF中敏感信息脱敏操作，比如将银行电子回执单内的收款账号、付款账号进行脱敏替换。
支持如下银行网商银行、平安银行、光大银行、招商银行、民生银行、天津金城银行、建设银行、广发网上银行、中国建设银行
......
                                    iText是一款PDF第三方库，来自美国iText软件公司。目前有iText5与iText7两个比较大的版本。当然作为商业库来说是收费的，但是也提供了一些免费开源库给大家使用。目前来说常用的是iText5。这是一个Java库，如果要在.Net环境下使用的话，需要用到iTextSharp。
iTextSharp可以帮助用户进行PDF的很多操作（转换除外），包括编辑，合并，修改，插入，生成等等操作。
                                    一、PdfObject: pdf对象 ，有9种，对象是按照对象内涵来分的，如果按照对象的使用规则来说，对象又分为间接对象和直接对象。间接对象是PDF中最常用的对象，如前面对象集合里面的，所有对象都是间接对象，在其他位置通过R关键字来引用，在交叉引用表里面都是通过间接对象来引用的。直接对象就更好理解了，9种对象单独出现的时候就叫直接对象。
PdfObject pdfObject = this....
                                    要使用Java代码，应该调用iText库。要使用C#代码，应该调用iTextSharp库。Here's the code to replace images in PDFs, in Java and C#. It will replace the first image in the first page.Java代码
PdfReader pdf =...
                                    1             var pdfReader = new PdfReader("xxx.pdf");
 3             StreamWriter output = new StreamWriter(new FileStream("处理结果.txt", FileMode.Create));
 5             in...
                                        作为我的iTextSharp系列的文章的第七篇,开始探索使用iTextSharp在PDF中操作图片，理解本篇文章需要看过系列文章的前六篇：      在ASP.NET中创建PDF-iTextSharp起步      在Asp.Net中操作PDF - iTextSharp - 使用字体      在Asp.Net中操作PDF – iTextSharp -利用块，短语，段落添加文本      ...
                                    插入绝对定位的图片
iTextSharp.text.Image splitline = iTextSharp.text.Image.GetInstance(Server.MapPath("images\\splitline.jpg"));
                splitline.SetAbsolutePosition(30, PageSize.A4.Height - 150);