# 将 PDF 文件分解为图片
def convert_pdf_to_images(img_path, doc):
    """Render every page of ``doc`` to an image file inside ``img_path``.

    Each page is rendered with ``fitz.Matrix(2, 2)`` and written as
    ``1.jpg``, ``2.jpg``, ... in page order. The bytes written are
    PNG-encoded even though the file name ends in ``.jpg``.

    Args:
        img_path: Existing directory that receives the page images.
        doc: An iterable of PyMuPDF pages (e.g. the result of ``fitz.open``).
    """
    for i, page in enumerate(doc):
        # PyMuPDF renamed its camelCase API to snake_case; prefer the new
        # name and fall back to the legacy one on old releases.
        render = getattr(page, "get_pixmap", None) or page.getPixmap
        pix = render(matrix=fitz.Matrix(2, 2))
        img_output_path = os.path.join(img_path, f"{i+1}.jpg")
        if hasattr(pix, "save"):
            # Force PNG output so the bytes match what writePNG produced,
            # regardless of the ".jpg" extension.
            pix.save(img_output_path, output="png")
        else:
            pix.writePNG(img_output_path)
然后再对图片中的特定颜色进行替换
def replace_color(pixel, old_color, new_color):
    """Return ``new_color`` if ``pixel`` matches ``old_color``, else ``pixel``.

    Args:
        pixel: A single pixel value (an int, or a tuple of channel values).
        old_color: Either one color to match, or a collection of colors
            any of which should be matched.
        new_color: The value returned in place of a matching pixel.

    Returns:
        ``new_color`` when the pixel matches, otherwise the unchanged pixel.
    """
    # A tuple pixel is never an *element* of a single color tuple such as
    # (128, 130, 133), so test equality first and keep membership for the
    # case where old_color is a collection of colors.
    if pixel == old_color or pixel in old_color:
        return new_color
    return pixel
old_color = (128, 130, 133)
new_color = (255, 255, 255)
# Walk every .jpg/.png file in the input folder and apply the color replacement.
for filename in os.listdir(input_folder):
    if filename.endswith(('.jpg', '.png')):
        # Open the image and read its pixel data; the context manager
        # releases the file handle once the data has been copied out.
        with Image.open(os.path.join(input_folder, filename)) as image:
            pixels = list(image.getdata())
            # Replace matching pixels one by one.
            new_pixels = [replace_color(pixel, old_color, new_color) for pixel in pixels]
            # Build a fresh image of the same mode and size from the new data.
            new_image = Image.new(image.mode, image.size)
        new_image.putdata(new_pixels)
        # Save the result under the same file name in the output folder.
        new_image.save(os.path.join(output_folder, filename))
最后再将图片拼接起来则得到去水印后的 PDF
def convert_images_to_pdf(img_path, output_path):
    """Combine the images in ``img_path`` into one multi-page PDF.

    Files are ordered by the integer before the first ``.`` in their name
    (``1.jpg``, ``2.jpg``, ..., ``10.jpg``), so every file in the folder
    must have a numeric name.

    Args:
        img_path: Directory containing the page images.
        output_path: Path of the PDF file to write.
    """
    page_files = sorted(os.listdir(img_path), key=lambda x: int(x.split('.')[0]))
    image_list = []
    try:
        for filename in page_files:
            image_list.append(Image.open(os.path.join(img_path, filename)))
        # The first image drives the save; the rest are appended as pages.
        image_list[0].save(output_path, save_all=True, append_images=image_list[1:])
    finally:
        # Release every file handle, even if opening or saving failed.
        for image in image_list:
            image.close()
将以上三个步骤合并,即可得到我们的最终代码
from PIL import Image
import fitz
import os
def replace_color(img_path, old_color=(220, 220, 220), new_color=(255, 255, 255, 255)):
    """Replace one exact color in every image of a folder, in place.

    Each file in ``img_path`` is opened, every pixel equal to ``old_color``
    is swapped for ``new_color``, and the result is saved back over the
    same file.

    Args:
        img_path: Directory whose files are all treated as images.
        old_color: Pixel value to look for (exact match). Defaults to the
            watermark color used by the original code.
        new_color: Pixel value written in place of a match. Defaults to
            the original four-component white.
            NOTE(review): confirm a four-component value suits the image
            mode produced by the rendering step.
    """
    for filename in os.listdir(img_path):
        file_path = os.path.join(img_path, filename)
        # Read everything needed, then close the source before the same
        # path is overwritten below.
        with Image.open(file_path) as image:
            mode, size = image.mode, image.size
            new_pixels = [new_color if pixel == old_color else pixel for pixel in image.getdata()]
        new_image = Image.new(mode, size)
        new_image.putdata(new_pixels)
        new_image.save(file_path)
def convert_pdf_to_images(img_path, doc):
    """Render every page of ``doc`` to an image file inside ``img_path``.

    Each page is rendered with ``fitz.Matrix(2, 2)`` and written as
    ``0001.jpg``, ``0002.jpg``, ... so that a plain name sort yields page
    order. The bytes written are PNG-encoded even though the file name
    ends in ``.jpg``.

    Args:
        img_path: Existing directory that receives the page images.
        doc: An iterable of PyMuPDF pages (e.g. the result of ``fitz.open``).
    """
    for i, page in enumerate(doc):
        # PyMuPDF renamed its camelCase API to snake_case; prefer the new
        # name and fall back to the legacy one on old releases.
        render = getattr(page, "get_pixmap", None) or page.getPixmap
        pix = render(matrix=fitz.Matrix(2, 2))
        img_output_path = os.path.join(img_path, f"{i+1:04d}.jpg")
        if hasattr(pix, "save"):
            # Force PNG output so the bytes match what writePNG produced,
            # regardless of the ".jpg" extension.
            pix.save(img_output_path, output="png")
        else:
            pix.writePNG(img_output_path)
def creat_file(img_path):
    """Ensure ``img_path`` exists as an empty working directory.

    If the path does not exist it is created (including parents).
    Otherwise everything beneath it is deleted while the directory itself
    is kept.

    Args:
        img_path: Path of the working directory.
    """
    if not os.path.exists(img_path):
        os.makedirs(img_path)
        return
    # Walk bottom-up so each directory is already empty when it is removed.
    for root, dirs, files in os.walk(img_path, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            path = os.path.join(root, name)
            # os.walk reports symlinks to directories under ``dirs`` without
            # descending into them; rmdir fails on a link, so unlink it and
            # leave the link target untouched.
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)
def convert_images_to_pdf(img_path, output_path):
    """Combine the images in ``img_path`` into one multi-page PDF.

    Files are taken in sorted name order, which matches page order for
    zero-padded names such as ``0001.jpg``, ``0002.jpg``, ...

    Args:
        img_path: Directory containing the page images.
        output_path: Path of the PDF file to write.
    """
    image_list = []
    try:
        # os.listdir returns names in arbitrary order; sort so the pages
        # are not shuffled in the resulting PDF.
        for filename in sorted(os.listdir(img_path)):
            image_list.append(Image.open(os.path.join(img_path, filename)))
        # The first image drives the save; the rest are appended as pages.
        image_list[0].save(output_path, save_all=True, append_images=image_list[1:])
    finally:
        # Release every file handle, even if opening or saving failed.
        for image in image_list:
            image.close()
def delete_watermark(file_path):
    """Remove the watermark from a PDF by rendering, recoloring and rebuilding.

    The pages are rendered into a folder named ``img_<pdf name without
    extension>`` next to the input file, the watermark color is replaced
    in those images, and the images are combined into ``new_<pdf name>``
    in the same folder.

    Args:
        file_path: Path of the PDF to process.
    """
    folder_path = os.path.dirname(file_path)
    file_name = os.path.basename(file_path)
    output_path = os.path.join(folder_path, 'new_' + file_name)
    img_path = os.path.join(folder_path, f"img_{os.path.splitext(file_name)[0]}")
    creat_file(img_path)
    doc = fitz.open(file_path)
    try:
        convert_pdf_to_images(img_path, doc=doc)
    finally:
        # Release the PDF file handle once rendering is finished.
        doc.close()
    replace_color(img_path)
    convert_images_to_pdf(img_path, output_path)
if __name__ == "__main__":
    # Raw string: the backslashes of the Windows path are kept literally
    # instead of being parsed as (invalid) escape sequences.
    delete_watermark(r'E:\...\example.pdf')
只需要对水印和背景进行取色,然后更改相应代码即可实现全自动 Python 去水印功能。
由于水印颜色并不总是某一个 RGB 值,而是一个范围,所以也可以使用
218 < pixel[0] < 244 and 218 < pixel[1] < 244 and 218 < pixel[2] < 244
替换
pixel == (220,220,220)
这一判断条件。