Python PDF转换工具箱(PDF转图片,word,拆分,删除,提取)
1.简介:
利用Python自写的PDF工具箱,包括PDF合并、页面拆分、页面删除、页面提取,
以及转换为Word、图片等功能,支持文件拖入。文章末尾已附源码以及打包好的exe文件,大家需要可自行下载学习,喜欢的话给博主点个小小的关注哦,主页还将会更新更多Python相关干货资源,关注不迷路哦!
功能介绍:
合并:添加次序就是合并次序,可多次添加。
拆分:将输入页码的范围拆分成每个独立的pdf,单次可输入多个范围。
删除:将输入页码的范围删除,单次可输入多个范围,保存删除后的文件。
提取:将输入页码的范围提取成独立的pdf,单次可输入多个范围。
2.运行结果:
3.相关源码:
- import os
- import re
- import sys
- from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QVBoxLayout, QWidget, QFileDialog, QListWidget, \
- QMessageBox, QLineEdit, QHBoxLayout
- from PyQt5.QtCore import QThread, pyqtSignal
- from PyPDF2 import PdfReader, PdfWriter, PdfMerger
- from pdf2docx import Converter
- import fitz # 用于PDF转JPG的处理
class CustomListWidget(QListWidget):
    """List widget that accepts PDF files dragged onto it.

    Dropped files are forwarded to ``parent.addPDFFile(path)``; the parent
    passed to the constructor is therefore expected to provide that method.
    """

    def __init__(self, parent=None):
        """Enable drops and remember *parent* as the receiver of dropped files."""
        super().__init__(parent)
        self.setAcceptDrops(True)
        self.parentWindow = parent

    @staticmethod
    def _droppedPdfPaths(event):
        """Return the local paths carried by *event* whose name ends in ``.pdf``.

        The check is made on ``toLocalFile()`` rather than on the URL text so
        that the drag feedback and the actual drop agree: a non-local URL has
        an empty local path and is rejected up front instead of being accepted
        and then discarded.
        """
        local_paths = (url.toLocalFile() for url in event.mimeData().urls())
        return [path for path in local_paths if path.lower().endswith('.pdf')]

    def dragEnterEvent(self, event):
        """Accept the drag only when it carries at least one local PDF file."""
        if self._droppedPdfPaths(event):
            event.acceptProposedAction()
        else:
            event.ignore()

    def dragMoveEvent(self, event):
        """Keep accepting the drag while it moves over the widget."""
        if self._droppedPdfPaths(event):
            event.acceptProposedAction()
        else:
            event.ignore()

    def dropEvent(self, event):
        """Hand every dropped local PDF path to the parent window."""
        pdf_files = self._droppedPdfPaths(event)
        # Without a parent there is nobody to receive the files.
        if not pdf_files or self.parentWindow is None:
            event.ignore()
            return
        for f in pdf_files:
            self.parentWindow.addPDFFile(f)
        event.acceptProposedAction()
class Worker(QThread):
    """Background thread that performs one PDF operation.

    The operation is selected by name (``'merge'``, ``'split'``, ``'delete'``,
    ``'extract'``, ``'jpg'`` or ``'word'``). On success ``finished`` is emitted
    with a user-facing message; any exception raised by the operation is
    reported through ``error`` instead of propagating out of the thread.
    """

    # NOTE(review): QThread already exposes an argument-less ``finished``
    # signal; this str-carrying signal of the same name hides it on Worker
    # instances. Kept as-is because callers connect to it by this name.
    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        """Store the job description.

        Args:
            pdf_files: paths of the input PDF files.
            range_str: 1-based page ranges such as ``'1,3-4,8-15'`` (used by
                split / delete / extract).
            save_path: output file (merge, delete) or output directory
                (split, extract, jpg, word).
            operation: name of the operation to run.
        """
        super().__init__()
        # Snapshot the list so later changes made by the caller (e.g. clearing
        # its own list) cannot affect a job that is still running.
        self.pdf_files = list(pdf_files or [])
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Dispatch to the selected operation and report failures via ``error``."""
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        try:
            handler = handlers.get(self.operation)
            if handler is None:
                # Previously an unknown name finished silently with no feedback.
                raise ValueError(f'未知的操作类型: {self.operation}')
            handler()
        except Exception as e:
            # Thread boundary: surface everything to the GUI instead of dying.
            self.error.emit(str(e) or repr(e))

    def _require_files(self):
        """Raise ``ValueError`` when there is no input file to work on."""
        if not self.pdf_files:
            raise ValueError('请先添加PDF文件。')

    def _checked_ranges(self, page_count):
        """Parse ``self.range_str`` and verify each range against *page_count*.

        Returns the zero-based ``(start, end)`` pairs from ``parse_ranges``.
        Raises ``ValueError`` for page 0, reversed ranges and pages beyond the
        end of the document, which would otherwise index from the end of the
        page list, produce nothing, or fail with a bare ``IndexError``.
        """
        ranges = self.parse_ranges(self.range_str)
        for start, end in ranges:
            if start < 0 or start > end or end >= page_count:
                raise ValueError(
                    f'页码范围 {start + 1}-{end + 1} 无效,该文档共 {page_count} 页。'
                )
        return ranges

    def merge_pdfs(self):
        """Concatenate all input files, in list order, into ``save_path``."""
        self._require_files()
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            # Release the input file handles even if appending/writing failed.
            merger.close()
        self.finished.emit('PDF文件已成功合并。')

    def split_pdfs(self):
        """Write every page of the requested ranges as its own PDF file.

        Files are named ``split_page_<n>.pdf`` inside the ``save_path``
        directory, where ``<n>`` counts the written pages starting at 1.
        """
        self._require_files()
        reader = PdfReader(self.pdf_files[0])
        ranges = self._checked_ranges(len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)  # make sure the target folder exists
        file_index = 1  # running counter that keeps output names unique
        for start_page, end_page in ranges:
            for page_num in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(reader.pages[page_num])
                split_save_path = os.path.join(self.save_path, f'split_page_{file_index}.pdf')
                with open(split_save_path, 'wb') as f:
                    writer.write(f)
                file_index += 1
        self.finished.emit('PDF文件已成功拆分并保存。')

    def delete_pages(self):
        """Write a copy of the input file without the requested pages to ``save_path``."""
        self._require_files()
        reader = PdfReader(self.pdf_files[0])
        page_count = len(reader.pages)
        ranges = self._checked_ranges(page_count)
        pages_to_delete = {page for start, end in ranges for page in range(start, end + 1)}
        if len(pages_to_delete) >= page_count:
            # Refuse to produce a document with no pages at all.
            raise ValueError('不能删除文档的全部页面。')
        writer = PdfWriter()
        for i, page in enumerate(reader.pages):
            if i not in pages_to_delete:
                writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定页面已从PDF中删除。')

    def extract_pages(self):
        """Write each requested range as one PDF named ``extract_<k>.pdf``.

        The files are created inside the ``save_path`` directory; ``<k>`` is
        the 1-based position of the range in the input string.
        """
        self._require_files()
        reader = PdfReader(self.pdf_files[0])
        ranges = self._checked_ranges(len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)  # create the folder once, up front
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定页面已从PDF中提取。')

    def pdf_to_jpg(self):
        """Render every page of every input file to ``Page_<n>.jpg``.

        Each input file gets its own sub-folder of ``save_path`` named after
        the file (without extension).
        """
        self._require_files()
        trans = fitz.Matrix(2, 2)  # 2x zoom in both directions; same for every page
        for file in self.pdf_files:
            img_folder = os.path.join(self.save_path, os.path.splitext(os.path.basename(file))[0])
            os.makedirs(img_folder, exist_ok=True)
            # The context manager closes the document even if rendering fails.
            with fitz.open(file) as pdf:
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'Page_{pg + 1}.jpg'))
        self.finished.emit('PDF文件已成功转换为图片。')

    def pdf_to_word(self):
        """Convert every input file to a ``.docx`` document.

        The document is written into ``save_path`` when one was given;
        otherwise it is placed next to the source PDF.
        """
        self._require_files()
        for file in self.pdf_files:
            if self.save_path:
                # Honour the output folder chosen by the caller.
                os.makedirs(self.save_path, exist_ok=True)
                stem = os.path.splitext(os.path.basename(file))[0]
                docx_name = os.path.join(self.save_path, stem + '.docx')
            else:
                docx_name = os.path.splitext(file)[0] + '.docx'
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                cv.close()
        self.finished.emit('PDF文件已成功转换为Word文档。')

    def parse_ranges(self, ranges_str):
        """Turn ``'1,3-4'`` into zero-based inclusive pairs ``[(0, 0), (2, 3)]``.

        Items may be separated by an ASCII or a full-width comma; surrounding
        whitespace and empty items (e.g. a trailing comma) are ignored.

        Raises:
            ValueError: if an item is not ``N`` or ``N-M`` with integer parts,
                or if the string contains no item at all.
        """
        ranges = []
        for part in re.split(r'[,,]', ranges_str or ''):
            part = part.strip()
            if not part:
                continue
            try:
                if '-' in part:
                    start_page, end_page = map(int, part.split('-'))
                else:
                    start_page = end_page = int(part)
            except ValueError:
                raise ValueError(f'无法解析页码范围: {part}') from None
            # User input is 1-based; page indexes are 0-based.
            ranges.append((start_page - 1, end_page - 1))
        if not ranges:
            raise ValueError('请输入页码范围。')
        return ranges
class PDFMergerApp(QMainWindow):
    """Main window of the PDF toolbox.

    Keeps the list of selected PDF paths in ``pdf_files`` (mirrored in the
    list widget) and starts a ``Worker`` thread for each requested operation.
    """

    def __init__(self):
        """Create the state first, then build the widgets that use it."""
        super().__init__()
        self.pdf_files = []
        # Current background job. Stored as ``worker`` rather than ``thread``
        # so the inherited ``QObject.thread()`` method is not shadowed.
        self.worker = None
        self.initUI()

    # ------------------------------------------------------------------ UI
    def _makeButton(self, text, slot):
        """Return a push button labelled *text* whose click calls *slot*."""
        button = QPushButton(text, self)
        button.clicked.connect(slot)
        return button

    def _makeInput(self, placeholder):
        """Return an empty line edit showing *placeholder* as hint text."""
        line_edit = QLineEdit(self)
        line_edit.setPlaceholderText(placeholder)
        return line_edit

    @staticmethod
    def _makeRow(*widgets):
        """Return a horizontal layout holding *widgets* from left to right."""
        row = QHBoxLayout()
        for widget in widgets:
            row.addWidget(widget)
        return row

    def initUI(self):
        """Build the window: file list, list management, conversion, merge and page-range rows."""
        self.setWindowTitle('PDF 工具箱')
        self.setGeometry(100, 100, 800, 600)
        mainLayout = QVBoxLayout()

        self.addButton = self._makeButton('添加 PDF', self.addPDF)
        mainLayout.addWidget(self.addButton)

        self.listWidget = CustomListWidget(self)
        mainLayout.addWidget(self.listWidget)

        self.removeButton = self._makeButton('删除选定', self.removeSelected)
        self.removeAllButton = self._makeButton('删除全部', self.removeAll)
        mainLayout.addLayout(self._makeRow(self.removeButton, self.removeAllButton))

        self.convertJPGButton = self._makeButton('转换为图片', self.convertToJPG)
        self.convertWordButton = self._makeButton('转换为Word', self.convertToWord)
        mainLayout.addLayout(self._makeRow(self.convertJPGButton, self.convertWordButton))

        self.mergeButton = self._makeButton('合并 PDFs', self.mergePDFs)
        mainLayout.addWidget(self.mergeButton)

        self.splitInput = self._makeInput('输入拆分页码范围可输入多个范围,如1,3-4,8-15')
        self.splitButton = self._makeButton('拆分页面', self.splitPDF)
        mainLayout.addLayout(self._makeRow(self.splitInput, self.splitButton))

        self.deleteInput = self._makeInput('输入删除页码范围可输入多个范围,如1,3-4,8-15')
        self.deleteButton = self._makeButton('删除页面', self.deletePages)
        mainLayout.addLayout(self._makeRow(self.deleteInput, self.deleteButton))

        self.extractInput = self._makeInput('输入提取页码范围可输入多个范围,如1,3-4,8-15')
        self.extractButton = self._makeButton('提取页面', self.extractPages)
        mainLayout.addLayout(self._makeRow(self.extractInput, self.extractButton))

        container = QWidget()
        container.setLayout(mainLayout)
        self.setCentralWidget(container)

    # ----------------------------------------------------------- file list
    def addPDF(self):
        """Let the user pick PDF files and add each of them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打开文件', '', 'PDF files (*.pdf)')
        for file_path in files:
            self.addPDFFile(file_path)

    def addPDFFile(self, file_path):
        """Add *file_path* to the list unless it is empty or already present."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listWidget.addItem(file_path)

    def removeSelected(self):
        """Remove the selected entries from both the widget and ``pdf_files``."""
        for item in self.listWidget.selectedItems():
            path = item.text()
            if path in self.pdf_files:
                self.pdf_files.remove(path)
            self.listWidget.takeItem(self.listWidget.row(item))

    def removeAll(self):
        """Empty the file list."""
        self.pdf_files.clear()
        self.listWidget.clear()

    # ------------------------------------------------------ job management
    def _isBusy(self):
        """Warn and return True while a previous job is still running.

        Replacing the reference to a running QThread could let it be
        destroyed mid-run, so a new job is only started once the old one
        has ended.
        """
        if self.worker is not None and self.worker.isRunning():
            QMessageBox.warning(self, "错误", "上一个操作仍在进行中,请稍候。")
            return True
        return False

    def _hasFiles(self):
        """Warn and return False when no PDF has been added yet."""
        if not self.pdf_files:
            QMessageBox.warning(self, "错误", "请先添加PDF文件。")
            return False
        return True

    def _hasSingleFile(self, message):
        """Warn with *message* and return False unless exactly one PDF is listed."""
        if len(self.pdf_files) != 1:
            QMessageBox.warning(self, "错误", message)
            return False
        return True

    def _readRange(self, line_edit):
        """Return the stripped text of *line_edit*, warning when it is empty."""
        range_str = line_edit.text().strip()
        if not range_str:
            QMessageBox.warning(self, "错误", "请输入页码范围。")
        return range_str

    def _startWorker(self, operation, save_path, range_str=None):
        """Create, wire up and start a ``Worker`` for *operation*."""
        # Pass a copy: the list is cleared in onFinished and can be edited by
        # the user while the job is running.
        self.worker = Worker(list(self.pdf_files), range_str=range_str,
                             save_path=save_path, operation=operation)
        self.worker.finished.connect(self.onFinished)
        self.worker.error.connect(self.onError)
        self.worker.start()

    # ---------------------------------------------------------- operations
    def mergePDFs(self):
        """Ask for an output file and merge all listed PDFs into it."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')
        if save_path:
            self._startWorker('merge', save_path)

    def splitPDF(self):
        """Split the requested pages of the single listed PDF into a chosen folder."""
        if self._isBusy() or not self._hasSingleFile("请只选择一个PDF文件进行拆分。"):
            return
        # Validate the range before opening any dialog.
        range_str = self._readRange(self.splitInput)
        if not range_str:
            return
        folder_path = self.getFolderName()
        if folder_path:
            self._startWorker('split', folder_path, range_str)

    def deletePages(self):
        """Save a copy of the single listed PDF without the requested pages."""
        if self._isBusy() or not self._hasSingleFile("请只选择一个PDF文件进行删除操作。"):
            return
        range_str = self._readRange(self.deleteInput)
        if not range_str:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')[0]
        if save_path:
            self._startWorker('delete', save_path, range_str)

    def extractPages(self):
        """Extract the requested ranges of the single listed PDF into a chosen folder."""
        if self._isBusy() or not self._hasSingleFile("请只选择一个PDF文件进行提取操作。"):
            return
        range_str = self._readRange(self.extractInput)
        if not range_str:
            return
        # Worker.extract_pages treats save_path as a directory (it calls
        # os.makedirs on it and writes extract_<k>.pdf inside), so ask for a
        # folder here rather than for a file name.
        save_path = QFileDialog.getExistingDirectory(self, "选择保存提取文件的位置")
        if save_path:
            self._startWorker('extract', save_path, range_str)

    def convertToJPG(self):
        """Render all listed PDFs to images inside a chosen folder."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存图片的位置")
        if save_path:
            self._startWorker('jpg', save_path)

    def convertToWord(self):
        """Convert all listed PDFs to Word, passing the chosen folder to the worker."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存Word的位置")
        if save_path:
            self._startWorker('word', save_path)

    def getFolderName(self):
        """Ask for the folder that receives split files; '' when cancelled."""
        folder_path = QFileDialog.getExistingDirectory(self, "选择保存拆分文件的位置")
        return folder_path

    # ------------------------------------------------------------ callbacks
    def onFinished(self, message):
        """Show the success *message*, then reset the file list and inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onError(self, error_message):
        """Show *error_message*; the file list and inputs are left untouched."""
        QMessageBox.warning(self, "操作失败", error_message)

    def clear_pdf_list(self):
        """Empty ``pdf_files`` and the list widget."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def clear_text_inputs(self):
        """Clear the three page-range line edits."""
        self.splitInput.clear()
        self.deleteInput.clear()
        self.extractInput.clear()
def main():
    """Create the Qt application, show the toolbox window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QApplication(sys.argv)
    window = PDFMergerApp()
    window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)


if __name__ == '__main__':
    main()
复制代码 免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。