package org.simantics.help.base.internal;

import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.text.PDFTextStripper;
import org.eclipse.help.search.ISearchDocument;

/**
 * Helper for feeding the contents of PDF files into the help search index.
 *
 * @author Tuukka Lehtonen
 */
public class PDFUtil {

    /**
     * Extracts the text of every page of the given PDF file and adds it to the
     * given search document. If the PDF's document information carries a title
     * and/or a subject, they are set as the search document's title and
     * summary, respectively; absent ({@code null}) values are left untouched.
     *
     * @param fromPdf the PDF file to read
     * @param doc     the search document that receives the title, summary and
     *                text contents
     * @throws IOException if the file cannot be opened, parsed or its text
     *                     cannot be extracted
     */
    public static void stripText(File fromPdf, ISearchDocument doc) throws IOException {
        // The random-access source is only handed to the parser; the
        // PDDocument below is built from the bare COSDocument and therefore
        // has no reference to the source. Manage it here so the file handle is
        // released both on success and when parsing fails.
        try (RandomAccessFile source = new RandomAccessFile(fromPdf, "r")) {
            PDFParser parser = new PDFParser(source);
            parser.parse();

            // Resources are closed in reverse order: PDDocument first, then
            // the COSDocument, and finally the underlying file.
            try (COSDocument cosDoc = parser.getDocument();
                    PDDocument pdDoc = new PDDocument(cosDoc)) {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setStartPage(1);
                stripper.setEndPage(pdDoc.getNumberOfPages());
                String text = stripper.getText(pdDoc);

                PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
                String title = docInfo.getTitle();
                String subject = docInfo.getSubject();

                if (title != null) {
                    doc.setTitle(title);
                }
                if (subject != null) {
                    doc.setSummary(subject);
                }
                doc.addContents(text);
            }
        }
    }

}