--- /dev/null
+package org.simantics.help.base.internal;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.IOException;\r
+\r
+import org.apache.pdfbox.cos.COSDocument;\r
+import org.apache.pdfbox.pdfparser.PDFParser;\r
+import org.apache.pdfbox.pdmodel.PDDocument;\r
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;\r
+import org.apache.pdfbox.util.PDFTextStripper;\r
+import org.eclipse.help.search.ISearchDocument;\r
+\r
+/**\r
+ * Static helpers for feeding the contents of PDF files into the Eclipse help\r
+ * search index.\r
+ *\r
+ * @author Tuukka Lehtonen\r
+ */\r
+public class PDFUtil {\r
+\r
+    /**\r
+     * Parses the given PDF file, extracts the text of all of its pages and\r
+     * stores the result in the given search document.\r
+     *\r
+     * <p>\r
+     * The extracted text is added as the contents of <code>doc</code>. If the\r
+     * PDF document information contains a title, it is set as the title of\r
+     * <code>doc</code>; if it contains a subject, it is set as the summary of\r
+     * <code>doc</code>.\r
+     *\r
+     * @param fromPdf the PDF file to read\r
+     * @param doc the search document that receives the title, summary and\r
+     *        text contents\r
+     * @throws IOException if the file cannot be opened, read or parsed\r
+     */\r
+    public static void stripText(File fromPdf, ISearchDocument doc) throws IOException {\r
+        // Own the file stream here so that the file handle is released on every\r
+        // exit path, including a failure in the parser constructor or in\r
+        // parse(), instead of depending on the parser to close it.\r
+        try (FileInputStream in = new FileInputStream(fromPdf)) {\r
+            PDFParser parser = new PDFParser(in);\r
+            parser.parse();\r
+\r
+            // Resources are closed in reverse order: pdDoc first, then cosDoc,\r
+            // which matches the close order of the nested try blocks this\r
+            // replaces.\r
+            try (COSDocument cosDoc = parser.getDocument();\r
+                    PDDocument pdDoc = new PDDocument(cosDoc)) {\r
+                // Extract the text of every page, from page 1 to the last one.\r
+                PDFTextStripper stripper = new PDFTextStripper();\r
+                stripper.setStartPage(1);\r
+                stripper.setEndPage(pdDoc.getNumberOfPages());\r
+                String text = stripper.getText(pdDoc);\r
+\r
+                // Title and subject are optional metadata; only set them when present.\r
+                PDDocumentInformation docInfo = pdDoc.getDocumentInformation();\r
+                String title = docInfo.getTitle();\r
+                if (title != null) {\r
+                    doc.setTitle(title);\r
+                }\r
+                String subject = docInfo.getSubject();\r
+                if (subject != null) {\r
+                    doc.setSummary(subject);\r
+                }\r
+\r
+                doc.addContents(text);\r
+            }\r
+        }\r
+    }\r
+\r
+}
\ No newline at end of file