gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.help.base/src/org/simantics/help/base/internal/PDFUtil.java
merged svn revision 33114 and added desktop and help plugins
[simantics/platform.git] / bundles / org.simantics.help.base / src / org / simantics / help / base / internal / PDFUtil.java
diff --git a/bundles/org.simantics.help.base/src/org/simantics/help/base/internal/PDFUtil.java b/bundles/org.simantics.help.base/src/org/simantics/help/base/internal/PDFUtil.java
new file mode 100644 (file)
index 0000000..c12e56b
--- /dev/null
@@ -0,0 +1,42 @@
+package org.simantics.help.base.internal;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.IOException;\r
+\r
+import org.apache.pdfbox.cos.COSDocument;\r
+import org.apache.pdfbox.pdfparser.PDFParser;\r
+import org.apache.pdfbox.pdmodel.PDDocument;\r
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;\r
+import org.apache.pdfbox.util.PDFTextStripper;\r
+import org.eclipse.help.search.ISearchDocument;\r
+\r
+/**\r
+ * @author Tuukka Lehtonen\r
+ */\r
+public class PDFUtil {\r
+\r
+    public static void stripText(File fromPdf, ISearchDocument doc) throws IOException {\r
+        PDFParser parser = new PDFParser(new FileInputStream(fromPdf));\r
+        parser.parse();\r
+\r
+        try (COSDocument cosDoc = parser.getDocument()) {\r
+            try (PDDocument pdDoc = new PDDocument(cosDoc)) {\r
+                int numPages = pdDoc.getNumberOfPages();\r
+                PDFTextStripper stripper = new PDFTextStripper();\r
+                stripper.setStartPage(1);\r
+                stripper.setEndPage(numPages);\r
+                String text = stripper.getText(pdDoc);\r
+                PDDocumentInformation docInfo = pdDoc.getDocumentInformation();\r
+                String title = docInfo.getTitle();\r
+                String subject = docInfo.getSubject();\r
+                if (title != null)\r
+                    doc.setTitle(title);\r
+                if (subject != null)\r
+                    doc.setSummary(subject);\r
+                doc.addContents(text);\r
+            }\r
+        }\r
+    }\r
+\r
+}
\ No newline at end of file