gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.help.base/src/org/simantics/help/base/internal/PDFUtil.java
merged svn revision 33114 and added desktop and help plugins
[simantics/platform.git] / bundles / org.simantics.help.base / src / org / simantics / help / base / internal / PDFUtil.java
1 package org.simantics.help.base.internal;\r
2 \r
3 import java.io.File;\r
4 import java.io.FileInputStream;\r
5 import java.io.IOException;\r
6 \r
7 import org.apache.pdfbox.cos.COSDocument;\r
8 import org.apache.pdfbox.pdfparser.PDFParser;\r
9 import org.apache.pdfbox.pdmodel.PDDocument;\r
10 import org.apache.pdfbox.pdmodel.PDDocumentInformation;\r
11 import org.apache.pdfbox.util.PDFTextStripper;\r
12 import org.eclipse.help.search.ISearchDocument;\r
13 \r
14 /**\r
15  * @author Tuukka Lehtonen\r
16  */\r
17 public class PDFUtil {\r
18 \r
19     public static void stripText(File fromPdf, ISearchDocument doc) throws IOException {\r
20         PDFParser parser = new PDFParser(new FileInputStream(fromPdf));\r
21         parser.parse();\r
22 \r
23         try (COSDocument cosDoc = parser.getDocument()) {\r
24             try (PDDocument pdDoc = new PDDocument(cosDoc)) {\r
25                 int numPages = pdDoc.getNumberOfPages();\r
26                 PDFTextStripper stripper = new PDFTextStripper();\r
27                 stripper.setStartPage(1);\r
28                 stripper.setEndPage(numPages);\r
29                 String text = stripper.getText(pdDoc);\r
30                 PDDocumentInformation docInfo = pdDoc.getDocumentInformation();\r
31                 String title = docInfo.getTitle();\r
32                 String subject = docInfo.getSubject();\r
33                 if (title != null)\r
34                     doc.setTitle(title);\r
35                 if (subject != null)\r
36                     doc.setSummary(subject);\r
37                 doc.addContents(text);\r
38             }\r
39         }\r
40     }\r
41 \r
42 }