PDFBox를 이용하여 PDF 파일의 텍스트 값을 가져오고 있습니다.
각 페이지는 정상적으로 파싱되어 String을 뽑아낼 수 있지만, 파싱할 때마다 아래와 같은 에러 로그가 출력됩니다.
한 페이지를 처리할 때마다 출력되는 로그의 양이 너무 방대해서 작업하기 불편하고 찜찜하기도 하여, 해당 로그의 원인을 정확히 파악하고 싶습니다.
조언 구합니다.
09-01 15:23:32.371: E/PDResources(19501): error while creating a xobject
09-01 15:23:32.371: E/PDResources(19501): java.io.IOException: Invalid filter: COSName{DCTDecode}
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.filter.FilterFactory.getFilter(FilterFactory.java:85)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:305)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:278)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.cos.COSStream.getDecodeResult(COSStream.java:235)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject.<init>(PDImageXObject.java:89)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.pdmodel.graphics.PDXObject.createXObject(PDXObject.java:65)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.pdmodel.PDResources.getXObjects(PDResources.java:247)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFStreamEngine.getXObjects(PDFStreamEngine.java:579)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.operator.Invoke.process(Invoke.java:53)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:529)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:254)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:221)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:203)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:453)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:378)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:337)
09-01 15:23:32.371: E/PDResources(19501): at org.apache.pdfbox.util.PDFTextStripper.getText(PDFTextStripper.java:260)
09-01 15:23:32.371: E/PDResources(19501): at 패키지명.PDF_PageParse(PDFMode.java:329)
09-01 15:23:32.371: E/PDResources(19501): at 패키지명.access$0(PDFMode.java:299)
09-01 15:23:32.371: E/PDResources(19501): at 패키지명$PageTask.doInBackground(PDFMode.java:361)
09-01 15:23:32.371: E/PDResources(19501): at 패키지명$PageTask.doInBackground(PDFMode.java:1)
09-01 15:23:32.371: E/PDResources(19501): at android.os.AsyncTask$2.call(AsyncTask.java:287)
09-01 15:23:32.371: E/PDResources(19501): at java.util.concurrent.FutureTask.run(FutureTask.java:234)
09-01 15:23:32.371: E/PDResources(19501): at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1080)
09-01 15:23:32.371: E/PDResources(19501): at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:573)
09-01 15:23:32.371: E/PDResources(19501): at java.lang.Thread.run(Thread.java:856)