Description
See
https://issues.apache.org/jira/browse/TIKA-972
https://issues.apache.org/jira/browse/PDFBOX-1512
[org.apache.jackrabbit.core.query.lucene.LazyTextExtractorField$ParsingTask.run():181] Failed to extract text from a binary property
org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.ParserDecorator$1@5e5c7fba
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:199)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.jackrabbit.core.query.lucene.JackrabbitParser.parse(JackrabbitParser.java:192)
at org.apache.jackrabbit.core.query.lucene.LazyTextExtractorField$ParsingTask.run(LazyTextExtractorField.java:175)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
at java.util.concurrent.FutureTask.run(FutureTask.java:166)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:178)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:292)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
at java.lang.Thread.run(Thread.java:722)
Caused by: java.lang.IllegalArgumentException: Comparison method violates its general contract!
at java.util.TimSort.mergeLo(TimSort.java:747)
at java.util.TimSort.mergeAt(TimSort.java:483)
at java.util.TimSort.mergeCollapse(TimSort.java:410)
at java.util.TimSort.sort(TimSort.java:214)
at java.util.TimSort.sort(TimSort.java:173)
at java.util.Arrays.sort(Arrays.java:659)
at java.util.Collections.sort(Collections.java:217)
at org.apache.pdfbox.util.PDFTextStripper.writePage(PDFTextStripper.java:558)
at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:449)
at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:372)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:328)
at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:56)
at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:89)
at org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
... 11 more