Index: pom.xml
===================================================================
--- pom.xml (revision 36390)
+++ pom.xml (working copy)
@@ -73,6 +73,12 @@
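       <scope>provided</scope>
     </dependency>
     <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>${tika.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>com.drewnoakes</groupId>
       <artifactId>metadata-extractor</artifactId>
       <version>${drewnoakes-extractor.version}</version>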
Index: src/main/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelper.java
===================================================================
--- src/main/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelper.java (revision 36390)
+++ src/main/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelper.java (working copy)
@@ -30,8 +30,10 @@
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
+import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
import org.apache.tika.utils.ParseUtils;
import org.hippoecm.repository.api.HippoNodeType;
import org.slf4j.Logger;
@@ -191,7 +193,12 @@
String nodePath = null;
try {
nodePath = node.getPath();
- String content = ParseUtils.getStringContent(inputStream, getTikaConfig(), MIME_TYPE_PDF);
+ Tika tika = new Tika();
+ // Cap the extracted text at 5 * 1024 * 1024 characters to bound memory use; TODO make this limit configurable
+ int length = 5 * 1024 * 1024;
+ tika.setMaxStringLength(length);
+ Metadata metadata = new Metadata();
+ String content = tika.parseToString(inputStream, metadata);
byteInputStream = new ByteArrayInputStream(content.getBytes());
node.setProperty(HippoNodeType.HIPPO_TEXT, getValueFactory(node).createBinary(byteInputStream));
} catch (IOException e) {
Index: src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java
===================================================================
--- src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java (revision 0)
+++ src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java (working copy)
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2012 Hippo.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.hippoecm.frontend.editor.plugins.resource;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.After;
+import org.junit.AfterClass;
+import static org.junit.Assert.*;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Exercises the Tika text extraction that ResourceHelper applies to PDF resources.
+ *
+ * @author obourgeois
+ */
+public class ResourceHelperTest {
+
+    public ResourceHelperTest() {
+    }
+
+    @BeforeClass
+    public static void setUpClass() {
+    }
+
+    @AfterClass
+    public static void tearDownClass() {
+    }
+
+    @Before
+    public void setUp() {
+    }
+
+    @After
+    public void tearDown() {
+    }
+
+    /**
+     * Test of handlePdfAndSetHippoTextProperty method, of class ResourceHelper.
+     * CMS7-6516: Exceptions logged in terminal[...]
+     *
+     * @throws Exception if the sample PDF cannot be read or parsed, which fails the test
+     */
+    @Test
+    public void testHandlePdf() throws Exception {
+        // Should not throw SaxParseException
+        // With a limit of 10 the extracted text is expected to be cut to 10 characters
+        String content = extractContent(openTestPdf(), 10);
+        assertEquals(10, content.length());
+
+        // Same limit as ResourceHelper sets before parsing
+        content = extractContent(openTestPdf(), 5 * 1024 * 1024);
+        assertNotNull(content);
+    }
+
+    /** Opens the sample PDF from the test classpath, failing the test if it is missing. */
+    private InputStream openTestPdf() {
+        InputStream inputStream = getClass().getResourceAsStream("/test-tika.pdf");
+        assertNotNull("/test-tika.pdf not found on the test classpath", inputStream);
+        return inputStream;
+    }
+
+    /**
+     * Extracts at most {@code length} characters of text from the stream with Tika.
+     * Exceptions are propagated, not caught, so that a parse failure fails the test.
+     *
+     * @param inputStream document to parse; closed before this method returns
+     * @param length maximum number of characters Tika may return
+     * @return the extracted text
+     * @throws IOException if the stream cannot be read
+     * @throws TikaException if Tika cannot parse the document
+     */
+    private String extractContent(InputStream inputStream, int length) throws IOException, TikaException {
+        try {
+            Tika tika = new Tika();
+            tika.setMaxStringLength(length);
+            return tika.parseToString(inputStream, new Metadata());
+        } finally {
+            IOUtils.closeQuietly(inputStream);
+        }
+    }
+}
Index: src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java
===================================================================
--- src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java (revision 0)
+++ src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java (working copy)
Property changes on: src/test/java/org/hippoecm/frontend/editor/plugins/resource/ResourceHelperTest.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: src/test/resources/test-tika.pdf
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/pdf
Index: src/test/resources/test-tika.pdf
===================================================================
--- src/test/resources/test-tika.pdf (revision 0)
+++ src/test/resources/test-tika.pdf (working copy)
Property changes on: src/test/resources/test-tika.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property