本文整理汇总了Java中org.apache.tika.Tika.setMaxStringLength方法的典型用法代码示例。如果您正苦于以下问题：Java Tika.setMaxStringLength方法的具体用法？Java Tika.setMaxStringLength怎么用？Java Tika.setMaxStringLength使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.tika.Tika的用法示例。
在下文中一共展示了Tika.setMaxStringLength方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: open
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Extracts the text content of {@code file} with Tika, displays it, and saves it
 * under the file's base name with a {@code .txt} extension.
 *
 * <p>If extraction fails, the exception's stack trace is written in place of the
 * text, so the failure ends up both in the display and in the saved output.
 *
 * @param file the file to extract text from
 */
public void open(File file) {
    Tika tika = new Tika();
    // Upper bound on the number of characters Tika returns from parseToString.
    tika.setMaxStringLength(999999);
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);

    String name = file.getName();
    // NOTE(review): fileName and plain look like UI text components declared
    // elsewhere in the class - confirm.
    fileName.setText(name);

    // lastIndexOf returns -1 when the name has no '.', and substring(0, -1) would
    // throw StringIndexOutOfBoundsException; fall back to the whole name instead.
    int dot = name.lastIndexOf('.');
    String baseName = dot >= 0 ? name.substring(0, dot) : name;
    String saveName = baseName + ".txt";

    try {
        String text = tika.parseToString(file);
        pw.println(text);
    } catch (Exception ex) {
        // Deliberate: surface the failure to the user through the same output
        // channel as the extracted text rather than aborting.
        ex.printStackTrace(pw);
    }
    pw.flush();

    String output = sw.toString();
    plain.setText(output);
    saveFile(output, saveName);
    plain.setCaretPosition(0);
}
示例2: processDocument
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Replaces the document's raw bytes with the UTF-8 encoded text that Tika extracts
 * from them, and copies every Tika metadata entry onto the document.
 *
 * <p>Documents without raw data are returned untouched. If Tika fails with an
 * {@link IOException} or {@code TikaException}, the document's status is set to
 * {@code Status.DROPPED}. If a {@link SecurityException} is raised, it is logged
 * and the document is returned in whatever state it had reached.
 *
 * @param document the document to process
 * @return a single-element array containing {@code document}
 */
@Override
public Document[] processDocument(Document document) {
    byte[] rawData = document.getRawData();
    if (rawData == null) {
        log.debug("Skipping document without data in " + getName());
        return new Document[]{document};
    }
    try {
        Tika tika = new Tika();
        // Limit extracted text to as many characters as there are input bytes.
        // NOTE(review): compressed formats may yield more text than raw bytes, in
        // which case the text would be truncated - confirm this cap is intended.
        tika.setMaxStringLength(rawData.length);
        Metadata metadata = new Metadata();
        try (ByteArrayInputStream bais = new ByteArrayInputStream(rawData)) {
            String textContent = tika.parseToString(bais, metadata);
            document.setRawData(textContent.getBytes(Charset.forName("UTF-8")));
            for (String name : metadata.names()) {
                document.put(sanitize(name) + plusSuffix(), metadata.get(name));
            }
        } catch (IOException | TikaException e) {
            log.warn("Tika processing failure!", e);
            // If Tika can't parse it we certainly don't want unparsed binary data in the index.
            document.setStatus(Status.DROPPED);
        }
    } catch (SecurityException e) {
        // AccessControlException is a SecurityException, so this one clause covers
        // both cases the original code tested for; every other throwable still
        // propagates to the caller. Log the cause instead of hiding it on stdout.
        log.warn("Security restriction hit during Tika processing", e);
    }
    return new Document[]{document};
}
示例3: prepare
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Initialises this component's state from the supplied configuration map.
 *
 * <p>Stores the collector, creates a Tika instance with a string-length limit of
 * 10 * 1024 * 1024 characters, and reads the {@code "inputFile"}, {@code "solrUrl"},
 * {@code "caseId"} and {@code "custodian"} entries. Each entry is dereferenced
 * without a null check, so a missing key results in a {@link NullPointerException}.
 *
 * @param stormConf configuration map the settings are read from
 * @param context   not used by this method
 * @param collector collector kept for later use
 */
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {
    this.collector = collector;

    final int maxExtractedChars = 10 * 1024 * 1024;
    this.tika = new Tika();
    this.tika.setMaxStringLength(maxExtractedChars);

    // Lookups stay in their original order so that a missing key fails at the
    // same point, with the same fields already assigned.
    this.inputDir = stormConf.get("inputFile").toString();
    final String indexUrl = stormConf.get("solrUrl").toString();
    final String caseIdentifier = stormConf.get("caseId").toString();
    this.custodian = stormConf.get("custodian").toString();

    this.solrIndex = new SolrIndex(indexUrl, caseIdentifier);
}
示例4: detectSchemaMimeType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Asks a freshly created Tika instance to detect the media type of the given bytes.
 *
 * @param value the bytes to inspect
 * @return the result of {@code Tika.detect(byte[])} for {@code value}
 */
private String detectSchemaMimeType(final byte[] value) {
    final Tika detector = new Tika();
    // -1 is passed as the string-length limit; per the Tika API docs this disables
    // the limit. NOTE(review): the limit appears to apply to text extraction rather
    // than detection - confirm whether this call is needed here.
    detector.setMaxStringLength(-1);
    final String mimeType = detector.detect(value);
    return mimeType;
}