本文整理匯總了Java中org.apache.tika.metadata.Metadata.names方法的典型用法代碼示例。如果您正苦於以下問題:Java Metadata.names方法的具體用法?Java Metadata.names怎麼用?Java Metadata.names使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.metadata.Metadata的用法示例。
在下文中一共展示了Metadata.names方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: extractMetaData
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses {@code input} with an {@link AutoDetectParser} and collects the resulting metadata
 * into a map sorted by metadata name.
 *
 * <p>Extraction is best-effort: any exception raised while parsing or collecting is printed
 * to standard error and the entries gathered so far (possibly none) are returned.
 *
 * @param input the document stream to parse; it is closed by this method when non-null
 * @return metadata names mapped to {@code stripWhiteSpace(metadata.get(name))}
 * @throws IOException if closing {@code input} fails
 */
public static TreeMap<String, String> extractMetaData(InputStream input) throws IOException {
    TreeMap<String, String> treeMap = new TreeMap<String, String>();
    try {
        ContentHandler handler = new DefaultHandler();
        Metadata metadata = new Metadata();
        Parser parser = new AutoDetectParser();
        parser.parse(input, handler, metadata, new ParseContext());
        // The for-each evaluates names() exactly once, instead of rebuilding the
        // name array for every bound check and every element access.
        for (String name : metadata.names()) {
            treeMap.put(name, stripWhiteSpace(metadata.get(name)));
        }
    } catch (Exception e) {
        // Deliberately broad: a failed parse must not prevent returning what was collected.
        e.printStackTrace();
    } finally {
        if (input != null) {
            input.close();
        }
    }
    return treeMap;
}
示例2: extractStringMetaData
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses {@code input} with an {@link AutoDetectParser} and renders its metadata as text,
 * one {@code NAME : value} line per metadata name (name upper-cased, line ended by "\n").
 *
 * <p>Extraction is best-effort: if parsing or rendering fails, the lines produced so far
 * (possibly an empty string) are returned and the failure is not reported.
 *
 * @param input the document stream to parse; it is closed by this method when non-null
 * @return the rendered metadata lines, or an empty string when nothing could be extracted
 * @throws IOException if closing {@code input} fails
 */
public static String extractStringMetaData(InputStream input) throws IOException {
    // StringBuilder avoids re-copying the whole accumulated text on every appended line.
    StringBuilder result = new StringBuilder();
    try {
        ContentHandler handler = new DefaultHandler();
        Metadata metadata = new Metadata();
        Parser parser = new AutoDetectParser();
        parser.parse(input, handler, metadata, new ParseContext());
        // names() is evaluated once by the for-each rather than on every iteration.
        for (String name : metadata.names()) {
            // Resolve the value before appending so a failure never leaves a half-written line.
            String value = stripWhiteSpace(metadata.get(name));
            result.append(name.toUpperCase()).append(" : ").append(value).append("\n");
        }
    } catch (Exception ignored) {
        // Intentionally ignored: callers get whatever was rendered before the failure.
    } finally {
        if (input != null) {
            input.close();
        }
    }
    return result.toString();
}
示例3: generateDocument
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Copies Tika metadata and extracted body text into {@code document}.
 *
 * <p>Each metadata name becomes a field whose value is {@code metadata.get(name)}. The string
 * form of {@code content}, cut to at most {@code MAX_TERM_LENGTH_UTF} characters, is added
 * as the {@code "body"} field. Either source may be {@code null}, in which case it is skipped.
 *
 * @param document the document to populate
 * @param metadata the metadata to copy, or {@code null}
 * @param content  the handler whose {@code toString()} supplies the body text, or {@code null}
 * @return the same {@code document} instance that was passed in
 */
public static LWDocument generateDocument(LWDocument document, Metadata metadata, ContentHandler content) {
    if (metadata != null) {
        String[] fieldNames = metadata.names();
        for (int i = 0; i < fieldNames.length; i++) {
            String fieldName = fieldNames[i];
            document.addField(fieldName, metadata.get(fieldName));
        }
    }

    if (content != null) {
        String text = content.toString();
        String truncated = text.length() > MAX_TERM_LENGTH_UTF
                ? text.substring(0, MAX_TERM_LENGTH_UTF)
                : text;
        document.addField("body", truncated);
    }

    return document;
}
示例4: Convert
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Extracts text and metadata from {@code Bytes} using an {@link AutoDetectParser}.
 *
 * <p>On success the {@code FileMetadata} field is replaced with one {@code key=value} line
 * per metadata name and the {@code FullText} field with the extracted text. On failure both
 * fields keep their previous values and the error is handed to
 * {@code PDException.GenPDException}.
 *
 * @param Bytes the document stream to parse; it is not closed by this method
 * @return the current value of the {@code FullText} field
 * @throws PDException declared for the error path
 *         (NOTE(review): presumably raised by {@code GenPDException} - confirm)
 */
protected String Convert(InputStream Bytes) throws PDException
{
    try {
        // -1 disables BodyContentHandler's write limit so large documents are not cut off.
        ContentHandler textHandler = new BodyContentHandler(-1);
        Metadata metadata = new Metadata();
        Parser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        parser.parse(Bytes, textHandler, metadata, context);
        // Build the text in a StringBuilder and assign the field once, rather than
        // re-copying the growing field value for every metadata entry.
        StringBuilder metadataText = new StringBuilder();
        for (String key : metadata.names()) {
            metadataText.append(key).append("=").append(metadata.get(key)).append("\n");
        }
        FileMetadata = metadataText.toString();
        FullText = textHandler.toString();
    } catch (Exception ex) {
        PDException.GenPDException("Error_extracting_content_from_doc", ex.getLocalizedMessage());
    }
    return (FullText);
}
示例5: readXlsx
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses the workbook at {@code xlsxFilePath} with Tika's {@link OOXMLParser} and wraps the
 * extracted text in an {@code ExcelData}.
 *
 * <p>When {@code DEBUG_PRINT_META_DATA} is set, the parsed metadata is also printed.
 *
 * @param xlsxFilePath path of the file to read
 * @return an {@code ExcelData} built from the text collected by the body content handler
 * @throws IOException   if the file cannot be opened, read or closed
 * @throws TikaException if the parser reports a parsing failure
 * @throws SAXException  if the content handler reports a failure
 */
public static ExcelData readXlsx(String xlsxFilePath)
        throws IOException, InvalidFormatException, XmlException, TikaException, SAXException {
    BodyContentHandler bcHandler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    ParseContext pcontext = new ParseContext();
    OOXMLParser parser = new OOXMLParser();
    // try-with-resources releases the file handle even when parsing throws.
    try (FileInputStream inputStream = new FileInputStream(new File(xlsxFilePath))) {
        parser.parse(inputStream, bcHandler, metadata, pcontext);
    }
    if (DEBUG_PRINT_META_DATA) {
        // NOTE(review): the header goes to stderr while the entries go to stdout;
        // kept as-is, confirm whether that split is intended.
        System.err.println("Metadata:");
        for (String name : metadata.names()) {
            System.out.println(name + "\t:\t" + metadata.get(name));
        }
    }
    ExcelData spreedsheet = new ExcelData(bcHandler.toString());
    return spreedsheet;
}
示例6: doProcessStream
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Runs the superclass processing, then parses {@code stream} with an {@link AutoDetectParser},
 * storing the extracted text as the JCas document text and forwarding every metadata
 * name/value pair to {@code addMetadata}.
 *
 * <p>If parsing fails with a {@link SAXException} or {@link TikaException}, a warning is
 * logged and, when the JCas has no text yet, {@code CORRUPT_FILE_TEXT} is used instead.
 *
 * @param stream the content to parse
 * @param source identifier of the content, used in the warning message
 * @param jCas   the JCas to populate
 * @throws IOException if reading the stream fails
 */
@Override
public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException {
    super.doProcessStream(stream, source, jCas);

    try {
        BodyContentHandler bodyHandler = new BodyContentHandler(Integer.MAX_VALUE);
        Metadata tikaMetadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        AutoDetectParser tikaParser = new AutoDetectParser();
        tikaParser.parse(stream, bodyHandler, tikaMetadata, parseContext);

        jCas.setDocumentText(bodyHandler.toString());

        String[] keys = tikaMetadata.names();
        for (int i = 0; i < keys.length; i++) {
            String key = keys[i];
            addMetadata(jCas, key, tikaMetadata.get(key));
        }
    } catch (SAXException | TikaException e) {
        getMonitor().warn("Couldn't parse metadata from '{}'", source, e);
        String currentText = jCas.getDocumentText();
        if (Strings.isNullOrEmpty(currentText)) {
            jCas.setDocumentText(CORRUPT_FILE_TEXT);
        }
    }
}
示例7: parseResponseHeaders
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Rebuilds {@code this.responseHeaders} from the given metadata, one entry per header name
 * that has at least one value, and records the first value of the header whose name equals
 * {@code content-type} (ignoring case) in {@code this.contentType}.
 *
 * @param headerAsMetadata response headers represented as Tika metadata
 */
private void parseResponseHeaders(Metadata headerAsMetadata) {
    Map<String, List<String>> headers = new HashMap<>();
    String[] headerNames = headerAsMetadata.names();
    if (headerNames != null) {
        for (String headerName : headerNames) {
            List<String> headerValues = Arrays.asList(headerAsMetadata.getValues(headerName));
            if (!headerValues.isEmpty()) {
                headers.put(headerName, headerValues);
                if ("content-type".equalsIgnoreCase(headerName)) {
                    this.contentType = headerValues.get(0);
                }
            }
        }
    }
    this.responseHeaders = headers;
}
示例8: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses {@code stream} and returns its text together with a name-to-value metadata map.
 *
 * <p>Text is collected by a {@link BodyContentHandler} limited to {@code MAX_CHARACTERS},
 * fed through a {@link BoilerpipeContentHandler} using {@code KeepEverythingExtractor}.
 *
 * @param stream      the content to parse
 * @param fileName    passed to {@code createMetadata}
 * @param contentType passed to {@code createMetadata}
 * @return the parsed data, or {@code null} if parsing failed (the failure is logged)
 */
public ParsedData parse(InputStream stream, String fileName, String contentType) {
    BodyContentHandler bodyHandler = new BodyContentHandler(MAX_CHARACTERS);
    BoilerpipeContentHandler boilerpipeHandler =
            new BoilerpipeContentHandler(bodyHandler, KeepEverythingExtractor.INSTANCE);
    Metadata tikaMetadata = createMetadata(fileName, contentType);
    ParseContext parseContext = new ParseContext();

    try {
        parser.parse(stream, boilerpipeHandler, tikaMetadata, parseContext);

        Map<String, String> properties = new HashMap<String, String>();
        String[] keys = tikaMetadata.names();
        for (int i = 0; i < keys.length; i++) {
            properties.put(keys[i], tikaMetadata.get(keys[i]));
        }

        String text = bodyHandler.toString();
        return new ParsedData(text, properties);
    } catch (IOException | SAXException | TikaException e) {
        logger.error("Failed to extract metadata using Tika.", e);
        return null;
    }
}
示例9: createDocument
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Obtains a document named after {@code handler.getName()} in {@code corpus} and adds one
 * feature per metadata value found in the handler's metadata (multi-valued names yield
 * several features under the same key).
 *
 * @param corpus  the corpus passed to {@code Document.getDocument}
 * @param handler supplies the document name and the metadata
 * @return the document with all metadata values added as features
 */
private Document createDocument(Corpus corpus, TikaReaderHandler handler) {
    Metadata tikaMetadata = handler.getMetadata();
    Document document = Document.getDocument(this, corpus, handler.getName());

    String[] keys = tikaMetadata.names();
    for (int i = 0; i < keys.length; i++) {
        String key = keys[i];
        String[] values = tikaMetadata.getValues(key);
        for (int j = 0; j < values.length; j++) {
            document.addFeature(key, values[j]);
        }
    }

    return document;
}
示例10: indexDoc
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Indexes a single document.
 *
 * <p>The file is parsed with an {@link AutoDetectParser}; every metadata name becomes a stored
 * {@code TextField}, and the file path and {@code lastModified} are added as the {@code "path"}
 * and {@code "modified"} fields. A short summary and the action taken are appended to
 * {@code results}. The document is added when the writer was opened in {@code CREATE} mode
 * and otherwise replaces any document indexed under the same path.
 *
 * @param writer       the index writer receiving the document
 * @param file         the file to parse and index
 * @param results      text area that receives progress output
 * @param lastModified value stored in the {@code "modified"} field
 * @throws IOException   if the file cannot be read or the index cannot be written
 * @throws SAXException  if the content handler reports a failure while parsing
 * @throws TikaException if the parser reports a parsing failure
 */
public static void indexDoc(IndexWriter writer, Path file, TextArea results, long lastModified)
        throws IOException, SAXException, TikaException {
    AutoDetectParser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    try (InputStream stream = Files.newInputStream(file)) {
        parser.parse(stream, handler, metadata);
        Document doc = new Document();
        for (String name : metadata.names()) {
            doc.add(new TextField(name, metadata.get(name), Field.Store.YES));
        }
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        results.appendText("Title: " + metadata.get("title") + "\n");
        results.appendText("Artists: " + metadata.get("xmpDM:artist") + "\n");
        results.appendText("Genre: " + metadata.get("xmpDM:genre") + "\n");
        results.appendText("Year: " + metadata.get("xmpDM:releaseDate") + "\n");
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can
            // be there):
            results.appendText("adding " + file + "\n");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed). The trailing newline keeps the next file's output
            // from running onto this line, matching the "adding" branch.
            results.appendText("updating " + file + "\n");
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
示例11: main
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses a fixed PDF file with an {@link AutoDetectParser} and prints a handful of its
 * metadata values (title, author, creator, content type, encryption) to standard output.
 *
 * @param args unused
 * @throws IOException   if the file cannot be opened, read or closed
 * @throws TikaException if the parser reports a parsing failure
 * @throws SAXException  if a content handler failure is reported
 */
public static void main(final String[] args) throws IOException,
        TikaException, SAXException {
    File file = new File("/home/aditya/dataset/oca.pdf");
    Parser parser = new AutoDetectParser();
    // NOTE(review): a null handler is passed on purpose here, presumably to read
    // metadata without extracting text - confirm the selected parser tolerates it.
    BodyContentHandler handler = null;
    Metadata metadata = new Metadata();
    ParseContext context = new ParseContext();
    // try-with-resources releases the file handle even when parsing throws.
    try (FileInputStream inputstream = new FileInputStream(file)) {
        parser.parse(inputstream, handler, metadata, context);
    }
    // Get specific metadata
    System.out.println(metadata.get(MetadataProperties.TITLE));
    System.out.println(metadata.get(MetadataProperties.AUTHOR));
    System.out.println(metadata.get(MetadataProperties.CREATOR));
    System.out.println(metadata.get(MetadataProperties.CONTENT_TYPE));
    System.out.println(metadata.get(MetadataProperties.ENCRYPTION));
}
示例12: withInputMetadata
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Sets the input metadata for {@link Parser#parse}.
 *
 * <p>If no input metadata has been set yet, {@code metadata} is used as given. Otherwise the
 * result holds all previously set entries, with each name present in {@code metadata}
 * overridden by {@code metadata.get(name)}. The merge is done on a fresh {@link Metadata}
 * so the instance held by this object is left unmodified.
 *
 * @param metadata the metadata to set or merge in
 * @return a new {@code ParseFiles} carrying the resulting input metadata
 */
public ParseFiles withInputMetadata(Metadata metadata) {
    Metadata existing = this.getInputMetadata();
    if (existing == null) {
        return toBuilder().setInputMetadata(metadata).build();
    }
    // Copy instead of mutating `existing`: it is still referenced by this instance,
    // which must not change as a side effect of building a modified copy.
    Metadata merged = new Metadata();
    for (String name : existing.names()) {
        for (String value : existing.getValues(name)) {
            merged.add(name, value);
        }
    }
    for (String name : metadata.names()) {
        merged.set(name, metadata.get(name));
    }
    return toBuilder().setInputMetadata(merged).build();
}
示例13: ParseResult
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
private ParseResult(String fileLocation, String content, Metadata metadata, Throwable error) {
checkArgument(fileLocation != null, "fileLocation can not be null");
checkArgument(content != null, "content can not be null");
checkArgument(metadata != null, "metadata can not be null");
this.fileLocation = fileLocation;
this.content = content;
this.metadata = metadata;
this.metadataNames = metadata.names();
this.error = (error == null) ? null : new SerializableThrowable(error);
}
示例14: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Delegates parsing to the superclass using a fresh {@link BodyContentHandler} instead of the
 * supplied handler, logs every metadata name with its values at debug level, and records
 * the (metadata, content handler) pair in {@code nestedFiles}.
 *
 * @param stream   the content to parse
 * @param ignore   not used; a new handler is created for the superclass call
 * @param metadata metadata passed to the superclass parser and then logged
 * @param context  the parse context
 */
@Override
public void parse(InputStream stream, ContentHandler ignore, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    ContentHandler bodyHandler = new BodyContentHandler();
    super.parse(stream, bodyHandler, metadata, context);

    log.debug("Begin Document");
    log.debug("Metadata: ");
    String[] keys = metadata.names();
    for (int i = 0; i < keys.length; i++) {
        String key = keys[i];
        log.debug("\t" + key + " -> " + Arrays.asList(metadata.getValues(key)));
    }
    log.debug("End Document");

    AbstractMap.SimpleEntry<Metadata, ContentHandler> nestedEntry =
            new AbstractMap.SimpleEntry<Metadata, ContentHandler>(metadata, bodyHandler);
    nestedFiles.add(nestedEntry);
}
示例15: assembleExtractionResult
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Builds the JSON string describing one extraction result.
 *
 * <p>The top-level object carries a null {@code Exception}, the {@code s3://bucket/key}
 * file path, the extracted text, the content type (default {@code content/unknown}), the
 * content length (default {@code "0"}) and a nested {@code Metadata} object in which each
 * metadata name maps to its values joined with {@code ", "}.
 *
 * @param bucket        bucket part of the file path
 * @param key           key part of the file path
 * @param extractedText the text to place in the {@code Text} entry
 * @param tikaMetadata  metadata supplying content type, content length and the nested entries
 * @return the result serialized with {@code toJSONString()}
 */
private String assembleExtractionResult(String bucket, String key, String extractedText, Metadata tikaMetadata) {
    String contentType = tikaMetadata.get("Content-Type");
    if (contentType == null) {
        contentType = "content/unknown";
    }
    String contentLength = tikaMetadata.get("Content-Length");
    if (contentLength == null) {
        contentLength = "0";
    }

    JSONObject metadataJson = new JSONObject();
    String[] keys = tikaMetadata.names();
    for (int i = 0; i < keys.length; i++) {
        String[] values = tikaMetadata.getValues(keys[i]);
        metadataJson.put(keys[i], String.join(", ", values));
    }

    JSONObject extractJson = new JSONObject();
    extractJson.put("Exception", null);
    extractJson.put("FilePath", "s3://" + bucket + "/" + key);
    extractJson.put("Text", extractedText);
    extractJson.put("ContentType", contentType);
    extractJson.put("ContentLength", contentLength);
    extractJson.put("Metadata", metadataJson);
    return extractJson.toJSONString();
}