本文整理匯總了Java中org.apache.tika.metadata.Metadata.add方法的典型用法代碼示例。如果您正苦於以下問題：Java Metadata.add方法的具體用法？Java Metadata.add怎麼用？Java Metadata.add使用的例子？那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.metadata.Metadata的用法示例。
在下文中一共展示了Metadata.add方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: findMediaType
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the media type of a stream through the Apache Tika library, based on
 * the file name and the magic numbers of the content.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream}; that wrapper (and
 * therefore the supplied stream) is closed before this method returns, and a
 * failure while closing is ignored.
 *
 * @param is stream holding the content to inspect
 * @param fileName file name handed to the detector as the resource name
 * @return the media type reported by the detector
 * @throws IOException if the detector fails while reading the stream
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    BufferedInputStream buffered = new BufferedInputStream(is);
    try {
        Metadata hints = new Metadata();
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);
        Detector detector = new AutoDetectParser().getDetector();
        return detector.detect(buffered, hints);
    } finally {
        try {
            buffered.close();
        } catch (IOException ignored) {
            // Best effort: a failed close must not replace the detection outcome.
        }
    }
}
示例2: convertWordDocumentIntoHtml
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Converts a .docx document into HTML markup. This code
 * is based on <a href="http://stackoverflow.com/a/9053258/313554">this StackOverflow</a> answer.
 *
 * <p>The input stream obtained from the upload is always closed before this
 * method returns, whether or not the conversion succeeds.
 *
 * @param wordDocument the uploaded .docx document to convert
 * @return a DTO built from the original file name and the generated HTML markup
 * @throws DocumentConversionException if reading, parsing or serializing the document fails
 */
public ConvertedDocumentDTO convertWordDocumentIntoHtml(MultipartFile wordDocument) {
    LOGGER.info("Converting word document: {} into HTML", wordDocument.getOriginalFilename());
    // try-with-resources: the parser does not take ownership of the stream, so it
    // has to be released here to avoid leaking it on every conversion.
    try (InputStream input = wordDocument.getInputStream()) {
        Parser parser = new OOXMLParser();
        StringWriter sw = new StringWriter();

        // Serialize the SAX events emitted by the parser as indented HTML into sw.
        SAXTransformerFactory factory = (SAXTransformerFactory)
                SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "utf-8");
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
        handler.setResult(new StreamResult(sw));

        Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_TYPE, "text/html;charset=utf-8");
        parser.parse(input, handler, metadata, new ParseContext());
        return new ConvertedDocumentDTO(wordDocument.getOriginalFilename(), sw.toString());
    }
    catch (IOException | SAXException | TransformerException | TikaException ex) {
        LOGGER.error("Conversion failed because an exception was thrown", ex);
        throw new DocumentConversionException(ex.getMessage(), ex);
    }
}
示例3: getMimeType
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the MIME type of an in-memory payload.
 *
 * @param data raw content to inspect
 * @param fileName name recorded in the metadata as the resource name
 * @return string form of the detected media type
 * @throws Throwable anything raised during detection is propagated unchanged
 */
public static String getMimeType(byte[] data, String fileName) throws Throwable {
    Metadata hints = new Metadata();
    hints.add(Metadata.RESOURCE_NAME_KEY, fileName);

    TikaInputStream content = null;
    try {
        content = TikaInputStream.get(data, hints);
        return detector.detect(content, hints).toString();
    } finally {
        if (content != null) {
            try {
                content.close();
            } catch (IOException ignored) {
                // Closing is best effort; a failure here is deliberately dropped.
            }
        }
    }
}
示例4: init
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Populates the {@code name}, {@code ext} and {@code mime} fields.
 *
 * <p>When no extension is supplied, the name is cut at its last dot and the
 * extension is obtained from {@code getFileExt}; otherwise the name is taken
 * as-is and the extension is lower-cased. The MIME type is then detected from
 * the formatted file name and, if present, the stream content. Any failure
 * during detection is logged and leaves {@code mime} as {@code null}.
 *
 * @param _name file name, with or without extension
 * @param _ext explicit extension; may be empty
 * @param is content stream, or {@code null} to detect from the name only
 */
private void init(String _name, String _ext, InputStream is) {
    if (!Strings.isEmpty(_ext)) {
        name = _name;
        ext = _ext.toLowerCase();
    } else {
        int lastDot = _name.lastIndexOf('.');
        if (lastDot < 0) {
            name = _name;
        } else {
            name = _name.substring(0, lastDot);
        }
        ext = getFileExt(_name);
    }

    Metadata hints = new Metadata();
    hints.add(RESOURCE_NAME_KEY, String.format(FILE_NAME_FMT, name, ext));
    try {
        TikaInputStream content = (is == null) ? null : TikaInputStream.get(is);
        mime = tika.getDetector().detect(content, hints);
    } catch (Throwable e) {
        mime = null;
        log.error("Unexpected exception while detecting mime type", e);
    }
}
示例5: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Extracts the text content of an imported document.
 *
 * <p>The document data is encoded as UTF-8 before being handed to the parser,
 * and that same charset is recorded as the content-encoding hint.
 *
 * @param doc document whose data and URL are fed to the parser
 * @return the text collected by the content handler
 * @throws IOException if the data cannot be encoded or read
 * @throws SAXException if the content handler reports a failure
 * @throws TikaException if the parser fails
 */
public String parse(ImportDocument doc) throws IOException, SAXException, TikaException {
    final String charsetName = "UTF-8";
    String data = doc.getData();
    try (InputStream is = new ByteArrayInputStream(data.getBytes(charsetName))) {
        ContentHandler handler = new ToTextContentHandler();
        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, doc.getUrl().toExternalForm());
        // Report the charset the bytes were actually encoded with. Asking an
        // InputStreamReader for its encoding would yield the JVM default charset,
        // which is unrelated to these bytes on non-UTF-8 platforms.
        metadata.add(Metadata.CONTENT_ENCODING, charsetName);
        parser.parse(is, handler, metadata); //, new ParseContext()
        handler.endDocument();
        return handler.toString();
    }
}
示例6: process
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Fetches the content behind {@code parseUrl}, parses it with an
 * {@link AutoDetectParser} and streams the result to {@code out} through a
 * {@code StreamingToRDFContentHandler}.
 *
 * <p>The connection's input stream, its reader and {@code out} are all closed
 * before this method returns; each close is attempted even if an earlier one fails.
 *
 * @param parseUrl URL of the resource to parse
 * @param out stream receiving the handler output; closed by this method
 * @throws IOException if the connection cannot be opened or a stream fails to close
 * @throws SAXException declared for the overridden contract
 * @throws TikaException wraps any exception raised while parsing
 */
@Override
protected void process(URL parseUrl, ObjectOutputStream out)
        throws IOException, SAXException, TikaException {
    URL url = URLTools.getAuthenticatedUrl(parseUrl);
    URLConnection conn = url.openConnection();
    InputStream in = conn.getInputStream();
    InputStreamReader ir = new InputStreamReader(in);
    try {
        AutoDetectParser parser = new AutoDetectParser();
        ContentHandler handler = new StreamingToRDFContentHandler(writer, out);
        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, url.toExternalForm());
        // NOTE(review): the reader is built with the platform default charset, so
        // this records the JVM default rather than the remote content's encoding
        // - confirm whether that is intended.
        metadata.add(Metadata.CONTENT_ENCODING, ir.getEncoding());
        parser.parse(in, handler, metadata, new ParseContext());
    } catch (Exception e) {
        throw new TikaException(e.getMessage(), e);
    } finally {
        // Nested so that a failing close cannot prevent the remaining resources
        // from being released; the close order (in, out, ir) is unchanged.
        try {
            in.close();
        } finally {
            try {
                out.close();
            } finally {
                ir.close();
            }
        }
    }
}
示例7: prepareMetadataWithConfigParams
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Copies the configured "parse shapes" option into the given Tika metadata.
 *
 * <p>Does nothing when {@code metadata} is {@code null}. The resulting metadata
 * is logged at debug level.
 *
 * @param metadata metadata object to enrich; may be {@code null}
 */
@Override
public void prepareMetadataWithConfigParams(Metadata metadata) {
    if (metadata != null) {
        boolean parseShapes =
                getBooleanProperty(PARSE_SHAPE_PROP_STRING, TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE);
        String parseShapesValue = Boolean.toString(parseShapes);
        metadata.add(TikaMetadataKeys.TIKA_PARSER_PARSE_SHAPES_KEY, parseShapesValue);

        // Guarded so the metadata is only stringified when debug output is enabled.
        if (logger.isDebugEnabled()) {
            logger.debug("Tika metadata options passed to tika parser: " + metadata);
        }
    }
}
示例8: getMetadata
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Builds a fixed metadata sample with two "Author" values and one "Date" value.
 *
 * @return a new metadata instance holding the sample entries
 */
static Metadata getMetadata() {
    Metadata sample = new Metadata();
    // "Author" is added twice, giving the key two values in insertion order.
    for (String author : new String[] {"BeamTikaUser", "BeamTikaUser2"}) {
        sample.add("Author", author);
    }
    sample.add("Date", "2017-09-01");
    return sample;
}
示例9: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Profiles the author of the input and records the outcome in the metadata.
 *
 * <p>The parser configuration is taken from the parse context (falling back to
 * the current one) and the profilers are initialized from it. If the parser is
 * not available, or the extractor cannot be created, the method returns without
 * touching the metadata. Otherwise it adds the age range, the gender and five
 * trait scores under {@code Author_*} keys.
 *
 * @param inputStream content to profile
 * @param contentHandler not used by this method
 * @param metadata receives the {@code Author_*} entries
 * @param parseContext source of an optional {@code ProfileParserConfig}
 * @throws IOException declared by the parser contract
 * @throws SAXException declared by the parser contract
 * @throws TikaException declared by the parser contract
 */
public void parse(InputStream inputStream, ContentHandler contentHandler,
        Metadata metadata, ParseContext parseContext)
        throws IOException, SAXException, TikaException {
    this.config = parseContext.get(ProfileParserConfig.class, config);
    initialize(this.config.getAgeProfilerModelUrl(),
            this.config.getGenderProfilerModelUrl());
    if (!isAvailable()) {
        return;
    }

    final ProfileExtractor extractor;
    try {
        extractor = new ProfileExtractor(this.ageProfiler, this.genderProfiler);
    } catch (Exception e) {
        LOG.warning("Profiler setup failed: " + e);
        return;
    }

    Profile profile = extractor.getProfileFromInput(inputStream);
    metadata.add("Author_AGE", profile.getAgeRange());
    metadata.add("Author_GENDER", profile.getGender());

    // Position i in this array is paired with trait score i of the profile.
    String[] traitKeys = {
        "Author_" + TraitProfiler.TRAITS.TRAIT_AGREEABLE.name(),
        "Author_" + TraitProfiler.TRAITS.TRAIT_CONSCIENTIOUS.name(),
        "Author_" + TraitProfiler.TRAITS.TRAIT_EXTROVERT.name(),
        "Author_" + TraitProfiler.TRAITS.TRAIT_OPEN.name(),
        "Author_" + TraitProfiler.TRAITS.TRAIT_STABLE.name(),
    };
    for (int i = 0; i < traitKeys.length; i++) {
        metadata.add(traitKeys[i], Double.toString(profile.getTraits().get(i)));
    }
}
示例10: from
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the {@link MimeType} from the bytes stream, using the file name and
 * the declared content type as additional hints when they are provided.
 *
 * @see <a href="http://tika.apache.org/1.4/detection.html">Tika content detection</a>
 * @param is stream holding the content to inspect
 * @param fileName optional file name hint; ignored when null or empty
 * @param contentType optional declared content type hint; ignored when null or empty
 * @return the {@link MimeType} matching the detected type name
 * @throws IOException if the stream cannot be read
 */
public static MimeType from(final InputStream is,
        final String fileName, final String contentType) throws IOException {
    Metadata md = new Metadata();
    if (Strings.isNOTNullOrEmpty(fileName)) {
        md.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
    }
    if (Strings.isNOTNullOrEmpty(contentType)) {
        md.add(HttpHeaders.CONTENT_TYPE, contentType);
    }
    // Detect with the metadata built above so the content-type hint is honoured;
    // passing only the file name would silently discard it.
    Tika tika = new Tika();
    String mimeTypeStr = tika.detect(is, md);
    return MimeType.forName(mimeTypeStr);
}
示例11: createMetadata
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Creates a metadata object carrying whichever of the two hints are present.
 *
 * @param fileName resource name to record, or {@code null} to leave it out
 * @param contentType content type to record, or {@code null} to leave it out
 * @return a new metadata instance with zero, one or two entries
 */
private Metadata createMetadata(String fileName, String contentType) {
    final Metadata hints = new Metadata();
    if (null != fileName) {
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);
    }
    if (null != contentType) {
        hints.add(Metadata.CONTENT_TYPE, contentType);
    }
    return hints;
}
示例12: testPrintReport
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Exercises {@link crawlercommons.fetcher.FetchedResult#report()}.
 * Nothing is asserted; the report for a hand-built result is simply logged so
 * that its layout can be inspected.
 *
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
@Test
public void testPrintReport() throws UnsupportedEncodingException {
    Metadata headerMetadata = new Metadata();
    headerMetadata.add(Metadata.CONTENT_DISPOSITION, "This is content disposition");
    headerMetadata.add(Metadata.CONTENT_ENCODING, "This is the encoding");
    headerMetadata.add(Metadata.CONTENT_LANGUAGE, "This is some language");
    headerMetadata.add(Metadata.CONTENT_LENGTH, "This is the length");

    Payload load = new Payload();
    load.put("Item 1", 1234);
    load.put("Item 2", 5678);
    load.put("Item 3", 1357);
    load.put("Item 4", 2468);

    byte[] content = ("Glasgow (/ˈɡlɑːzɡoʊ, ˈɡlæz-/;[4] Scots: Glesca; Scottish Gaelic: Glaschu) "
            + "is the largest city in Scotland, and the third largest in the United Kingdom.")
            .getBytes("UTF-8");

    FetchedResult result = new FetchedResult(
            "http://en.wikipedia.org/wiki/Glasgow", // baseUrl
            "http://en.wikipedia.org/wiki/Glasgow", // redirectedUrl
            System.currentTimeMillis(), // fetchTime
            headerMetadata, // headers
            content, // content
            "ScotsText", // contentType
            2014, // responseRate
            load, // payload
            "http://en.wikipedia.org/wiki/Glasgow", // newBaseUrl
            0, // numRedirects
            "wikipedia.org", // hostAddress
            200, // statusCode
            ""); // reasonPhrase
    LOG.error(result.report());
}
示例13: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Extracts location name entities from the stream, resolves them against the
 * gazetteer and stores the resulting geotag in the metadata.
 *
 * <p>The best entity is written under the {@code Geographic_*} keys; every
 * alternative is written under {@code Optional_*} keys suffixed with its
 * 1-based position.
 *
 * @param stream content to analyse
 * @param handler not used by this method
 * @param metadata receives the geographic entries
 * @param context source of an optional {@code GeoParserConfig}
 * @throws IOException declared by the parser contract
 * @throws SAXException declared by the parser contract
 * @throws TikaException declared by the parser contract
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Configuration: taken from the ParseContext, else the default config.
    GeoParserConfig cfg = context.get(GeoParserConfig.class, defaultconfig);
    String modelPath = cfg.getNERPath();
    gazetteerPath = cfg.getGazetterPath();

    // Collect the location entities and the best entity for this input.
    NameEntityExtractor ner = new NameEntityExtractor(modelPath);
    ner.getAllNameEntitiesfromInput(stream);
    ner.getBestNameEntity();
    ArrayList<String> places = ner.locationNameEntities;
    String bestPlace = ner.bestNameEntity;

    // Index the gazetteer file, then resolve geonames for each entity.
    GeoNameResolver geoResolver = new GeoNameResolver();
    geoResolver.buildIndex(gazetteerPath);
    HashMap<String, ArrayList<String>> resolved = geoResolver.searchGeoName(places);

    // One geotag per input holds the resolved entities.
    GeoTag tag = new GeoTag();
    tag.toGeoTag(resolved, bestPlace);

    metadata.add("Geographic_NAME", tag.Geographic_NAME);
    metadata.add("Geographic_LONGITUDE", tag.Geographic_LONGTITUDE);
    metadata.add("Geographic_LATITUDE", tag.Geographic_LATITUDE);

    int ordinal = 1;
    while (ordinal <= tag.alternatives.size()) {
        GeoTag alternative = (GeoTag) tag.alternatives.get(ordinal - 1);
        metadata.add("Optional_NAME" + ordinal, alternative.Geographic_NAME);
        metadata.add("Optional_LONGITUDE" + ordinal, alternative.Geographic_LONGTITUDE);
        metadata.add("Optional_LATITUDE" + ordinal, alternative.Geographic_LATITUDE);
        ordinal++;
    }
}
示例14: detect
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the media type of a stream and converts it to the Spring representation.
 *
 * @param stream content to inspect; must not be {@code null}
 * @param fileName name recorded in the metadata as the resource name
 * @return the detected type converted by {@code toSpringMediaType}
 * @throws IOException if the detector fails to read the stream
 * @throws NullPointerException if {@code stream} is {@code null}
 */
private org.springframework.http.MediaType detect(final TikaInputStream stream, final String fileName) throws IOException {
    final Metadata hints = new Metadata();
    hints.add(RESOURCE_NAME_KEY, fileName);
    return toSpringMediaType(detector.detect(requireNonNull(stream), hints));
}
示例15: getMimeType
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the media type of a file, passing its absolute path as the resource
 * name and delegating to the stream-based {@code getMimeType} overload.
 *
 * @param file file to inspect
 * @return the media type returned by the stream-based overload
 * @throws IOException if the file cannot be opened, read or closed
 */
public static MediaType getMimeType(File file) throws IOException {
    Metadata md = new Metadata();
    md.add(Metadata.RESOURCE_NAME_KEY, file.getAbsolutePath());
    // The stream is opened here, so it is released here as well, even when
    // detection throws. Closing again is harmless should the overload close it too.
    try (FileInputStream in = new FileInputStream(file)) {
        return getMimeType(in, md);
    }
}