當前位置: 首頁>>代碼示例>>Java>>正文


Java Metadata.set方法代碼示例

本文整理匯總了Java中org.apache.tika.metadata.Metadata.set方法的典型用法代碼示例。如果您正苦於以下問題:Java Metadata.set方法的具體用法?Java Metadata.set怎麽用?Java Metadata.set使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.tika.metadata.Metadata的用法示例。


在下文中一共展示了Metadata.set方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: detectMimeTypes

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the MIME types of the supplied content.
 *
 * <p>The optional file name is handed to the detector as a hint. The detected media type is
 * expanded through {@code unravel} (per the original comment, down to the least specific type);
 * when nothing is produced the generic octet-stream type is returned instead.
 *
 * @param input content to inspect; must not be {@code null}
 * @param fileName name of the resource, or {@code null} when unknown
 * @return a list containing at least one MIME type
 * @throws IOException if the content cannot be read
 */
@Nonnull
@Override
public List<String> detectMimeTypes(final InputStream input, @Nullable final String fileName) throws IOException {
  checkNotNull(input);

  // Pass the file name to the detector as a hint, when one was given.
  final Metadata hints = new Metadata();
  if (fileName != null) {
    hints.set(Metadata.RESOURCE_NAME_KEY, fileName);
  }

  final MediaType detectedType;
  try (final TikaInputStream tikaStream = TikaInputStream.get(input)) {
    detectedType = detector.detect(tikaStream, hints);
  }

  // unravel to least specific
  final List<String> mimeTypes = Lists.newArrayList();
  unravel(mimeTypes, detectedType);

  // Never hand back an empty list: fall back to the generic binary type.
  if (mimeTypes.isEmpty()) {
    mimeTypes.add(MimeTypes.OCTET_STREAM);
  }
  return mimeTypes;
}
 
開發者ID:sonatype,項目名稱:nexus-public,代碼行數:26,代碼來源:DefaultMimeSupport.java

示例2: testWritesISO8601Dates

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Checks that, for each raw creation date, the spewer writes both the raw value and the
 * expected ISO 8601 rendering of it.
 */
@Test
public void testWritesISO8601Dates() throws IOException {
	final SpewerStub spewer = new SpewerStub();
	final Document document = factory.create("test.txt");
	final Metadata metadata = document.getMetadata();
	final FieldNames fields = spewer.getFields();

	// TODO: this should go in a separate test for the MetadataTransformer.
	// rawDates[k] is expected to be written as expectedIsoDates[k].
	final String[] rawDates = {
			"2011-12-03+01:00",
			"2015-06-03",
			"Tue Jan 27 17:03:21 2004",
			"19106-06-07T08:00:00Z"
	};
	final String[] expectedIsoDates = {
			"2011-12-03T12:00:00Z",
			"2015-06-03T12:00:00Z",
			"2004-01-27T17:03:21Z",
			"+19106-06-07T08:00:00Z"
	};

	for (int index = 0; index < rawDates.length; index++) {
		final String rawDate = rawDates[index];

		metadata.set(Office.CREATION_DATE, rawDate);
		spewer.writeMetadata(document);

		final String rawField = fields.forMetadata(Office.CREATION_DATE.getName());
		Assert.assertEquals(rawDate, spewer.metadata.get(rawField));

		final String isoField = fields.forMetadataISODate(Office.CREATION_DATE.getName());
		Assert.assertEquals(expectedIsoDates[index], spewer.metadata.get(isoField));

		// Reset the store of written metadata on each iteration.
		spewer.close();
	}
}
 
開發者ID:ICIJ,項目名稱:extract,代碼行數:26,代碼來源:SpewerTest.java

示例3: initSize

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Reads the pixel dimensions of an image file and stores them on the given file item.
 *
 * @param f file item that receives the width and height
 * @param img image file to inspect
 * @param mime content type handed to the image parser
 * @return a result with exit code {@code ZERO} on success, or {@code -1} plus error details
 *         when anything goes wrong
 */
private static ProcessResult initSize(BaseFileItem f, File img, String mime) {
	final ProcessResult result = new ProcessResult();
	result.setProcess("get image dimensions :: " + f.getId());

	final Parser imageParser = new ImageParser();
	try (InputStream imageStream = new FileInputStream(img)) {
		final Metadata imageMeta = new Metadata();
		imageMeta.set(CONTENT_TYPE, mime);
		imageParser.parse(imageStream, new DefaultHandler(), imageMeta, new ParseContext());

		// The parser reports the dimensions as strings; a missing or malformed value
		// throws here and is handled by the catch block below.
		final String width = imageMeta.get(TIFF.IMAGE_WIDTH);
		final String height = imageMeta.get(TIFF.IMAGE_LENGTH);
		f.setWidth(Integer.valueOf(width));
		f.setHeight(Integer.valueOf(height));

		result.setExitCode(ZERO);
	} catch (Exception e) {
		// Any failure (I/O, parsing, number format) is reported through the result.
		log.error("Error while getting dimensions", e);
		result.setError("Error while getting dimensions");
		result.setException(e.getMessage());
		result.setExitCode(-1);
	}
	return result;
}
 
開發者ID:apache,項目名稱:openmeetings,代碼行數:20,代碼來源:ImageConverter.java

示例4: testTransformValue

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Test that the ContentHandler properly strips the illegal characters.
 *
 * <p>A string containing non-characters is stored in a metadata field, a document is built from
 * that metadata through {@link StripNonCharSolrContentHandlerFactory}, and the resulting field
 * value is expected to be the plain string {@code "foobar"}.
 */
@Test
public void testTransformValue() {
  String fieldName = "user_name";
  // Sanity check: the fixture really differs from the clean value.
  assertFalse("fixture should contain illegal characters", "foobar".equals(getFoobarWithNonChars()));

  Metadata metadata = new Metadata();
  // load illegal char string into a metadata field and generate a new document,
  // which will cause the ContentHandler to be invoked.
  metadata.set(fieldName, getFoobarWithNonChars());
  StripNonCharSolrContentHandlerFactory contentHandlerFactory =
    new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
  IndexSchema schema = h.getCore().getLatestSchema();
  // Typed (non-raw) empty parameter map: no request parameters are needed for this test.
  SolrContentHandler contentHandler =
    contentHandlerFactory.createSolrContentHandler(
        metadata, new MapSolrParams(new HashMap<String, String>()), schema);
  SolrInputDocument doc = contentHandler.newDocument();
  String foobar = doc.getFieldValue(fieldName).toString();
  assertTrue("illegal characters were not stripped, got: " + foobar, "foobar".equals(foobar));
}
 
開發者ID:europeana,項目名稱:search,代碼行數:22,代碼來源:SolrCellMorphlineTest.java

示例5: writeFile

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Uploads the given bytes to the bucket under the key resolved from {@code path}.
 *
 * <p>The object metadata carries the MD5 checksum, the length and a content type detected by
 * Tika from the bytes and the file name.
 *
 * @param path destination path; its file name is used as a detection hint
 * @param data content to upload
 * @throws IOException if detection or stream handling fails
 */
@Override
void writeFile(final String path, final byte[] data) throws IOException {
    // Give Tika a few hints for the content detection
    final String fileName = FilenameUtils.getName(FilenameUtils.normalize(path));
    final Metadata detectionHints = new Metadata();
    detectionHints.set(Metadata.RESOURCE_NAME_KEY, fileName);

    final ObjectMetadata objectMetadata = new ObjectMetadata();
    objectMetadata.setContentLength(data.length);
    objectMetadata.setContentMD5(Md5Utils.md5AsBase64(data));

    // The same stream is used first for detection and then as the upload body.
    try (InputStream content = TikaInputStream.get(data, detectionHints)) {
        final String contentType = TIKA_DETECTOR.detect(content, detectionHints).toString();
        objectMetadata.setContentType(contentType);
        s3.putObject(bucket, resolveKey(path), content, objectMetadata);
    }
}
 
開發者ID:berlam,項目名稱:github-bucket,代碼行數:15,代碼來源:TransportAmazonLambdaS3.java

示例6: walk

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Processes one repository file: reads it, computes its MD5 and uploads it when
 * {@code isUploadFile} says an upload is required.
 *
 * @param iter iterator over the bucket's object summaries, passed through to {@code isUploadFile}
 * @param file id of the repository object to read
 * @param path path of the file, used as the upload key and as a detection hint
 * @return {@code true} if the file was uploaded, {@code false} if it was skipped
 * @throws IOException if reading the object or handling the streams fails
 */
private boolean walk(Iterator<S3ObjectSummary> iter, ObjectId file, String path) throws IOException {
    LOG.debug("Start processing file: {}", path);

    // Read the whole object while computing its MD5 digest in the same pass.
    final byte[] content;
    final byte[] md5;
    try (DigestInputStream digestStream = new DigestInputStream(repository.open(file).openStream(), DigestUtils.getMd5Digest())) {
        content = IOUtils.toByteArray(digestStream);
        md5 = digestStream.getMessageDigest().digest();
    }

    if (!isUploadFile(iter, path, Hex.encodeHexString(md5))) {
        LOG.info("Skipping file (same checksum): {}", path);
        return false;
    }

    LOG.info("Uploading file: {}", path);
    final ObjectMetadata objectMetadata = new ObjectMetadata();
    objectMetadata.setContentMD5(Base64.encodeAsString(md5));
    objectMetadata.setContentLength(content.length);

    // Give Tika a few hints for the content detection
    final Metadata detectionHints = new Metadata();
    detectionHints.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(FilenameUtils.normalize(path)));

    // The same stream is used first for detection and then as the upload body.
    try (InputStream body = TikaInputStream.get(content, detectionHints)) {
        final String contentType = TIKA_DETECTOR.detect(body, detectionHints).toString();
        objectMetadata.setContentType(contentType);
        s3.putObject(bucket.getName(), path, body, objectMetadata);
    }
    return true;
}
 
開發者ID:berlam,項目名稱:github-bucket,代碼行數:29,代碼來源:RepositoryS3.java

示例7: detectMimeType

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the MIME type of the file at the given path.
 *
 * @param filePath path of the file to inspect
 * @return the detected media type as a string, or {@code "UNKNOWN"} if the file cannot be
 *         opened or read
 */
public static String detectMimeType(final String filePath) {
    TikaInputStream tikaIS = null;
    try {
        File targetFile = new File(filePath);
        tikaIS = TikaInputStream.get(targetFile);

    /*
     * You might not want to provide the file's name. If you provide an Excel
     * document with a .xls extension, it will get it correct right away; but
     * if you provide an Excel document with .doc extension, it will guess it
     * to be a Word document
     */
        final Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, targetFile.getName());

        return DETECTOR.detect(tikaIS, metadata).toString();
    } catch (IOException ex) {
        return "UNKNOWN";
    } finally {
        if (tikaIS != null) {
            try {
                tikaIS.close();
            } catch (IOException e) {
                // A failed close is only logged; it must not change the result.
                // The exception message is passed as an argument rather than as the format
                // string, so a '%' inside it cannot break formatting.
                Timber.d(e, "%s: %s", LOG_TAG, e.getMessage());
            }
        }
    }
}
 
開發者ID:crysehillmes,項目名稱:smoothnovelreader,代碼行數:29,代碼來源:NovelBookShelfFragment.java

示例8: withInputMetadata

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Sets the input metadata for {@link Parser#parse}.
 *
 * <p>If input metadata is already present, each name from {@code metadata} is written onto it
 * with the value returned by {@code metadata.get(name)}; otherwise {@code metadata} is used as
 * is.
 *
 * <p>NOTE(review): the merge writes into the existing {@link Metadata} instance in place.
 */
public ParseFiles withInputMetadata(Metadata metadata) {
  final Metadata existing = this.getInputMetadata();
  if (existing == null) {
    // Nothing configured yet: adopt the supplied metadata as is.
    return toBuilder().setInputMetadata(metadata).build();
  }

  for (String key : metadata.names()) {
    existing.set(key, metadata.get(key));
  }
  return toBuilder().setInputMetadata(existing).build();
}
 
開發者ID:apache,項目名稱:beam,代碼行數:13,代碼來源:TikaIO.java

示例9: processElement

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Re-emits each parse result with its metadata reduced to, at most, the "Author" property.
 */
@ProcessElement
public void processElement(ProcessContext c) {
  final ParseResult input = c.element();
  final String location = input.getFileLocation();

  // Files contain many metadata properties. This function drops all but the "Author"
  // property manually added to "apache-beam-tika.odt" resource only to make
  // the tests simpler
  final Metadata reduced = new Metadata();
  if (location.endsWith("valid/apache-beam-tika.odt")) {
    reduced.set("Author", input.getMetadata().get("Author"));
  }

  c.output(ParseResult.success(location, input.getContent(), reduced));
}
 
開發者ID:apache,項目名稱:beam,代碼行數:14,代碼來源:TikaIOTest.java

示例10: get

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Serves a static resource for any GET request.
 *
 * <p>The servlet path is mapped to a file through {@code ResourceManager}; {@code "/"} is
 * served as {@code /index.html}. The content type is detected with Tika, and the file is
 * streamed to the response.
 *
 * @param request incoming request; only its servlet path is used
 * @param response response that receives the content type, length and file bytes
 * @throws FileNotFoundException if the file is outside the permitted directories or is missing
 * @throws IOException if reading the file or writing the response fails
 */
@GetMapping("/**")
@ResponseBody
public void get(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String path = request.getServletPath();
    log.info("GET " + path);

    if (path.equals("/")) {
        path = "/index.html";
    }

    File f = ResourceManager.getResource(path.substring(1));
    boolean isInPublic = f.getAbsolutePath().startsWith(ResourceManager.PUBLIC_DIR.getAbsolutePath());
    //Verify that the file requested is in a public directory
    // NOTE(review): getAbsolutePath() does not resolve ".." segments and startsWith() is a plain
    // string-prefix test; confirm ResourceManager.getResource() already rejects traversal.
    if (!f.getParentFile().getAbsolutePath().equals(ResourceManager.getDataDir().getAbsolutePath())
            && !isInPublic) {
        throw new FileNotFoundException();
    }

    if (!f.exists()) {
        throw new FileNotFoundException();
    }

    Metadata metadata = new Metadata();
    metadata.set(Metadata.RESOURCE_NAME_KEY, f.toString());

    // Close the detection stream as soon as the type is known.
    final MediaType mediaType;
    try (TikaInputStream detectionStream = TikaInputStream.get(f.toPath())) {
        mediaType = tika.getDetector().detect(detectionStream, metadata);
    }

    response.setContentType(mediaType.toString());
    response.setContentLengthLong(f.length());

    // Close the file handle even if the client disconnects while the body is being copied.
    try (FileInputStream body = new FileInputStream(f)) {
        IOUtils.copy(body, response.getOutputStream());
    }
    response.flushBuffer();
}
 
開發者ID:Frederikam,項目名稱:fred.moe,代碼行數:35,代碼來源:SpringController.java

示例11: parse

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Fallback parsing: delegates to {@code ErrorParser} for non-empty input and to
 * {@code EmptyParser} for empty input.
 *
 * <p>The size is taken from the content-length metadata when present; otherwise it is measured
 * from the stream and written back to the metadata.
 *
 * <p>NOTE(review): when the length has to be measured, the {@link TikaInputStream} obtained
 * from {@code stream} is closed before {@code stream} is handed to the delegate parser;
 * confirm the delegates do not need to read from it.
 */
@Override
public void parse(final InputStream stream, final ContentHandler handler, final Metadata metadata,
                  final ParseContext context) throws SAXException, IOException, TikaException {
	final String declaredLength = metadata.get(Metadata.CONTENT_LENGTH);
	final long size;

	if (declaredLength == null || declaredLength.isEmpty()) {
		// No usable length in the metadata: measure the stream and record the result.
		try (final TikaInputStream tis = TikaInputStream.get(stream)) {
			size = tis.getLength();
		}

		metadata.set(Metadata.CONTENT_LENGTH, Long.toString(size));
	} else {
		size = Long.parseLong(declaredLength);
	}

	// If the file is not empty, throw a parse error.
	// Otherwise, output an empty document.
	final boolean empty = size <= 0;
	if (empty) {
		metadata.set(Metadata.CONTENT_TYPE, "application/octet-stream");
	}

	final Parser delegate = empty ? EmptyParser.INSTANCE : ErrorParser.INSTANCE;
	delegate.parse(stream, handler, metadata, context);
}
 
開發者ID:ICIJ,項目名稱:extract,代碼行數:29,代碼來源:FallbackParser.java

示例12: getMetadata

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Builds a fresh {@link Metadata} instance for a document, recording the resource name when
 * one is available.
 *
 * @param name resource name; {@code null} or empty means no name is recorded
 * @return a new metadata instance
 */
private static Metadata getMetadata(final String name) {
	final Metadata result = new Metadata();
	final boolean hasName = name != null && !name.isEmpty();

	if (hasName) {
		result.set(Metadata.RESOURCE_NAME_KEY, name);
	}

	return result;
}
 
開發者ID:ICIJ,項目名稱:extract,代碼行數:16,代碼來源:ParsingReader.java

示例13: writeEmbed

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Writes an embedded document to the output directory, using its hash as the file name.
 *
 * <p>If the embed's open container is a {@link DirectoryEntry}, its entries are first written
 * to a temporary file. The temporary resources stay open until the copy has finished:
 * closing them earlier would delete the temporary file before it is sized and copied.
 *
 * @param tis stream of the embedded document
 * @param embed embedded document whose hash and metadata are used
 * @param name document name, used only for logging
 * @throws IOException if writing the temporary file or copying to the destination fails
 */
private void writeEmbed(final TikaInputStream tis, final EmbeddedDocument embed, final String name) throws IOException {
	final Path destination = outputPath.resolve(embed.getHash());
	final Metadata metadata = embed.getMetadata();
	final Object container = tis.getOpenContainer();

	// The temporary resources must outlive every use of the source path below.
	try (final TemporaryResources tmp = new TemporaryResources()) {
		final Path source;

		// If the input is a container, write it to a temporary file so that it can then be copied atomically.
		// This happens with, for example, an Outlook Message that is an attachment of another Outlook Message.
		if (container instanceof DirectoryEntry) {
			source = tmp.createTempFile();

			try (final POIFSFileSystem fs = new POIFSFileSystem()) {
				saveEntries((DirectoryEntry) container, fs.getRoot());

				try (final OutputStream output = Files.newOutputStream(source)) {
					fs.writeFilesystem(output);
				}
			}
		} else {
			source = tis.getPath();
		}

		// Set the content-length as it isn't (always?) set by Tika for embeds.
		if (null == metadata.get(Metadata.CONTENT_LENGTH)) {
			metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(source)));
		}

		// To prevent massive duplication and because the disk is only a storage for underlying data, save using the
		// straight hash as a filename.
		try {
			Files.copy(source, destination);
		} catch (final FileAlreadyExistsException e) {
			// Same hash already on disk: only overwrite when the sizes disagree.
			if (Files.size(source) != Files.size(destination)) {
				Files.copy(source, destination, StandardCopyOption.REPLACE_EXISTING);
			} else {
				logger.info("Temporary file for document \"{}\" in \"{}\" already exists.", name, root);
			}
		}
	}
}
 
開發者ID:ICIJ,項目名稱:extract,代碼行數:41,代碼來源:EmbedSpawner.java

示例14: generateHTML

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Asks Tika to translate the contents into HTML and stores the result through the
 * rendering context's content writer.
 *
 * <p>When the body-only parameter is set, the XML declaration, the XHTML namespace
 * attributes on {@code p}, {@code h1}-{@code h9}, {@code div} and {@code table} tags, and
 * {@code &#13;} entities are stripped from the output.
 *
 * @param p the Tika parser to use
 * @param context rendering context that supplies the source content and receives the HTML
 * @throws RenditionServiceException if the Tika parse fails
 */
private void generateHTML(Parser p, RenderingContext context)
{
   ContentReader contentReader = context.makeContentReader();
   
   // Setup things to parse with
   StringWriter sw = new StringWriter();
   ContentHandler handler = buildContentHandler(sw, context);
   
   // Tell Tika what we're dealing with
   Metadata metadata = new Metadata();
   metadata.set(
         Metadata.CONTENT_TYPE, 
         contentReader.getMimetype()
   );
   metadata.set(
         Metadata.RESOURCE_NAME_KEY, 
         nodeService.getProperty( 
               context.getSourceNode(),
               ContentModel.PROP_NAME
         ).toString()
   );
   if (metadataExtracterConfig != null)
   {
      metadataExtracterConfig.prepareMetadataWithConfigParams(metadata);
   }
   
   // Our parse context needs to extract images
   ParseContext parseContext = new ParseContext();
   parseContext.set(Parser.class, new TikaImageExtractingParser(context));
   
   // Parse
   // NOTE(review): the content input stream is not closed here; confirm who owns it.
   try {
      p.parse(
            contentReader.getContentInputStream(),
            handler, metadata, parseContext
      );
   } catch(Exception e) {
      throw new RenditionServiceException("Tika HTML Conversion Failed", e);
   }
   
   // As a string
   String html = sw.toString();
   
   // If we're doing body-only, remove all the html namespaces
   //  that will otherwise clutter up the document
   boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
   if(bodyOnly) {
      html = html.replaceAll("<\\?xml.*?\\?>", "");
      html = html.replaceAll("<p xmlns=\"http://www.w3.org/1999/xhtml\"","<p");
      // Java replacement strings reference groups with $n; "\\1" would emit a literal '1'
      //  and turn every heading into <h1
      html = html.replaceAll("<h(\\d) xmlns=\"http://www.w3.org/1999/xhtml\"","<h$1");
      html = html.replaceAll("<div xmlns=\"http://www.w3.org/1999/xhtml\"","<div");
      html = html.replaceAll("<table xmlns=\"http://www.w3.org/1999/xhtml\"","<table");
      html = html.replaceAll("&#13;","");
   }
   
   // Save it
   ContentWriter contentWriter = context.makeContentWriter();
   contentWriter.setMimetype("text/html");
   contentWriter.putContent( html );
}
 
開發者ID:Alfresco,項目名稱:alfresco-repository,代碼行數:64,代碼來源:HTMLRenderingEngine.java

示例15: testSupportedMimetypes

import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Test several different files
 * Note - doesn't use extractFromMimetype
 *
 * For every sample "quick" file, the mime type is detected with Tika, the metadata is
 * extracted via extractFromFile, and the common and file-type specific metadata checks
 * are run against the result.
 */
public void testSupportedMimetypes() throws Exception
{
    String[] testFiles = new String[] {
          ".doc", ".docx", ".xls", ".xlsx",
          ".ppt", ".pptx", 
          //".vsd", // Our sample file lacks suitable metadata
          "2010.dwg",
          "2003.mpp", "2007.mpp",
          ".pdf",
          ".odt",
    };
       
    AutoDetectParser ap = new AutoDetectParser();
    for (String fileBase : testFiles)
    {
       String filename = "quick" + fileBase;
       URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
       File file = new File(url.getFile());
       
       // Cheat and ask Tika for the mime type!
       Metadata metadata = new Metadata();
       metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
       // Close the detection stream on every iteration so file handles are not leaked
       TikaInputStream detectionStream = TikaInputStream.get(file);
       MediaType mt;
       try
       {
          mt = ap.getDetector().detect(detectionStream, metadata);
       }
       finally
       {
          detectionStream.close();
       }
       String mimetype = mt.toString();
       
       if (logger.isDebugEnabled())
       {
          logger.debug("Detected mimetype " + mimetype + " for quick test file " + filename);
       }

       // Have it processed
       Map<QName, Serializable> properties = extractFromFile(file, mimetype);
       
       // check we got something
       assertFalse("extractFromMimetype should return at least some properties, " +
       		"none found for " + mimetype + " - " + filename,
          properties.isEmpty());
       
       // check common metadata
       testCommonMetadata(mimetype, properties);
       // check file-type specific metadata
       testFileSpecificMetadata(mimetype, properties);
    }
}
 
開發者ID:Alfresco,項目名稱:alfresco-repository,代碼行數:49,代碼來源:TikaAutoMetadataExtracterTest.java


注:本文中的org.apache.tika.metadata.Metadata.set方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。