当前位置: 首页>>代码示例>>Java>>正文


Java WARCConstants类代码示例

本文整理汇总了Java中org.archive.io.warc.WARCConstants的典型用法代码示例。如果您正苦于以下问题：Java WARCConstants类的具体用法？Java WARCConstants怎么用？Java WARCConstants使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


WARCConstants类属于org.archive.io.warc包,在下文中一共展示了WARCConstants类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: write

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Writes the given payload as a WARC resource record through {@code writer}.
 *
 * <p>The record carries three extra named fields: the block digest (SHA-1 of
 * the payload), a WARC-Warcinfo-ID derived from {@code warcInfoUID}, and a
 * WARC-IP-Address. A fresh random {@code urn:uuid:} URI is used as record id.
 *
 * @param uri the URI to store the resource under
 * @param contentType the content type to record for the payload
 * @param hostIP not read by this implementation; the WARC-IP-Address field is
 *        filled from {@code SystemUtils.getLocalIP()} instead
 * @param fetchBeginTimeStamp milliseconds since the epoch, formatted with the
 *        WARC date format and used as the record date
 * @param payload the bytes to store as the record content
 * @throws java.io.IOException propagated from the digest calculation or the
 *         underlying writer
 */
@Override
public void write(String uri, String contentType, String hostIP,
        long fetchBeginTimeStamp, byte[] payload)
                throws java.io.IOException {

    String warcDate = ArchiveDateConverter.getWarcDateFormat()
            .format(new Date(fetchBeginTimeStamp));

    // The digest calculation consumes its stream, so the writer below gets
    // its own fresh stream over the same bytes.
    String blockDigest = ChecksumCalculator.calculateSha1(
            new ByteArrayInputStream(payload));

    ANVLRecord namedFields = new ANVLRecord(3);
    namedFields.addLabelValue(
            WARCConstants.HEADER_KEY_BLOCK_DIGEST, "sha1:" + blockDigest);
    namedFields.addLabelValue("WARC-Warcinfo-ID",
            generateEncapsulatedRecordID(warcInfoUID));
    namedFields.addLabelValue("WARC-IP-Address", SystemUtils.getLocalIP());

    URI recordId;
    try {
        recordId = new URI("urn:uuid:" + UUID.randomUUID().toString());
    } catch (URISyntaxException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalState("Epic fail creating URI from UUID!", e);
    }

    ByteArrayInputStream in = new ByteArrayInputStream(payload);
    writer.writeResourceRecord(uri, warcDate, contentType,
            recordId, namedFields, in, payload.length);
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:26,代码来源:MetadataFileWriterWarc.java

示例2: index

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Create and return the index of the ArcHarvestFile.
 *
 * <p>Every record of the archive file named by {@code getName()} inside
 * {@code baseDir} is visited. WARC records are indexed only when they are
 * response records whose mimetype is not {@code text/dns}; every non-WARC
 * record is indexed as an ARC record. As a side effect the
 * {@code compressed} flag of this object is set from the reader.
 *
 * @param baseDir the base directory of the arcs
 * @return the resources indexed by the helper methods, in a new map
 * @throws IOException thrown if there is an error
 * @throws ParseException propagated from the per-record indexing helpers
 */
public Map<String, HarvestResourceDTO> index(File baseDir) throws IOException, ParseException {
	Map<String, HarvestResourceDTO> results = new HashMap<String, HarvestResourceDTO>();

	File theArchiveFile = new File(baseDir, this.getName());
	ArchiveReader reader = ArchiveReaderFactory.get(theArchiveFile);
	try {
		this.compressed = reader.isCompressed();

		Iterator<ArchiveRecord> it = reader.iterator();
		while(it.hasNext()) {
			ArchiveRecord rec = it.next();

			if(!(rec instanceof WARCRecord)) {
				indexARCRecord(rec, results);
				continue;
			}

			// A record without a WARC-Type header cannot be a response; skip it
			// instead of failing the whole index with a NullPointerException.
			Object type = rec.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
			if(type == null || !WARCConstants.RESPONSE.equals(type.toString())) {
				continue;
			}

			// Constant-first comparison: a missing mimetype is treated as "not DNS".
			String mime = rec.getHeader().getMimetype();
			if(!"text/dns".equals(mime)) {
				indexWARCResponse(rec, results);
			}
		}
	}
	finally {
		// Release the reader even when indexing a record throws.
		reader.close();
	}

	return results;
}
 
开发者ID:DIA-NZ,项目名称:webcurator,代码行数:35,代码来源:ArcHarvestFileDTO.java

示例3: skipHeaders

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Parses the HTTP headers of the given record using the WARC default
 * encoding and discards the parsed result.
 * NOTE(review): presumably called to advance the record stream past the
 * headers so the body can be read next — confirm against callers.
 *
 * @param record the archive record to read the headers from
 * @throws IOException if parsing the headers fails
 */
private void skipHeaders(ArchiveRecord record) throws IOException {
	final String headerEncoding = WARCConstants.DEFAULT_ENCODING;
	HttpParser.parseHeaders(record, headerEncoding);
}
 
开发者ID:DIA-NZ,项目名称:webcurator,代码行数:4,代码来源:ArcDigitalAssetStoreService.java

示例4: getRecordType

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Returns the value of the WARC type header of the given record.
 *
 * @param record the WARC record to inspect; checked for null via
 *        {@code ArgumentNotValid.checkNotNull}
 * @return the header value stored under
 *         {@code WARCConstants.HEADER_KEY_TYPE}, cast to a String
 */
public static String getRecordType(WARCRecord record) {
    ArgumentNotValid.checkNotNull(record, "record");
    final Object recordType =
            record.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
    return (String) recordType;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:11,代码来源:WARCUtils.java

示例5: adaptInner

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Turns a WARC record into a capture search result.
 *
 * <p>The generic fields are filled by {@code genericResult}; the record type
 * then decides what else happens:
 * <ul>
 * <li>response: DNS responses get the digest of the fully read record and
 *     their mime type, all others are handed to
 *     {@code adaptWARCHTTPResponse};</li>
 * <li>revisit: mime type {@code warc/revisit};</li>
 * <li>request / metadata: mime type {@code warc/request} or
 *     {@code warc/metadata} when {@code processAll} is set, otherwise the
 *     result is {@code null};</li>
 * <li>warcinfo: mime type {@code WARC_FILEDESC_VERSION};</li>
 * <li>anything else: logged, and the generic result is returned as is.</li>
 * </ul>
 *
 * @param rec the WARC record to adapt
 * @return the search result, or {@code null} for request/metadata records
 *         when {@code processAll} is not set
 * @throws IOException propagated from closing or adapting the record
 */
private CaptureSearchResult adaptInner(WARCRecord rec) throws IOException {
	final ArchiveRecordHeader header = rec.getHeader();
	final String recordType =
			header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE).toString();

	// Generic fields are filled for every record before looking at the type.
	final CaptureSearchResult result = genericResult(rec);

	if (WARCConstants.RESPONSE.equals(recordType)) {
		final String mime = annotater.transformHTTPMime(header.getMimetype());
		if (!"text/dns".equals(mime)) {
			return adaptWARCHTTPResponse(result, rec);
		}
		// close to complete reading, then the digest is legit
		// TODO: DO we want to use the WARC header digest for this?
		rec.close();
		result.setDigest(transformWARCDigest(rec.getDigestStr()));
		result.setMimeType(mime);
		return result;
	}

	if (WARCConstants.REVISIT.equals(recordType)) {
		result.setMimeType("warc/revisit");
		return result;
	}

	if (WARCConstants.REQUEST.equals(recordType)) {
		if (!processAll) {
			return null;
		}
		result.setMimeType("warc/request");
		return result;
	}

	if (WARCConstants.METADATA.equals(recordType)) {
		if (!processAll) {
			return null;
		}
		result.setMimeType("warc/metadata");
		return result;
	}

	if (WARCConstants.WARCINFO.equals(recordType)) {
		result.setMimeType(WARC_FILEDESC_VERSION);
		return result;
	}

	// Unknown type: only logged, the generic result is still returned.
	LOGGER.info("Skipping record type : " + recordType);
	return result;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:54,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java

示例6: genericResult

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Builds the part of a capture search result that does not depend on the
 * WARC record type.
 *
 * <p>Mime type, HTTP code and redirect URL start out as
 * {@code DEFAULT_VALUE}. Timestamp, file, offset and digest come from the
 * record header. When the header has a URL it becomes the original URL and
 * its canonicalized form the URL key (falling back to the raw URL if
 * canonicalization fails). Without a URL, warcinfo records get a
 * {@code filedesc:} pseudo URL built from the filename header and all other
 * records get {@code DEFAULT_VALUE}.
 *
 * @param rec the WARC record to read the header from
 * @return a new, partially filled search result
 */
private CaptureSearchResult genericResult(WARCRecord rec) {
	final CaptureSearchResult captured = new CaptureSearchResult();
	captured.setMimeType(DEFAULT_VALUE);
	captured.setHttpCode(DEFAULT_VALUE);
	captured.setRedirectUrl(DEFAULT_VALUE);

	final ArchiveRecordHeader header = rec.getHeader();
	final String file = transformWARCFilename(header.getReaderIdentifier());
	final long offset = header.getOffset();

	captured.setCaptureTimestamp(transformWARCDate(header.getDate()));
	captured.setFile(file);
	captured.setOffset(offset);
	final Object payloadDigest =
			header.getHeaderValue(WARCRecord.HEADER_KEY_PAYLOAD_DIGEST);
	captured.setDigest(transformWARCDigest(payloadDigest));

	final String origUrl = header.getUrl();
	if (origUrl != null) {
		captured.setOriginalUrl(origUrl);
		try {
			captured.setUrlKey(canonicalizer.urlStringToKey(origUrl));
		} catch (URIException e) {
			// Log at most the first 100 characters of the offending URL.
			final String shortUrl =
					origUrl.substring(0, Math.min(origUrl.length(), 100));
			LOGGER.warning("FAILED canonicalize(" + shortUrl + "):" +
					file + " " + offset);
			captured.setUrlKey(origUrl);
		}
		return captured;
	}

	// No URL in the header: only warcinfo records get a synthetic one.
	final String type =
			header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE).toString();
	if (!type.equals(WARCConstants.WARCINFO)) {
		captured.setOriginalUrl(DEFAULT_VALUE);
		captured.setUrlKey(DEFAULT_VALUE);
		return captured;
	}

	final String filename =
			header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME).toString();
	final String fileDescUrl = "filedesc:" + filename;
	captured.setOriginalUrl(fileDescUrl);
	captured.setUrlKey(fileDescUrl);
	return captured;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:51,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java

示例7: getRecordType

import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Looks up the WARC type header of a record.
 *
 * @param record the WARC record whose header is read
 * @return the value stored under {@code WARCConstants.HEADER_KEY_TYPE},
 *         cast to a String
 */
public static String getRecordType(WARCRecord record) {
    final Object typeValue =
            record.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
    return (String) typeValue;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:10,代码来源:WARCUtilsInTest.java


注:本文中的org.archive.io.warc.WARCConstants类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。