本文整理汇总了Java中org.archive.io.warc.WARCConstants类的典型用法代码示例。如果您正苦于以下问题:Java WARCConstants类的具体用法?Java WARCConstants怎么用?Java WARCConstants使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
WARCConstants类属于org.archive.io.warc包,在下文中一共展示了WARCConstants类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: write
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Writes the given payload as a single WARC resource record.
 *
 * <p>The record carries three extra named fields: a SHA-1 block digest of
 * the payload, a "WARC-Warcinfo-ID" derived from {@code warcInfoUID}, and a
 * "WARC-IP-Address" taken from {@code SystemUtils.getLocalIP()}. A fresh
 * random {@code urn:uuid:} URI is used as the record id.
 *
 * @param uri the URI the record is written under
 * @param contentType the content type of the payload
 * @param hostIP not used by this implementation; the IP field is filled
 *        from {@code SystemUtils.getLocalIP()} instead
 * @param fetchBeginTimeStamp the record date, in milliseconds since the epoch
 * @param payload the bytes to store in the record
 * @throws java.io.IOException declared for failures while digesting or
 *         writing the record
 */
@Override
public void write(String uri, String contentType, String hostIP,
        long fetchBeginTimeStamp, byte[] payload)
        throws java.io.IOException {
    String warcDate = ArchiveDateConverter.getWarcDateFormat()
            .format(new Date(fetchBeginTimeStamp));

    // The digest calculation consumes its stream, so the writer below gets
    // its own fresh stream over the same bytes.
    String blockDigest = ChecksumCalculator.calculateSha1(
            new ByteArrayInputStream(payload));

    ANVLRecord namedFields = new ANVLRecord(3);
    namedFields.addLabelValue(
            WARCConstants.HEADER_KEY_BLOCK_DIGEST, "sha1:" + blockDigest);
    namedFields.addLabelValue("WARC-Warcinfo-ID",
            generateEncapsulatedRecordID(warcInfoUID));
    namedFields.addLabelValue("WARC-IP-Address", SystemUtils.getLocalIP());

    URI recordId;
    try {
        recordId = new URI("urn:uuid:" + UUID.randomUUID().toString());
    } catch (URISyntaxException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalState("Epic fail creating URI from UUID!", e);
    }

    writer.writeResourceRecord(uri, warcDate, contentType,
            recordId, namedFields, new ByteArrayInputStream(payload),
            payload.length);
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:26,代码来源:MetadataFileWriterWarc.java
示例2: index
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Create and return the index of the ArcHarvestFile.
 *
 * <p>Every record of the archive file is visited. Records that are not
 * {@code WARCRecord}s are passed to {@code indexARCRecord}. For WARC
 * records, only those whose type header equals
 * {@code WARCConstants.RESPONSE} and whose mimetype is not
 * {@code "text/dns"} are passed to {@code indexWARCResponse}; WARC records
 * without a type header are skipped. The reader is closed even when
 * indexing fails part-way through.
 *
 * @param baseDir the base directory of the arcs
 * @return the indexed resources collected by the indexing helpers
 * @throws IOException thrown if there is an error
 * @throws ParseException declared by this method; the visible body does not
 *         show which call raises it
 */
public Map<String, HarvestResourceDTO> index(File baseDir) throws IOException, ParseException {
    Map<String, HarvestResourceDTO> results = new HashMap<String, HarvestResourceDTO>();
    File theArchiveFile = new File(baseDir, this.getName());
    ArchiveReader reader = ArchiveReaderFactory.get(theArchiveFile);
    try {
        this.compressed = reader.isCompressed();
        Iterator<ArchiveRecord> it = reader.iterator();
        while (it.hasNext()) {
            ArchiveRecord rec = it.next();
            if (!(rec instanceof WARCRecord)) {
                indexARCRecord(rec, results);
                continue;
            }
            // A missing type header means the record cannot be a response; skip it
            // instead of failing with a NullPointerException.
            Object type = rec.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
            if (type == null || !WARCConstants.RESPONSE.equals(type.toString())) {
                continue;
            }
            // Constant-first comparison: a response without a mimetype is not a
            // DNS record and is still indexed.
            String mime = rec.getHeader().getMimetype();
            if (!"text/dns".equals(mime)) {
                indexWARCResponse(rec, results);
            }
        }
    } finally {
        // Release the underlying file handle on both normal and exceptional exit.
        reader.close();
    }
    return results;
}
示例3: skipHeaders
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Runs {@code HttpParser.parseHeaders} over the record using the WARC
 * default encoding and discards the parsed result.
 *
 * @param record the record to parse headers from
 * @throws IOException if parsing the headers fails
 */
private void skipHeaders(ArchiveRecord record) throws IOException {
    final String headerEncoding = WARCConstants.DEFAULT_ENCODING;
    HttpParser.parseHeaders(record, headerEncoding);
}
示例4: getRecordType
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Looks up the WARC type header of a record.
 *
 * @param record the WARC record to inspect; checked with
 *        {@code ArgumentNotValid.checkNotNull}
 * @return the value stored under {@code WARCConstants.HEADER_KEY_TYPE},
 *         cast to a String
 */
public static String getRecordType(WARCRecord record) {
    ArgumentNotValid.checkNotNull(record, "record");
    final Object typeValue =
            record.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
    return (String) typeValue;
}
示例5: adaptInner
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Converts a WARC record into a search result, dispatching on the record's
 * WARC type header.
 *
 * <ul>
 * <li>response: "text/dns" records are closed and get their digest and mime
 *     type set directly; all others go through
 *     {@code adaptWARCHTTPResponse}.</li>
 * <li>revisit: mime type is set to "warc/revisit".</li>
 * <li>request / metadata: kept (with a "warc/request" or "warc/metadata"
 *     mime type) only when {@code processAll} is set; otherwise
 *     {@code null} is returned.</li>
 * <li>warcinfo: mime type is set to {@code WARC_FILEDESC_VERSION}.</li>
 * <li>anything else: a "Skipping record type" message is logged, but the
 *     generic result is still returned.</li>
 * </ul>
 *
 * @param rec the WARC record to adapt
 * @return the adapted result, or {@code null} for request/metadata records
 *         when {@code processAll} is false
 * @throws IOException declared for failures while reading or closing the record
 */
private CaptureSearchResult adaptInner(WARCRecord rec) throws IOException {
    final ArchiveRecordHeader recordHeader = rec.getHeader();
    final String recordType =
            recordHeader.getHeaderValue(WARCConstants.HEADER_KEY_TYPE).toString();
    final CaptureSearchResult generic = genericResult(rec);

    if (WARCConstants.RESPONSE.equals(recordType)) {
        final String mime = annotater.transformHTTPMime(recordHeader.getMimetype());
        if (!"text/dns".equals(mime)) {
            return adaptWARCHTTPResponse(generic, rec);
        }
        // close to complete reading, then the digest is legit
        // TODO: DO we want to use the WARC header digest for this?
        rec.close();
        generic.setDigest(transformWARCDigest(rec.getDigestStr()));
        generic.setMimeType(mime);
        return generic;
    }

    if (WARCConstants.REVISIT.equals(recordType)) {
        generic.setMimeType("warc/revisit");
        return generic;
    }

    if (WARCConstants.REQUEST.equals(recordType)) {
        if (!processAll) {
            return null;
        }
        generic.setMimeType("warc/request");
        return generic;
    }

    if (WARCConstants.METADATA.equals(recordType)) {
        if (!processAll) {
            return null;
        }
        generic.setMimeType("warc/metadata");
        return generic;
    }

    if (WARCConstants.WARCINFO.equals(recordType)) {
        generic.setMimeType(WARC_FILEDESC_VERSION);
        return generic;
    }

    // Unknown type: logged as skipped, yet the generic result is still handed back.
    LOGGER.info("Skipping record type : " + recordType);
    return generic;
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:54,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java
示例6: genericResult
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Builds the part of a search result that is common to all WARC record
 * types: default mime type / HTTP code / redirect URL, capture timestamp,
 * file, offset, payload digest, original URL and URL key.
 *
 * <p>When the header has no URL, warcinfo records get a
 * {@code "filedesc:" + filename} URL and key, and every other type gets
 * {@code DEFAULT_VALUE}. When canonicalization of a present URL fails, a
 * warning is logged and the raw URL is used as the key.
 *
 * @param rec the WARC record to read header values from
 * @return a new, partially populated search result
 */
private CaptureSearchResult genericResult(WARCRecord rec) {
    final CaptureSearchResult capture = new CaptureSearchResult();
    capture.setMimeType(DEFAULT_VALUE);
    capture.setHttpCode(DEFAULT_VALUE);
    capture.setRedirectUrl(DEFAULT_VALUE);

    final ArchiveRecordHeader warcHeader = rec.getHeader();
    final String archiveFile = transformWARCFilename(warcHeader.getReaderIdentifier());
    final long recordOffset = warcHeader.getOffset();

    capture.setCaptureTimestamp(transformWARCDate(warcHeader.getDate()));
    capture.setFile(archiveFile);
    capture.setOffset(recordOffset);
    capture.setDigest(transformWARCDigest(
            warcHeader.getHeaderValue(WARCRecord.HEADER_KEY_PAYLOAD_DIGEST)));

    final String targetUrl = warcHeader.getUrl();
    if (targetUrl == null) {
        // No target URL: only warcinfo records get a synthetic "filedesc:" URL.
        final String recordType =
                warcHeader.getHeaderValue(WARCConstants.HEADER_KEY_TYPE).toString();
        if (recordType.equals(WARCConstants.WARCINFO)) {
            final String filedescUrl = "filedesc:"
                    + warcHeader.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME).toString();
            capture.setOriginalUrl(filedescUrl);
            capture.setUrlKey(filedescUrl);
        } else {
            capture.setOriginalUrl(DEFAULT_VALUE);
            capture.setUrlKey(DEFAULT_VALUE);
        }
        return capture;
    }

    capture.setOriginalUrl(targetUrl);
    try {
        capture.setUrlKey(canonicalizer.urlStringToKey(targetUrl));
    } catch (URIException e) {
        // Log at most the first 100 characters of the offending URL.
        final String loggedUrl = targetUrl.substring(0, Math.min(targetUrl.length(), 100));
        LOGGER.warning("FAILED canonicalize(" + loggedUrl + "):" +
                archiveFile + " " + recordOffset);
        capture.setUrlKey(targetUrl);
    }
    return capture;
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:51,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java
示例7: getRecordType
import org.archive.io.warc.WARCConstants; //导入依赖的package包/类
/**
 * Reads the WARC type header from the given record.
 *
 * @param record the WARC record to inspect
 * @return the value stored under {@code WARCConstants.HEADER_KEY_TYPE},
 *         cast to a String
 */
public static String getRecordType(WARCRecord record) {
    return (String) record.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
}