本文整理汇总了Java中org.archive.util.ArchiveUtils类的典型用法代码示例。如果您正苦于以下问题:Java ArchiveUtils类的具体用法?Java ArchiveUtils怎么用?Java ArchiveUtils使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
ArchiveUtils类属于org.archive.util包,在下文中一共展示了ArchiveUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: innerProcessResult
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes the given URI through {@code write} when {@code shouldWrite}
 * approves it.
 *
 * An IOException raised along the way is added to the URI's non-fatal
 * failure list and logged; processing then carries on normally.
 *
 * @param uri the crawl URI being processed
 * @return whatever {@code write} returns when a write took place,
 *         otherwise {@code ProcessResult.PROCEED}
 */
@Override
protected ProcessResult innerProcessResult(CrawlURI uri) {
    // Never assigned inside this method; kept so the cleanup call in the
    // finally clause is still executed on every path.
    ReplayInputStream recordedInput = null;
    try {
        if (!shouldWrite(uri)) {
            return ProcessResult.PROCEED;
        }
        return write(uri);
    } catch (IOException ioe) {
        uri.getNonFatalFailures().add(ioe);
        log.error("Failed write of Record: " + uri.toString(), ioe);
        return ProcessResult.PROCEED;
    } finally {
        ArchiveUtils.closeQuietly(recordedInput);
    }
}
示例2: copySingleRecord
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes the given ARCRecord on the given ARCWriter.
 *
 * The ARCWriter.write method takes the metadata fields as separate
 * arguments instead of accepting an ARCRecordMetaData object, so the
 * fields are unpacked here. ArchiveUtils.getDate is used to convert the
 * ARC-style datestring of the record into a Date, whose millisecond value
 * is handed to the writer.
 *
 * @see ArchiveUtils#getDate(java.lang.String)
 * @param aw
 *            The ARCWriter to output the record on.
 * @param record
 *            The record to output
 * @throws IOFailure
 *             wrapping any exception raised while reading the metadata or
 *             writing the record
 */
private static void copySingleRecord(ARCWriter aw, ARCRecord record) {
    try {
        // Unpack the metadata fields the writer wants as separate arguments.
        ARCRecordMetaData meta = record.getMetaData();
        String uri = meta.getUrl();
        String mime = meta.getMimetype();
        String ip = meta.getIp();
        // ARC-style datestring -> Date -> epoch milliseconds.
        long timeStamp = ArchiveUtils.getDate(meta.getDate()).getTime();
        // The record itself is passed as the content source
        // (per the original note, ARCRecord extends InputStream).
        aw.write(uri, mime, ip, timeStamp, meta.getLength(), record);
    } catch (Exception e) {
        // Trailing space keeps the record description from running into
        // the last word of the message.
        throw new IOFailure("Error occurred while writing an ARC record "
                + record, e);
    }
}
示例3: createARCWriter
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Opens the given file for output and wraps it in a new, uncompressed
 * ARCWriter stamped with the current time.
 *
 * @param newFile the ARC file the writer writes to; it is also the name
 *                handed to the ARCWriter constructor
 * @return a new ARCWriter writing to newFile
 * @throws IOFailure if an IOException occurs while setting up the writer;
 *                   a stream that was already opened is closed first
 */
public static ARCWriter createARCWriter(File newFile) {
    PrintStream out = null;
    try {
        out = new PrintStream(new FileOutputStream(newFile));
        return new ARCWriter(
                new AtomicInteger(),
                out,
                newFile,   // name used for the first (file metadata) record
                false,     // no compression
                ArchiveUtils.get14DigitDate(System.currentTimeMillis()),
                null);     // no particular file metadata to add
    } catch (IOException e) {
        if (out != null) {
            out.close();
        }
        final String message = "Could not create ARCWriter to file '"
                + newFile + "'.\n";
        log.warn(message);
        throw new IOFailure(message, e);
    }
}
示例4: appendTimeField
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Appends one time field to the builder, preceded by a single space when
 * the builder already holds content.
 *
 * A null value is written as "-". A Header value is trimmed, parsed as a
 * date and written as a 14-digit date (a result starting with "209" is
 * rewritten to start with "199"); if parsing fails, 'e' is written
 * instead. Any other object is appended as-is.
 *
 * @param builder the builder to append to
 * @param obj     the field value; may be null
 */
protected static void appendTimeField(StringBuilder builder, Object obj) {
    if (builder.length() > 0) {
        // separate from the previous field
        builder.append(' ');
    }
    if (obj == null) {
        builder.append("-");
        return;
    }
    if (!(obj instanceof Header)) {
        builder.append(obj);
        return;
    }
    final String raw = ((Header) obj).getValue().trim();
    String stamp;
    try {
        Date parsed = DateUtil.parseDate(raw);
        stamp = ArchiveUtils.get14DigitDate(parsed);
    } catch (DateParseException e) {
        builder.append('e');
        return;
    }
    if (stamp.startsWith("209")) {
        stamp = "199" + stamp.substring(3);
    }
    builder.append(stamp);
}
示例5: generateNewBasename
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Builds a fresh basename by interpolating the configured template.
 *
 * The values offered to the template are the configured prefix, a padded
 * serial number, a 17-digit and a 14-digit timestamp, plus the global
 * system properties. The 17-digit timestamp is also stored as the current
 * timestamp. According to the original documentation, the recommended
 * default template yields a unique basename under reasonable assumptions.
 */
protected void generateNewBasename() {
    Properties templateValues = new Properties();
    templateValues.setProperty("prefix", settings.getPrefix());
    synchronized (this.getClass()) {
        // Serial number and timestamps are minted under one lock so their
        // sort order is never inverted.
        String serial = WriterPoolMember.serialNoFormatter.format(serialNo.getAndIncrement());
        String stamp17 = ArchiveUtils.getUnique17DigitDate();
        String stamp14 = ArchiveUtils.getUnique14DigitDate();
        templateValues.setProperty("serialno", serial);
        templateValues.setProperty("timestamp17", stamp17);
        templateValues.setProperty("timestamp14", stamp14);
        currentTimestamp = stamp17;
    }
    currentBasename = PropertyUtils.interpolateWithProperties(
            settings.getTemplate(), templateValues, System.getProperties());
}
示例6: writeRandomHTTPRecord
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes one generated "text/html" record to the given ARCWriter.
 *
 * The record body is the text returned by {@code getContent} for the
 * index, followed by a single newline. The current 14-digit date, parsed
 * as a long, is passed as the record's timestamp argument.
 *
 * @param arcWriter the writer receiving the record
 * @param index     number used in the record URL and to obtain the content
 * @return the record length: content bytes plus one for the newline
 * @throws IOException if buffering or writing the record fails
 */
@SuppressWarnings("deprecation")
protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index)
        throws IOException {
    final String id = Integer.toString(index);
    final ByteArrayOutputStream body = new ByteArrayOutputStream();
    final String stamp = ArchiveUtils.get14DigitDate();
    final byte[] content = (getContent(id)).getBytes();
    body.write(content);
    // Put back the newline that separates records.
    body.write("\n".getBytes());
    final int recordLength = content.length + 1;
    arcWriter.write("http://www.one.net/id=" + id, "text/html",
            "0.1.2.3", Long.parseLong(stamp), recordLength, body);
    return recordLength;
}
示例7: writeWarcinfoRecord
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes a warcinfo record whose body is a small ANVL record with the
 * labels "size" and "operator".
 *
 * @param writer the WARC writer that generates the record id and receives
 *               the record
 * @throws IOException if the writer fails
 */
private void writeWarcinfoRecord(WARCWriter writer)
        throws IOException {
    // Body of the record: two ANVL label/value pairs as UTF-8 bytes.
    ANVLRecord fields = new ANVLRecord();
    fields.addLabelValue("size", "1G");
    fields.addLabelValue("operator", "igor");
    byte[] payload = fields.getUTF8Bytes();

    WARCRecordInfo info = new WARCRecordInfo();
    info.setType(WARCRecordType.warcinfo);
    info.setUrl(null);
    info.setCreate14DigitDate(ArchiveUtils.getLog14Date());
    info.setMimetype(ANVLRecord.MIMETYPE);
    info.setExtraHeaders(null);
    info.setEnforceLength(true);
    info.setContentStream(new ByteArrayInputStream(payload));
    info.setContentLength((long) payload.length);

    final URI recordId = writer.generateRecordId(
            WARCWriter.TYPE, WARCRecordType.warcinfo.toString());
    info.setRecordId(recordId);
    writer.writeRecord(info);
}
示例8: writeBasicRecords
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes ten metadata records to the given writer.
 *
 * All ten share one WARCRecordInfo (same URL, date, mimetype, extra
 * headers and record id); only the content stream and content length are
 * replaced before each write. Record number i carries the body
 * "i. Any old content.".
 *
 * @param writer the WARC writer receiving the records
 * @throws IOException if writing a record fails
 */
protected void writeBasicRecords(final WARCWriter writer)
        throws IOException {
    WARCRecordInfo info = new WARCRecordInfo();
    info.setType(WARCRecordType.metadata);
    info.setUrl("http://www.archive.org/");
    info.setCreate14DigitDate(ArchiveUtils.get14DigitDate());
    info.setMimetype("no/type");
    info.setEnforceLength(true);

    ANVLRecord extraHeaders = new ANVLRecord();
    extraHeaders.addLabelValue("x", "y");
    extraHeaders.addLabelValue("a", "b");
    info.setExtraHeaders(extraHeaders);

    URI recordId = (new UUIDGenerator()).getQualifiedRecordID(
            TYPE, WARCRecordType.metadata.toString());
    info.setRecordId(recordId);

    final String content = "Any old content.";
    int n = 0;
    while (n < 10) {
        String text = n + ". " + content;
        byte[] payload = text.getBytes(UTF8Bytes.UTF8);
        info.setContentStream(new ByteArrayInputStream(payload));
        info.setContentLength((long) payload.length);
        writer.writeRecord(info);
        n++;
    }
}
示例9: writeRecord
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Writes the buffered bytes as a single resource record.
 *
 * The record is stamped with the current 14-digit date, carries no extra
 * headers and receives a record id generated by the writer.
 *
 * @param w        the WARC writer receiving the record
 * @param url      URL to set on the record
 * @param mimetype mimetype to set on the record
 * @param len      value set as the record's content length
 * @param baos     buffer whose bytes become the record's content stream
 * @throws IOException if the writer fails
 */
protected static void writeRecord(WARCWriter w, String url,
        String mimetype, int len, ByteArrayOutputStream baos)
        throws IOException {
    final WARCRecordInfo info = new WARCRecordInfo();
    info.setType(WARCRecordType.resource);
    info.setUrl(url);
    info.setCreate14DigitDate(ArchiveUtils.get14DigitDate());
    info.setMimetype(mimetype);
    info.setRecordId(w.generateRecordId(null));
    info.setExtraHeaders(null);
    final byte[] payload = baos.toByteArray();
    info.setContentStream(new ByteArrayInputStream(payload));
    info.setContentLength((long) len);
    info.setEnforceLength(true);
    w.writeRecord(info);
}
示例10: getToolsARCWriter
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Builds the ARCWriter used by the tools ArcMerge and ArcWrap: an
 * uncompressed writer stamped with the current time and given no extra
 * file metadata.
 *
 * @param stream the PrintStream handed to the writer
 * @param destinationArcfile the destination ARC file handed to the writer
 * @return ARCWriter to be used by tools ArcMerge and ArcWrap
 * @throws IOException passed on from the ARCWriter constructor
 */
public static ARCWriter getToolsARCWriter(PrintStream stream,
        File destinationArcfile) throws IOException {
    final long now = System.currentTimeMillis();
    ARCWriter toolsWriter = new ARCWriter(
            new AtomicInteger(),
            stream,
            destinationArcfile,
            false,                             // no compression
            ArchiveUtils.get14DigitDate(now),  // current time
            null);                             // no particular file metadata
    return toolsWriter;
}
示例11: getTestARCWriter
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Encapsulates ARCWriter creation for test purposes. The writer is
 * uncompressed, stamped with the current time and given no extra file
 * metadata.
 *
 * @param stream the PrintStream handed to the writer
 * @param arcfile the destination arcfile handed to the writer
 * @return new ARCWriter
 * @throws IOException passed on from the ARCWriter constructor
 */
public static ARCWriter getTestARCWriter(PrintStream stream, File arcfile)
        throws IOException {
    final long now = System.currentTimeMillis();
    ARCWriter testWriter = new ARCWriter(
            new AtomicInteger(),
            stream,
            arcfile,
            false,                             // no compression
            ArchiveUtils.get14DigitDate(now),  // current time
            null);                             // no particular file metadata
    return testWriter;
}
示例12: getTestWARCWriter
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Encapsulates WARCWriter creation for test purposes. The returned writer
 * is a WARCWriterNAS that is uncompressed, stamped with the current time
 * and given no extra file metadata.
 *
 * @param stream the PrintStream handed to the writer
 * @param warcfile the destination warcfile handed to the writer
 * @return new WARCWriter
 * @throws IOException passed on from the WARCWriterNAS constructor
 */
public static WARCWriter getTestWARCWriter(PrintStream stream, File warcfile)
        throws IOException {
    final long now = System.currentTimeMillis();
    WARCWriter testWriter = new WARCWriterNAS(
            new AtomicInteger(),
            stream,
            warcfile,
            false,                             // no compression
            ArchiveUtils.get14DigitDate(now),  // current time
            null);                             // no particular file metadata
    return testWriter;
}
示例13: dateRangeCheck
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Decides whether the date embedded in the key lies within the configured
 * start/end range.
 *
 * The date is the text following the first space of the key. The check is
 * lenient: a key without a space, or with a date that cannot be parsed,
 * is accepted.
 *
 * @param key the lookup key; the part after the first space is treated as
 *            the date string
 * @return false when this instance is disabled, or when the parsed date
 *         is before the start date or after the end date; true otherwise
 */
public boolean dateRangeCheck(String key) {
    // A cluster can be "disabled" by specifying an empty ALL.loc
    if (disabled) {
        return false;
    }
    // No bounds configured: everything is in range.
    if (startDate == null && endDate == null) {
        return true;
    }
    final int separator = key.indexOf(' ');
    if (separator < 0) {
        return true;
    }
    final Date requested;
    try {
        requested = ArchiveUtils.getDate(key.substring(separator + 1));
    } catch (ParseException e) {
        return true;
    }
    if (startDate != null && requested.before(startDate)) {
        return false;
    }
    return endDate == null || !requested.after(endDate);
}
示例14: getMetaLine
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Assembles and validates the metadata line for an ARCRecord.
 *
 * The timestamp is rendered as a 14-digit date, the content type is
 * passed through {@code MimetypeUtils.truncate}, and the record length is
 * rendered as a decimal string before the pieces are combined.
 *
 * @param uri record URI
 * @param contentType content type of the record
 * @param hostIP IP of the host
 * @param fetchBeginTimeStamp fetch start time; must be greater than zero
 * @param recordLength length of the record
 * @return Metadata line for an ARCRecord made of passed components.
 * @exception IOException if fetchBeginTimeStamp is zero or negative
 */
protected String getMetaLine(String uri, String contentType, String hostIP,
        long fetchBeginTimeStamp, long recordLength)
        throws IOException {
    if (fetchBeginTimeStamp <= 0) {
        throw new IOException("Bogus fetchBeginTimestamp: " + fetchBeginTimeStamp);
    }
    return validateMetaLine(
            createMetaline(
                    uri,
                    hostIP,
                    ArchiveUtils.get14DigitDate(fetchBeginTimeStamp),
                    MimetypeUtils.truncate(contentType),
                    Long.toString(recordLength)));
}
示例15: dump
import org.archive.util.ArchiveUtils; //导入依赖的package包/类
/**
 * Re-writes every record of this reader through an ARCWriter attached to
 * System.out.
 *
 * Digesting is switched off first. The content of the first record is
 * read in full and handed to the writer settings as the metadata list;
 * that record is not written as an ordinary record. Each later record is
 * passed to the writer with its URL, mimetype, IP, parsed 14-digit date
 * and length.
 *
 * @param compress compression flag passed into the writer settings
 * @throws IOException if reading a record or writing it fails
 * @throws java.text.ParseException if a record's date cannot be parsed
 */
public void dump(final boolean compress)
        throws IOException, java.text.ParseException {
    // No point digesting if we're doing a dump.
    setDigest(false);
    ARCWriter out = null;
    boolean headerPending = true;
    Iterator<ArchiveRecord> records = iterator();
    while (records.hasNext()) {
        ARCRecord rec = (ARCRecord) records.next();
        ARCRecordMetaData meta = rec.getMetaData();
        if (!headerPending) {
            // Ordinary record: copy it to the writer.
            out.write(meta.getUrl(), meta.getMimetype(), meta.getIp(),
                    ArchiveUtils.parse14DigitDate(meta.getDate()).getTime(),
                    (int) meta.getLength(), rec);
            continue;
        }
        // First record: its content becomes the writer's metadata.
        headerPending = false;
        ByteArrayOutputStream buffer =
                new ByteArrayOutputStream(rec.available());
        // Byte-at-a-time copy is slow but happens only once per ARC.
        while (rec.available() > 0) {
            buffer.write(rec.read());
        }
        List<String> metadata = new ArrayList<String>();
        metadata.add(buffer.toString(WriterPoolMember.UTF8));
        // Assume getArc returns full path to file. ARCWriter
        // or new File will complain if it is otherwise.
        List<File> outDirs = new ArrayList<File>();
        WriterPoolSettingsData settings =
                new WriterPoolSettingsData("", "", -1L, compress, outDirs, metadata);
        out = new ARCWriter(new AtomicInteger(), System.out,
                new File(meta.getArc()), settings);
    }
}