本文整理匯總了Java中org.archive.io.ReplayInputStream類的典型用法代碼示例。如果您正苦於以下問題:Java ReplayInputStream類的具體用法?Java ReplayInputStream怎麼用?Java ReplayInputStream使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
ReplayInputStream類屬於org.archive.io包,在下文中一共展示了ReplayInputStream類的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: innerProcessResult
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes the record for the given URI when {@code shouldWrite} accepts it.
 *
 * <p>An {@link IOException} raised while writing is not propagated: it is
 * added to the URI's non-fatal failures and logged, and processing then
 * proceeds normally.
 *
 * @param uri the crawl URI being processed
 * @return the result of {@code write(uri)} when the URI was written;
 *         {@code ProcessResult.PROCEED} when it was skipped or the write
 *         failed with an {@code IOException}
 */
@Override
protected ProcessResult innerProcessResult(CrawlURI uri) {
    // No replay stream is opened in this method, so there is nothing to
    // close here; the previous always-null stream local was dead code.
    try {
        if (shouldWrite(uri)) {
            return write(uri);
        }
    } catch (IOException e) {
        uri.getNonFatalFailures().add(e);
        log.error("Failed write of Record: " + uri.toString(), e);
    }
    return ProcessResult.PROCEED;
}
示例2: writeRequest
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes a request record for {@code curi}, using a replay of the
 * recorded output stream of its HTTP recorder as the record content.
 *
 * @param w writer that receives the record
 * @param timestamp timestamp passed through to the writer
 * @param mimetype content type passed through to the writer
 * @param baseid base id from which this record's id is qualified
 * @param curi URI whose recorded output is written
 * @param namedFields named fields passed through to the writer
 * @return the qualified id under which the record was written
 * @throws IOException propagated from obtaining the replay stream,
 *         writing the record, or closing the stream
 */
protected URI writeRequest(final WARCWriter w,
        final String timestamp, final String mimetype,
        final URI baseid, final CrawlURI curi,
        final ANVLRecord namedFields)
        throws IOException {
    final URI recordId = qualifyRecordID(baseid, TYPE, REQUEST);
    final ReplayInputStream requestReplay =
            curi.getHttpRecorder().getRecordedOutput().getReplayInputStream();
    try {
        w.writeRequestRecord(
                curi.toString(),
                timestamp,
                mimetype,
                recordId,
                namedFields,
                requestReplay,
                curi.getHttpRecorder().getRecordedOutput().getSize());
    } finally {
        // Always release the replay stream, whether or not the write succeeded.
        if (null != requestReplay) {
            requestReplay.close();
        }
    }
    return recordId;
}
示例3: writeResponse
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes a response record for {@code curi}, using a replay of the
 * recorded input stream of its HTTP recorder as the record content.
 *
 * @param w writer that receives the record
 * @param timestamp timestamp passed through to the writer
 * @param mimetype content type passed through to the writer
 * @param baseid id used for the record
 * @param curi URI whose recorded input is written
 * @param namedFields named fields passed through to the writer
 * @return {@code baseid}, unchanged
 * @throws IOException propagated from obtaining the replay stream,
 *         writing the record, or closing the stream
 */
protected URI writeResponse(final WARCWriter w,
        final String timestamp, final String mimetype,
        final URI baseid, final CrawlURI curi,
        final ANVLRecord namedFields)
        throws IOException {
    final ReplayInputStream responseReplay =
            curi.getHttpRecorder().getRecordedInput().getReplayInputStream();
    try {
        w.writeResponseRecord(
                curi.toString(),
                timestamp,
                mimetype,
                baseid,
                namedFields,
                responseReplay,
                curi.getHttpRecorder().getRecordedInput().getSize());
    } finally {
        // Always release the replay stream, whether or not the write succeeded.
        if (null != responseReplay) {
            responseReplay.close();
        }
    }
    return baseid;
}
示例4: writeResource
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes a resource record for {@code curi}, using a replay of the
 * recorded input stream of its HTTP recorder as the record content.
 *
 * @param w writer that receives the record
 * @param timestamp timestamp passed through to the writer
 * @param mimetype content type passed through to the writer
 * @param baseid id used for the record
 * @param curi URI whose recorded input is written
 * @param namedFields named fields passed through to the writer
 * @return {@code baseid}, unchanged
 * @throws IOException propagated from obtaining the replay stream,
 *         writing the record, or closing the stream
 */
protected URI writeResource(final WARCWriter w,
        final String timestamp, final String mimetype,
        final URI baseid, final CrawlURI curi,
        final ANVLRecord namedFields)
        throws IOException {
    final ReplayInputStream resourceReplay =
            curi.getHttpRecorder().getRecordedInput().getReplayInputStream();
    try {
        w.writeResourceRecord(
                curi.toString(),
                timestamp,
                mimetype,
                baseid,
                namedFields,
                resourceReplay,
                curi.getHttpRecorder().getRecordedInput().getSize());
    } finally {
        // Always release the replay stream, whether or not the write succeeded.
        if (null != resourceReplay) {
            resourceReplay.close();
        }
    }
    return baseid;
}
示例5: testGapError
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Checks that writing a record fails with a "Gap between expected and
 * actual" message when the replay stream reports a non-zero
 * {@code remaining()} after its content has been written.
 *
 * @throws IOException propagated from creating or closing the writer
 */
public void testGapError() throws IOException {
    ARCWriter writer = createArcWithOneRecord("testGapError", true);
    // Encode once with an explicit charset and use the BYTE count for every
    // length below: String.length() counts chars, which differs from the
    // encoded size as soon as the content contains non-ASCII characters.
    final byte[] contentBytes = getContent().getBytes("UTF-8");
    // Make a 'weird' RIS that returns bad 'remaining' length
    // when remaining should be 0
    ReplayInputStream ris = new ReplayInputStream(contentBytes,
            contentBytes.length, null) {
        @Override
        public long remaining() {
            final long actual = super.remaining();
            return (actual == 0) ? -1 : actual;
        }
    };
    String message = null;
    try {
        writer.write(SOME_URL, "text/html", "192.168.1.1",
                (new Date()).getTime(), contentBytes.length, ris);
    } catch (IOException e) {
        // Expected path: keep the message so it can be checked below.
        message = e.getMessage();
    } finally {
        IOUtils.closeQuietly(ris);
    }
    writer.close();
    assertTrue("No gap when should be",
            message != null
            && message.contains("Gap between expected and actual"));
}
示例6: writeRevisitDigest
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes a revisit record with the identical-digest profile for
 * {@code curi} and annotates the URI with {@code "warcRevisit:digest"}.
 *
 * <p>When {@code mimetype} is non-null the recorded input is replayed as
 * the record content; the length written is the recorded input's content
 * begin offset when that is positive, otherwise its full size. When
 * {@code mimetype} is null no stream is supplied and the length is 0.
 *
 * @param w writer that receives the record
 * @param timestamp timestamp passed through to the writer
 * @param mimetype content type; {@code null} implies no payload
 * @param baseid id used for the record
 * @param curi URI whose recorded input is (partially) written
 * @param namedFields named fields; profile and truncated entries are
 *        added to it by this method
 * @return {@code baseid}, unchanged
 * @throws IOException propagated from obtaining the replay stream,
 *         writing the record, or closing the stream
 */
protected URI writeRevisitDigest(final WARCWriter w,
        final String timestamp, final String mimetype,
        final URI baseid, final CrawlURI curi,
        final ANVLRecord namedFields)
        throws IOException {
    namedFields.addLabelValue(HEADER_KEY_PROFILE,
            PROFILE_REVISIT_IDENTICAL_DIGEST);
    namedFields.addLabelValue(HEADER_KEY_TRUNCATED,
            NAMED_FIELD_TRUNCATED_VALUE_LENGTH);

    ReplayInputStream payload = null;
    long payloadLength = 0;
    // null mimetype implies no payload
    if (mimetype != null) {
        payload = curi.getHttpRecorder().getRecordedInput().getReplayInputStream();
        payloadLength = curi.getHttpRecorder().getRecordedInput().getContentBegin();
        if (payloadLength <= 0) {
            // No usable content-begin offset: fall back to the full recorded size.
            payloadLength = curi.getHttpRecorder().getRecordedInput().getSize();
        }
    }

    try {
        w.writeRevisitRecord(curi.toString(), timestamp, mimetype, baseid,
                namedFields, payload, payloadLength);
    } finally {
        if (null != payload) {
            payload.close();
        }
    }
    curi.addAnnotation("warcRevisit:digest");
    return baseid;
}
示例7: writeRevisitNotModified
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes a zero-length revisit record with the not-modified profile for
 * {@code curi} and annotates the URI with
 * {@code "warcRevisit:notModified"}.
 *
 * <p>If the URI carries an HTTP transaction, its ETag and Last-Modified
 * headers are saved into {@code namedFields} via {@code saveHeader}.
 *
 * @param w writer that receives the record
 * @param timestamp timestamp passed through to the writer
 * @param baseid id used for the record
 * @param curi URI being recorded as not modified
 * @param namedFields named fields; profile, header and truncated entries
 *        are added to it by this method
 * @return {@code baseid}, unchanged
 * @throws IOException propagated from obtaining the replay stream,
 *         writing the record, or closing the stream
 */
protected URI writeRevisitNotModified(final WARCWriter w,
        final String timestamp,
        final URI baseid, final CrawlURI curi,
        final ANVLRecord namedFields)
        throws IOException {
    namedFields.addLabelValue(HEADER_KEY_PROFILE,
            PROFILE_REVISIT_NOT_MODIFIED);

    // save just enough context to understand basis of not-modified
    if (curi.containsKey(A_HTTP_TRANSACTION)) {
        final HttpMethodBase transaction =
                (HttpMethodBase) curi.getObject(A_HTTP_TRANSACTION);
        saveHeader(A_ETAG_HEADER, transaction, namedFields,
                HEADER_KEY_ETAG);
        saveHeader(A_LAST_MODIFIED_HEADER, transaction, namedFields,
                HEADER_KEY_LAST_MODIFIED);
    }

    // truncate to zero-length (all necessary info is above)
    namedFields.addLabelValue(HEADER_KEY_TRUNCATED,
            NAMED_FIELD_TRUNCATED_VALUE_LENGTH);

    final ReplayInputStream recordedReplay =
            curi.getHttpRecorder().getRecordedInput().getReplayInputStream();
    try {
        w.writeRevisitRecord(curi.toString(), timestamp, null, baseid,
                namedFields, recordedReplay, 0);
    } finally {
        if (null != recordedReplay) {
            recordedReplay.close();
        }
    }
    curi.addAnnotation("warcRevisit:notModified");
    return baseid;
}
示例8: write
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Writes one record: the metadata line built from the given values,
 * followed by the content copied from {@code in}, followed by a line
 * separator.
 *
 * <p>When {@code in} is a {@link ReplayInputStream}, its
 * {@code remaining()} count is checked after copying; a non-zero value is
 * reported through {@code DevUtils.warnHandle} and raised as an
 * {@link IOException}. {@code preWriteRecordTasks()} runs before the
 * write and {@code postWriteRecordTasks()} always runs afterwards.
 *
 * @param uri
 *            URI for metadata-line
 * @param contentType
 *            MIME content-type for metadata-line
 * @param hostIP
 *            IP for metadata-line
 * @param fetchBeginTimeStamp
 *            timestamp for metadata-line
 * @param recordLength
 *            length for metadata-line; also may be enforced
 * @param in
 *            source InputStream for record content
 * @param enforceLength
 *            whether to enforce the declared length; should be true
 *            unless intentionally writing bad records for testing
 * @throws IOException
 *             if a replay stream still reports unread content after
 *             copying, or propagated from the underlying write/copy calls
 */
public void write(String uri, String contentType, String hostIP,
        long fetchBeginTimeStamp, long recordLength, InputStream in,
        boolean enforceLength) throws IOException {
    preWriteRecordTasks();
    try {
        write(getMetaLine(uri, contentType, hostIP, fetchBeginTimeStamp,
                recordLength).getBytes(UTF8));
        copyFrom(in, recordLength, enforceLength);
        if (in instanceof ReplayInputStream) {
            // The whole recorded material must have been consumed by now;
            // anything other than zero means something is wrong.
            final long unread = ((ReplayInputStream) in).remaining();
            if (unread != 0) {
                final String gapReport = "Gap between expected and actual: "
                        + unread + LINE_SEPARATOR + DevUtils.extraInfo()
                        + " writing arc "
                        + this.getFile().getAbsolutePath();
                DevUtils.warnHandle(new Throwable(gapReport), gapReport);
                throw new IOException(gapReport);
            }
        }
        write(LINE_SEPARATOR);
    } finally {
        postWriteRecordTasks();
    }
}
示例9: modifyPut
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Stub hook, present to allow extension/overriding for custom content
 * parsing, data manipulation and populating new columns.
 *
 * <p>For example: html parsing, text extraction, analysis and
 * transformation, storing the results in new column families/columns, or
 * saving values in other custom hbase tables or remote data sources.
 *
 * <p>As written, the example code in the body is disabled by a local
 * flag that is always {@code false}, so this method does nothing.
 *
 * @param hBaseParameters
 *            the configured hbase parameters for this crawl job
 * @param curi
 *            the requested uri for this content
 * @param ip
 *            the ip the host name in the uri resolves to
 * @param put
 *            the stateful put object containing all the row data to be
 *            written. This is the 'output' object.
 * @param recordingOutputStream
 *            request to the server (output from us to the server)
 * @param recordingInputStream
 *            the server response (input from the server to us)
 * @throws IOException
 *             declared for implementations that read the streams
 */
public void modifyPut(final HBaseParameters hBaseParameters, final CrawlURI curi, final String ip, Put put, RecordingOutputStream recordingOutputStream,
        RecordingInputStream recordingInputStream) throws IOException {
    // Both request and response streams are available in this method.
    // NOTE: be sure to close your streams when you are done reading them.
    boolean runExamples = false;
    if (!runExamples) {
        return;
    }

    // EXAMPLE OF HOW TO ACCESS CLIENT REQUEST DATA, THIS IS DATA SENT
    // FROM HERITRIX
    ReplayInputStream requestReplay = recordingOutputStream.getReplayInputStream();
    HBaseWriter.getByteArrayFromInputStream(requestReplay, (int) recordingOutputStream.getSize());

    // EXAMPLE OF HOW TO ACCESS SERVER RESPONSE DATA, THIS IS DATA SENT
    // FROM THE WEB SERVER
    ReplayInputStream responseReplay = recordingInputStream.getReplayInputStream();
    HBaseWriter.getByteArrayFromInputStream(responseReplay, (int) recordingInputStream.getSize());
}
示例10: getByteArrayFromInputStream
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Reads the whole content of the given replay stream into a byte array
 * and closes the stream.
 *
 * @param replayInputStream
 *            the replay input stream to drain; it is closed by this
 *            method whether or not reading succeeds
 * @param streamSize
 *            initial capacity hint for the in-memory buffer; values below
 *            zero (for example from narrowing a long size to int) are
 *            treated as zero
 *
 * @return the bytes read from the stream
 *
 * @throws IOException
 *             propagated from reading the stream
 */
public static byte[] getByteArrayFromInputStream(final ReplayInputStream replayInputStream, final int streamSize) throws IOException {
    try {
        // Allocate inside the try so the stream is still closed if the
        // allocation fails; clamp the hint because ByteArrayOutputStream
        // rejects a negative initial size.
        final ByteArrayOutputStream baos = new ByteArrayOutputStream(Math.max(streamSize, 0));
        // read the InputStream to the ByteArrayOutputStream
        replayInputStream.readFullyTo(baos);
        // ByteArrayOutputStream.close() has no effect, so it is not called.
        return baos.toByteArray();
    } finally {
        IOUtils.closeStream(replayInputStream);
    }
}
示例11: getReplayInputStream
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Returns a raw replay of all recorded data (including, for example,
 * HTTP protocol headers), obtained from the recorded input.
 *
 * @return A replay input stream.
 * @throws IOException propagated from the recorded input when the replay
 *         stream cannot be provided
 */
public ReplayInputStream getReplayInputStream() throws IOException {
    final ReplayInputStream fullReplay =
            getRecordedInput().getReplayInputStream();
    return fullReplay;
}
示例12: getMessageBodyReplayInputStream
import org.archive.io.ReplayInputStream; //導入依賴的package包/類
/**
 * Returns a raw replay of the 'message-body', obtained from the recorded
 * input. For the common case of HTTP, this is the raw, possibly
 * chunked-transfer-encoded message contents not including the leading
 * headers.
 *
 * @return A replay input stream.
 * @throws IOException propagated from the recorded input when the replay
 *         stream cannot be provided
 */
public ReplayInputStream getMessageBodyReplayInputStream() throws IOException {
    final ReplayInputStream bodyReplay =
            getRecordedInput().getMessageBodyReplayInputStream();
    return bodyReplay;
}