本文整理汇总了Java中org.jwat.common.HttpHeader类的典型用法代码示例。如果您正苦于以下问题：Java HttpHeader类的具体用法？Java HttpHeader怎么用？Java HttpHeader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
HttpHeader类属于org.jwat.common包，在下文中一共展示了HttpHeader类的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: test
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Converts the bundled HTTrack 3.49-2 sample crawl to WARC and verifies the
 * result: the sequence of record types and target URIs, the HTTP version of
 * every request/response record, and the payload of the warcinfo record.
 */
@Test
public void test() throws IOException {
    Path crawlPath = temp.newFolder().toPath();
    Path outdir = temp.newFolder().toPath();
    TestUtils.unzip(HttrackRecordTest.class.getResourceAsStream("testcrawl-3.49-2.zip"), crawlPath);

    Httrack2Warc converter = new Httrack2Warc();
    converter.setOutputDirectory(outdir);
    converter.convert(crawlPath);

    // Expected payload of the single warcinfo record.
    String expectedWarcinfo = "software: HTTrack/3.49-2 http://www.httrack.com/\r\n" +
            "software: httrack2warc https://github.com/nla/httrack2warc\r\n" +
            "httrackOptions: -%H http://test.example.org/\r\n";

    // Expected "<WARC-Type> <WARC-Target-URI>" line for each record, in file order.
    String expectedSummary = "warcinfo null\n" +
            "response http://test.example.org/\n" +
            "request http://test.example.org/\n" +
            "metadata http://test.example.org/\n" +
            "response http://test.example.org/style.css\n" +
            "request http://test.example.org/style.css\n" +
            "metadata http://test.example.org/style.css\n" +
            "response http://test.example.org/query.html?page=1&query=2&FOO=3&&BaR=4&&#anchor\n" +
            "request http://test.example.org/query.html?page=1&query=2&FOO=3&&BaR=4&&#anchor\n" +
            "metadata http://test.example.org/query.html?page=1&query=2&FOO=3&&BaR=4&&#anchor\n" +
            "response http://test.example.org/another\n" +
            "request http://test.example.org/another\n" +
            "metadata http://test.example.org/another\n" +
            "response http://test.example.org/image.gif\n" +
            "request http://test.example.org/image.gif\n" +
            "metadata http://test.example.org/image.gif\n";

    StringBuilder actualSummary = new StringBuilder();
    try (WarcReader reader = WarcReaderFactory.getReaderCompressed(
            Files.newInputStream(outdir.resolve("crawl-0.warc.gz")))) {
        for (WarcRecord rec : reader) {
            String recordType = getHeader(rec, "WARC-Type");
            String targetUri = getHeader(rec, "WARC-Target-URI");
            actualSummary.append(recordType).append(" ").append(targetUri).append("\n");

            switch (recordType) {
                case "request":
                case "response":
                    HttpHeader parsedHeader = rec.getHttpHeader();
                    assertEquals("HTTP/1.1", parsedHeader.httpVersion);
                    break;
                case "warcinfo":
                    String warcinfoBody = slurp(rec.getPayloadContent());
                    assertEquals(expectedWarcinfo, warcinfoBody);
                    break;
                default:
                    // Other record types are only checked through the summary.
                    break;
            }
        }
    }

    assertEquals(expectedSummary, actualSummary.toString());
}
示例2: processPayload
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Reads the record block as a payload and, when the record's URL scheme is
 * one that {@link HttpHeader} supports, tries to parse an HTTP response
 * header from the start of that payload.
 *
 * <p>Resets {@code payload} to {@code null} first. If the header declares no
 * positive archive length, no payload is created; for a supported scheme this
 * is reported as an expected-but-missing payload diagnosis.
 *
 * @param in input stream positioned at the record block
 * @param reader reader supplying the digest settings and the maximum payload header size
 * @throws IOException if reading the payload fails
 */
@Override
protected void processPayload(ByteCountingPushBackInputStream in,
        ArcReader reader) throws IOException {
    payload = null;

    if (header.archiveLength == null || header.archiveLength <= 0L) {
        // No record block at all: only a problem when an HTTP payload was expected.
        // Never! -> && !ArcConstants.CONTENT_TYPE_NO_TYPE.equals(header.contentTypeStr)
        if (HttpHeader.isSupported(header.urlScheme)) {
            diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                    ArcConstants.ARC_FILE,
                    "Expected payload not found in the record block"));
        }
        return;
    }

    // The block digest is only computed when the reader asks for it.
    String blockDigest = reader.bBlockDigest ? reader.blockDigestAlgorithm : null;
    payload = Payload.processPayload(in, header.archiveLength.longValue(),
            reader.payloadHeaderMaxSize, blockDigest);
    payload.setOnClosedHandler(this);

    // HttpHeader.
    // Never! -> && !ArcConstants.CONTENT_TYPE_NO_TYPE.equals(header.contentTypeStr)
    if (!HttpHeader.isSupported(header.urlScheme)) {
        return;
    }

    String payloadDigest = reader.bPayloadDigest ? reader.payloadDigestAlgorithm : null;
    // Try to read a valid HTTP response header from the payload.
    httpHeader = HttpHeader.processPayload(HttpHeader.HT_RESPONSE,
            payload.getInputStream(), header.archiveLength.longValue(),
            payloadDigest);
    if (httpHeader == null) {
        return;
    }

    if (httpHeader.isValid()) {
        payload.setPayloadHeaderWrapped(httpHeader);
    } else {
        diagnostics.addError(
                new Diagnosis(DiagnosisType.ERROR,
                        "http header",
                        "Unable to parse http header!"));
    }
}
示例3: map
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Emits one {@code (target URI, absolute href)} pair for every {@code <a>}
 * element found in the HTML payload of an HTTP response record.
 *
 * <p>Records are skipped when they are not HTTP responses, when no HTTP
 * header was parsed, when the HTTP content type does not contain
 * {@code text/html}, or when the payload is missing or empty.
 *
 * @param key used only to report the current record in the task status
 * @param value WARC record to inspect
 * @param context Hadoop context receiving the status, counter and output pairs
 * @throws IOException if reading the payload or writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
public void map(LongWritable key, WarcRecord value, Context context) throws IOException, InterruptedException {
    context.setStatus(Counters.CURRENT_RECORD + ": " + key.get());

    // Only process http response content. Note that the outlinks can also be found in the wat metadata.
    if (!"application/http; msgtype=response".equals(value.header.contentTypeStr)) {
        return;
    }

    // org.jwat.warc.WarcRecord is kind enough to also parse http headers for us.
    HttpHeader httpHeader = value.getHttpHeader();
    if (httpHeader == null) {
        // No header, so we cannot be sure that the content is text/html.
        return;
    }
    if (httpHeader.contentType == null || !httpHeader.contentType.contains("text/html")) {
        return;
    }

    // Note that if you really want to do this right; you should look at the character encoding as well.
    // We'll leave that as an exercise for you ;-).
    context.getCounter(Counters.NUM_HTTP_RESPONSE_RECORDS).increment(1);

    // Get the html payload
    Payload payload = value.getPayload();
    if (payload == null) {
        return;
    }

    // NOTE(review): no charset is passed here, so decoding presumably uses the
    // platform default — confirm against the IOUtils version in use.
    String warcContent = IOUtils.toString(payload.getInputStreamComplete());
    // Fixed: the original tested "== null && isEmpty", which can never be true.
    if (warcContent == null || "".equals(warcContent)) {
        return;
    }

    String targetURI = value.header.warcTargetUriStr;
    Document doc = Jsoup.parse(warcContent);
    Elements links = doc.select("a");
    for (Element link : links) {
        String absHref = link.attr("abs:href");
        // Omit nulls and empty strings
        if (absHref != null && !"".equals(absHref)) {
            context.write(new Text(targetURI), new Text(absHref));
        }
    }
}
示例4: getNext
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Reads the next WARC record and returns it as a three-field tuple:
 * the target URI, the HTTP payload length as a string, and the normalized
 * HTTP content type.
 *
 * <p>The length and type fields are {@code null} when the record has no
 * parsed HTTP header; the type is also {@code null} when the header carries
 * no content type.
 *
 * @return the tuple for the next record, or {@code null} when there are no
 *         more records or when reading the record failed
 * @throws IOException declared by the overridden method; failures inside the
 *         body are caught, printed and reported as {@code null}
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!in.nextKeyValue()) {
            return null;
        }

        WarcRecord warcRecord = in.getCurrentValue();
        String url = warcRecord.header.warcTargetUriStr;
        String length = null;
        String type = null;

        HttpHeader httpheader = warcRecord.getHttpHeader();
        if (httpheader != null) {
            // Long.toString avoids the deprecated boxing constructor.
            length = Long.toString(httpheader.payloadLength);
            /*
             * The Content-Type field is often of the form 'text/html; encoding="utf-8"; filename="...";'.
             * We are only interested in the first part, so we strip and normalize the type field.
             */
            if (httpheader.contentType != null) {
                Scanner splitter = new Scanner(httpheader.contentType);
                try {
                    splitter.useDelimiter(";");
                    if (splitter.hasNext()) {
                        // Locale.ROOT keeps the result independent of the default
                        // locale; trim drops whitespace around the media type.
                        type = splitter.next().trim().toLowerCase(java.util.Locale.ROOT);
                    }
                } finally {
                    splitter.close();
                }
            }
        }

        /*
         * You can expand this loader by returning values from
         * the warcRecord as needed.
         */
        Tuple t = mTupleFactory.newTuple(3);
        t.set(0, url);
        t.set(1, length);
        t.set(2, type);
        return t;
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interrupt visible to the caller even though we swallow the exception.
            Thread.currentThread().interrupt();
        }
        // Best effort, as before: report the failure and signal "no tuple".
        e.printStackTrace();
        return null;
    }
}
示例5: apcArcRecordStart
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Writes a text file describing the HTTP header of an ARC record, then
 * closes the record and advances the record counter.
 *
 * <p>The file is only written when the record has both a payload and an HTTP
 * header. It is created in {@code outputDirectory} and named
 * {@code <fileName>-<record start offset>-<recordNr>}. The HTTP header, the
 * payload and the record are closed even if writing the file fails; in that
 * case the exception propagates and the record counter is not advanced.
 *
 * @param arcRecord record to describe and close
 * @param startOffset not used by this implementation
 * @param compressed not used by this implementation
 * @param outputDirectory directory that receives the header file
 * @throws IOException if the header file cannot be written or a close fails
 */
@Override
public void apcArcRecordStart(ArcRecordBase arcRecord, long startOffset, boolean compressed, File outputDirectory) throws IOException {
    Payload payload = arcRecord.getPayload();
    HttpHeader httpHeader = null;
    try {
        if (payload != null) {
            httpHeader = arcRecord.getHttpHeader();
            if (httpHeader != null) {
                StringBuilder headerString = new StringBuilder();
                headerString.append("URL: " + arcRecord.getUrlStr() + "\n");
                headerString.append("IP: " + arcRecord.getIpAddress() + "\n");
                headerString.append("ProtocolVersion: " + httpHeader.getProtocolVersion() + "\n");
                headerString.append("ProtocolStatusCode: " + httpHeader.getProtocolStatusCodeStr() + "\n");
                headerString.append("ProtocolContentType: " + httpHeader.getProtocolContentType() + "\n");
                headerString.append("TotalLength: " + httpHeader.getTotalLength() + "\n");
                for (HeaderLine hl : httpHeader.getHeaderList()) {
                    headerString.append(hl.name + ": " + hl.value + "\n");
                }
                headerString.append("Filename: " + fileName + "\n");
                headerString.append("Offset: " + arcRecord.getStartOffset() + "\n");

                // File(parent, child) avoids hard-coding the path separator.
                File target = new File(outputDirectory,
                        fileName + "-" + arcRecord.getStartOffset() + "-" + recordNr);
                FileWriter headerFile = new FileWriter(target);
                try {
                    headerFile.write(headerString.toString());
                } finally {
                    // Close the writer even when the write fails.
                    headerFile.close();
                }
            }
        }
    } finally {
        // Nested so that a failing close does not skip the remaining ones.
        try {
            if (httpHeader != null) {
                httpHeader.close();
            }
        } finally {
            try {
                if (payload != null) {
                    payload.close();
                }
            } finally {
                arcRecord.close();
            }
        }
    }
    ++recordNr;
}
示例6: getHttpHeader
import org.jwat.common.HttpHeader; //导入依赖的package包/类
/**
 * Gives access to the HTTP header held by this object.
 *
 * @return the stored <code>HttpHeader</code>, or null if none was identified
 *         in the payload
 */
public HttpHeader getHttpHeader() {
    return this.httpHeader;
}