本文整理汇总了Java中org.archive.wayback.core.CaptureSearchResult类的典型用法代码示例。如果您正苦于以下问题:Java CaptureSearchResult类的具体用法?Java CaptureSearchResult怎么用?Java CaptureSearchResult使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
CaptureSearchResult类属于org.archive.wayback.core包,在下文中一共展示了CaptureSearchResult类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testRetrieveRedirect
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Fetches a capture whose CDX line records HTTP status 302 and checks that
 * the returned ARC resource has content, headers and the expected status,
 * and that its record can be dumped to a stream.
 */
public void testRetrieveRedirect()
        throws ResourceNotAvailableException, IOException {
    final String redirectCdx = "netarkivet.dk/ 20090706131100 http://netarkivet.dk/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ http://netarkivet.dk/index-da.php 3311 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CDXLineToSearchResultAdapter lineAdapter = new CDXLineToSearchResultAdapter();
    CaptureSearchResult capture = lineAdapter.adapt(redirectCdx);

    ArcResource arcResource = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", arcResource);
    assertTrue(0 < arcResource.getRecordLength());
    assertFalse(arcResource.getHttpHeaders().isEmpty());
    assertEquals(302, arcResource.getStatusCode());

    // Dump the raw record and make sure it can be decoded as text.
    ByteArrayOutputStream dumpBuffer = new ByteArrayOutputStream();
    arcResource.getArcRecord().dump(dumpBuffer);
    assertNotNull(dumpBuffer.toString("UTF-8"));
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreWarcTester.java
示例2: testRetrieveResource
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Fetches a capture whose CDX line records HTTP status 200 and checks that
 * the returned ARC resource has content, headers and the expected status,
 * and that the dumped record contains the expected marker text.
 */
public void testRetrieveResource()
        throws ResourceNotAvailableException, IOException {
    final String okCdx = "ing.dk/ 20090706131100 http://ing.dk/ text/html 200 Z3UM6JX4FCO6VMVTPM6VBNJPN5D6QLO3 - 3619 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CDXLineToSearchResultAdapter lineAdapter = new CDXLineToSearchResultAdapter();
    CaptureSearchResult capture = lineAdapter.adapt(okCdx);

    ArcResource arcResource = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", arcResource);
    assertTrue(0 < arcResource.getRecordLength());
    assertFalse(arcResource.getHttpHeaders().isEmpty());
    assertEquals(200, arcResource.getStatusCode());

    // Dump the raw record and look for a word known to be in the page.
    ByteArrayOutputStream dumpBuffer = new ByteArrayOutputStream();
    arcResource.getArcRecord().dump(dumpBuffer);
    String dumped = dumpBuffer.toString("UTF-8");
    assertNotNull(dumped);
    assertTrue(dumped.contains("Motorola"));
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreWarcTester.java
示例3: testJob
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Runs the deduplication CDX extraction batch job over the local metadata
 * file and checks that every produced line adapts to a search result that
 * carries a mime type.
 */
public void testJob() throws IOException {
    File metadataInput = new File(TestInfo.WORKING_DIR, METADATA_FILENAME);
    assertTrue("file should exist", metadataInput.isFile());

    BatchLocalFiles localBatch = new BatchLocalFiles(new File[]{metadataInput});
    DeduplicationCDXExtractionBatchJob extractionJob = new DeduplicationCDXExtractionBatchJob();
    ByteArrayOutputStream jobOutput = new ByteArrayOutputStream();
    localBatch.run(extractionJob, jobOutput);
    jobOutput.flush();

    String[] cdxLines = jobOutput.toString().split("\\n");
    assertTrue("Expect some results", cdxLines.length > 2);

    CDXLineToSearchResultAdapter lineAdapter = new CDXLineToSearchResultAdapter();
    for (int i = 0; i < cdxLines.length; i++) {
        CaptureSearchResult parsed = lineAdapter.adapt(cdxLines[i]);
        assertNotNull("Expect a mime type for every result", parsed.getMimeType());
    }
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:18,代码来源:DeduplicationCDXExtractionBatchJobTester.java
示例4: testAdaptLine
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Converts two deduplication crawl-log entries to CDX lines and checks that
 * the file name and HTTP code survive the round trip through the CDX
 * line adapter.
 */
public void testAdaptLine() {
    DeduplicateToCDXAdapterInterface dedupAdapter = new DeduplicateToCDXAdapter();
    String firstCdx = dedupAdapter.adaptLine(DEDUP_CRAWL_STRING);
    CDXLineToSearchResultAdapter cdxAdapter = new CDXLineToSearchResultAdapter();

    // First crawl-log entry.
    CaptureSearchResult firstCapture = cdxAdapter.adapt(firstCdx);
    assertEquals(
            "Should get the arcfilename back out of the cdx line",
            "1-1-20090513141823-00008-sb-test-har-001.statsbiblioteket.dk.arc",
            firstCapture.getFile());
    assertEquals(
            "Should get the right http code out of the cdx line",
            "200",
            firstCapture.getHttpCode());

    // Second crawl-log entry.
    String secondCdx = dedupAdapter.adaptLine(DEDUP_CRAWL_STRING2);
    CaptureSearchResult secondCapture = cdxAdapter.adapt(secondCdx);
    assertEquals(
            "Should get the arcfilename back out of the cdx line",
            "118657-119-20110428163750-00001-kb-prod-har-004.kb.dk.arc",
            secondCapture.getFile());
    assertEquals(
            "Should get the right http code out of the cdx line",
            "200",
            secondCapture.getHttpCode());
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:DeduplicateToCDXAdapterTester.java
示例5: testAdaptStream
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Adapts a whole deduplication crawl log to CDX output and checks that the
 * output has more than two lines, each of which adapts to a search result
 * carrying a mime type.
 *
 * @throws IOException if the crawl log cannot be read or a stream cannot
 *         be closed.
 */
public void testAdaptStream() throws IOException {
    InputStream is = new FileInputStream(new File(TestInfo.WORKING_DIR, DEDUP_CRAWL_LOG));
    OutputStream os = new ByteArrayOutputStream();
    try {
        DeduplicateToCDXAdapterInterface adapter = new DeduplicateToCDXAdapter();
        adapter.adaptStream(is, os);
    } finally {
        // Release the file handle even when the adapter throws; closing a
        // FileInputStream that is already closed is harmless.
        is.close();
    }
    os.close();
    String output = os.toString();
    String[] lines = output.split("\n");
    // Check the line count first so that empty output is reported as such
    // rather than as a failure while adapting a blank line.
    assertTrue("expect at least 3 lines of output, got " + lines.length, lines.length > 2);
    CDXLineToSearchResultAdapter adapter2 = new CDXLineToSearchResultAdapter();
    for (String line: lines) {
        CaptureSearchResult csr = adapter2.adapt(line);
        assertNotNull("Should have a valid mime type for every line, inclding '" + line + "'",
                csr.getMimeType());
    }
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:DeduplicateToCDXAdapterTester.java
示例6: testRetrieveRedirect
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Fetches a capture whose CDX line records HTTP status 302 and checks that
 * the returned ARC resource has content, headers and the expected status,
 * and that its record can be dumped to a stream.
 */
public void testRetrieveRedirect()
        throws ResourceNotAvailableException, IOException {
    final String redirectCdx = "netarkivet.dk/ 20090706131100 http://netarkivet.dk/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ http://netarkivet.dk/index-da.php 3311 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CaptureSearchResult capture =
            new CDXLineToSearchResultAdapter().adapt(redirectCdx);

    ArcResource fetched = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", fetched);
    assertTrue(0 < fetched.getRecordLength());
    assertFalse(fetched.getHttpHeaders().isEmpty());
    assertEquals(302, fetched.getStatusCode());

    // Dump the raw record and make sure it can be decoded as text.
    ByteArrayOutputStream recordDump = new ByteArrayOutputStream();
    fetched.getArcRecord().dump(recordDump);
    assertNotNull(recordDump.toString("UTF-8"));
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreTester.java
示例7: testRetrieveResource
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Fetches a capture whose CDX line records HTTP status 200 and checks that
 * the returned ARC resource has content, headers and the expected status,
 * and that the dumped record contains the expected marker text.
 */
public void testRetrieveResource()
        throws ResourceNotAvailableException, IOException {
    final String okCdx = "ing.dk/ 20090706131100 http://ing.dk/ text/html 200 Z3UM6JX4FCO6VMVTPM6VBNJPN5D6QLO3 - 3619 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CaptureSearchResult capture =
            new CDXLineToSearchResultAdapter().adapt(okCdx);

    ArcResource fetched = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", fetched);
    assertTrue(0 < fetched.getRecordLength());
    assertFalse(fetched.getHttpHeaders().isEmpty());
    assertEquals(200, fetched.getStatusCode());

    // Dump the raw record and look for a word known to be in the page.
    ByteArrayOutputStream recordDump = new ByteArrayOutputStream();
    fetched.getArcRecord().dump(recordDump);
    String dumped = recordDump.toString("UTF-8");
    assertNotNull(dumped);
    assertTrue(dumped.contains("Motorola"));
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:18,代码来源:NetarchiveResourceStoreTester.java
示例8: adapt
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Adapts an ARC record to a search result by delegating to
 * {@code adaptInner}.
 *
 * @param rec the ARC record to adapt.
 * @return the adapted search result, or {@code null} when
 *         {@code adaptInner} fails with an {@link IOException} (the stack
 *         trace is printed and the failure is otherwise ignored).
 */
public CaptureSearchResult adapt(ARCRecord rec) {
    CaptureSearchResult adapted = null;
    try {
        adapted = adaptInner(rec);
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }
    return adapted;
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:9,代码来源:NetarchiveSuiteARCRecordToSearchResultAdapter.java
示例9: adaptWARCHTTPResponse
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Fills in HTTP-level information on a search result from a WARC record.
 * The HTTP status line and headers are parsed here because, per the
 * original author's note, WARCReader does not do so itself.
 *
 * @param result the search result to update; also the return value.
 * @param rec the WARC record positioned at the start of the HTTP message.
 * @return the same {@code result} instance, with its HTTP code set and
 *         passed through the annotater.
 * @throws IOException if reading fails; a RecoverableIOException is thrown
 *         when no status line can be read or it does not start with HTTP.
 */
private CaptureSearchResult adaptWARCHTTPResponse(CaptureSearchResult result,
        WARCRecord rec) throws IOException {
    ArchiveRecordHeader recordHeader = rec.getHeader();

    // Read the raw status line, including its end-of-line characters.
    byte[] rawStatus = HttpParser.readRawLine(rec);
    int eolLength = getEolCharsCount(rawStatus);
    if (!(eolLength > 0)) {
        String seen;
        if (rawStatus == null) {
            seen = "(null)";
        } else {
            seen = new String(rawStatus);
        }
        throw new RecoverableIOException("Failed to read http status where one "
                + " was expected: "
                + seen);
    }

    // Decode the line without its trailing end-of-line characters.
    int statusLength = rawStatus.length - eolLength;
    String statusText = EncodingUtil.getString(rawStatus, 0, statusLength,
            ARCConstants.DEFAULT_ENCODING);
    boolean looksLikeHttp =
            (statusText != null) && StatusLine.startsWithHTTP(statusText);
    if (!looksLikeHttp) {
        throw new RecoverableIOException("Failed parse of http status line.");
    }

    StatusLine parsedStatus = new StatusLine(statusText);
    result.setHttpCode(String.valueOf(parsedStatus.getStatusCode()));

    Header[] httpHeaders = HttpParser.parseHeaders(rec,
            ARCConstants.DEFAULT_ENCODING);
    annotater.annotateHTTPContent(result, rec, httpHeaders,
            recordHeader.getMimetype());
    return result;
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:32,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java
示例10: filterObject
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Excludes a capture when its original URL fully matches any of the
 * configured regular expressions; includes it otherwise.
 *
 * @param captureSearchResult the capture to test.
 * @return {@code ObjectFilter.FILTER_EXCLUDE} on the first matching
 *         pattern, else {@code ObjectFilter.FILTER_INCLUDE}.
 */
@Override
public int filterObject(CaptureSearchResult captureSearchResult) {
    // The two filterGroup calls below (on ExclusionCaptureFilterGroup) are
    // not well documented. Per the original author: without them excluded
    // objects are reported as not in the archive; with them they are
    // correctly reported as blocked.
    filterGroup.setSawAdministrative();

    for (Pattern exclusion : regexps) {
        boolean blocked =
                exclusion.matcher(captureSearchResult.getOriginalUrl()).matches();
        if (!blocked) {
            continue;
        }
        return ObjectFilter.FILTER_EXCLUDE;
    }

    // No pattern matched: record that the capture passed this filter.
    filterGroup.setPassedAdministrative();
    return ObjectFilter.FILTER_INCLUDE;
}
示例11: setUp
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Prepares the fixture for each test: installs the mock JMS connection,
 * resets the working directory from the originals, creates the repository
 * client and resource store, and builds the search results used as inputs.
 */
@Override
public void setUp() {
super.setUp();
// Switch to the mock JMS connection before anything asks the factory for one.
JMSConnectionMockupMQ.useJMSConnectionMockupMQ();
// Start from a fresh copy of the original test data.
FileUtils.removeRecursively(TestInfo.WORKING_DIR);
TestFileUtils.copyDirectoryNonCVS(TestInfo.ORIGINALS_DIR, TestInfo.WORKING_DIR);
// NOTE(review): presumably a timeout in milliseconds - confirm against JMSArcRepositoryClient.
Settings.set(JMSArcRepositoryClient.ARCREPOSITORY_GET_TIMEOUT, "1000");
// Guard: the rest of the set-up relies on the mock connection being active.
assertTrue("Should get a mock connection",
JMSConnectionFactory.getInstance() instanceof JMSConnectionMockupMQ);
arc = (ArcRepositoryClient) ArcRepositoryClientFactory.getPreservationInstance();
netarchiveResourceStore = new NetarchiveResourceStore();
// Search result pointing at offset 0 of the metadata file.
metadataResource = new CaptureSearchResult();
metadataResource.setFile(metadataFile);
metadataResource.setOffset(0L);
// Search result pointing at offset 2041 of the upload file.
uploadResource = new CaptureSearchResult();
uploadResource.setFile(uploadFile);
uploadResource.setOffset(2041L);
// Search result that additionally carries an original URL; it reuses the metadata file.
httpResource = new CaptureSearchResult();
httpResource.setOriginalUrl("http://www.netarkivet.dk/");
httpResource.setOffset(0L);
httpResource.setFile(metadataFile);
// Left without file or offset set.
resourceNotAvaliable = new CaptureSearchResult();
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:31,代码来源:NetarchiveResourceStoreWarcTester.java
示例12: getPrefixIterator
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented: this stub performs no lookup.
 *
 * @param prefix ignored by this implementation.
 * @return always {@code null}.
 * @throws ResourceIndexNotAvailableException declared by the signature;
 *         never thrown by this stub.
 */
public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix)
        throws ResourceIndexNotAvailableException {
    // Placeholder body left from code generation; no iterator is produced.
    return null;
}
示例13: getPrefixReverseIterator
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented: this stub performs no lookup.
 *
 * @param prefix ignored by this implementation.
 * @return always {@code null}.
 * @throws ResourceIndexNotAvailableException declared by the signature;
 *         never thrown by this stub.
 */
public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix)
        throws ResourceIndexNotAvailableException {
    // Placeholder body left from code generation; no iterator is produced.
    return null;
}
示例14: cleanup
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented: this stub does nothing with the iterator it is given.
 *
 * @param c ignored by this implementation.
 * @throws IOException declared by the signature; never thrown by this stub.
 */
public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException {
    // Placeholder body left from code generation; intentionally empty.
}
示例15: retrieveResource
import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Transforms search result into a resource, according to the ResourceStore
 * interface.
 * <p>
 * The file named by the search result is first looked up in the file cache.
 * If it is not cached, or the cached file no longer exists on disk, it is
 * fetched through the client into the cache directory and registered in
 * the cache. The whole lookup/fetch runs while holding the lock on the
 * file cache, so only one caller uses the cache at any one time.
 *
 * @param captureSearchResult the search result.
 * @return a valid resource containing metadata and a link to the ARC
 * or warc-record
 * @throws ResourceNotAvailableException if something went wrong fetching
 * the record.
 */
public Resource retrieveResource(CaptureSearchResult captureSearchResult)
        throws ResourceNotAvailableException {
    String arcfile = captureSearchResult.getFile();
    long offset = captureSearchResult.getOffset();
    logger.info("Received request for resource from file '" + arcfile
            + "' at offset '" + offset + "'");
    // Try to lookup the file in the cache
    // make synchronized to disallow more than one using
    // the cache at any one time
    synchronized (fileCache) {
        File wantedFile = fileCache.get(arcfile);
        try {
            File resourceFile;
            if (wantedFile != null && wantedFile.exists()) {
                logger.debug("Found the file '" + arcfile
                        + "' in the cache. ");
                resourceFile = wantedFile;
            } else {
                logger.debug("The file '" + arcfile
                        + "' was not found in the cache. ");
                // Get file from bitarchive, and place it in the cachedir
                // directory
                resourceFile = new File(fileCache.getCacheDir(), arcfile);
                client.getFile(arcfile, replicaUsed, resourceFile);
                // put into the cache
                fileCache.put(arcfile, resourceFile);
                logger.info("File '" + arcfile
                        + "' downloaded from archive and put into the cache '"
                        + fileCache.getCacheDir().getAbsolutePath()
                        + "'.");
            }
            return ResourceFactory.getResource(resourceFile, offset);
        } catch (IOException e) {
            logger.error("Error looking for non existing resource", e);
            // The original message glued the class name directly onto the
            // text; separate them and say which file/offset failed and why.
            throw new ResourceNotAvailableException(this.getClass()
                    .getName()
                    + " throws Exception when accessing "
                    + "CaptureResult given from Wayback (file '" + arcfile
                    + "', offset " + offset + "): " + e);
        }
    }
}
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:56,代码来源:NetarchiveCacheResourceStore.java