当前位置: 首页>>代码示例>>Java>>正文


Java CaptureSearchResult类代码示例

本文整理汇总了Java中org.archive.wayback.core.CaptureSearchResult的典型用法代码示例。如果您正苦于以下问题:Java CaptureSearchResult类的具体用法?Java CaptureSearchResult怎么用?Java CaptureSearchResult使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


CaptureSearchResult类属于org.archive.wayback.core包,在下文中一共展示了CaptureSearchResult类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testRetrieveRedirect

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Adapts a CDX line describing a 302 capture into a search result, fetches
 * the matching record from a NetarchiveResourceStore and checks that the
 * record is non-empty, carries HTTP headers, reports status 302 and can be
 * dumped as UTF-8 text.
 *
 * @throws ResourceNotAvailableException if the store cannot supply the record
 * @throws IOException if dumping the record fails
 */
public void testRetrieveRedirect()
        throws ResourceNotAvailableException, IOException {
    final String redirectCdx = "netarkivet.dk/ 20090706131100 http://netarkivet.dk/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ http://netarkivet.dk/index-da.php 3311 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CDXLineToSearchResultAdapter cdxAdapter = new CDXLineToSearchResultAdapter();
    CaptureSearchResult capture = cdxAdapter.adapt(redirectCdx);

    ArcResource arcResource = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", arcResource);
    assertTrue(arcResource.getRecordLength() > 0);
    assertFalse(arcResource.getHttpHeaders().isEmpty());
    assertEquals(302, arcResource.getStatusCode());

    // Dump the raw record and make sure it decodes to a string.
    ByteArrayOutputStream dumped = new ByteArrayOutputStream();
    arcResource.getArcRecord().dump(dumped);
    assertNotNull(dumped.toString("UTF-8"));
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreWarcTester.java

示例2: testRetrieveResource

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Adapts a CDX line describing a 200 capture into a search result, fetches
 * the matching record from a NetarchiveResourceStore and checks status,
 * headers, record length and that the dumped content mentions "Motorola".
 *
 * @throws ResourceNotAvailableException if the store cannot supply the record
 * @throws IOException if dumping the record fails
 */
public void testRetrieveResource()
        throws ResourceNotAvailableException, IOException {
    final String okCdx = "ing.dk/ 20090706131100 http://ing.dk/ text/html 200 Z3UM6JX4FCO6VMVTPM6VBNJPN5D6QLO3 - 3619 arcfile_withredirects.arc";
    NetarchiveResourceStore resourceStore = new NetarchiveResourceStore();
    CDXLineToSearchResultAdapter cdxAdapter = new CDXLineToSearchResultAdapter();
    CaptureSearchResult capture = cdxAdapter.adapt(okCdx);

    ArcResource arcResource = (ArcResource) resourceStore.retrieveResource(capture);
    assertNotNull("Should have a resource", arcResource);
    assertTrue(arcResource.getRecordLength() > 0);
    assertFalse(arcResource.getHttpHeaders().isEmpty());
    assertEquals(200, arcResource.getStatusCode());

    // Dump the raw record so its text can be inspected.
    ByteArrayOutputStream dumped = new ByteArrayOutputStream();
    arcResource.getArcRecord().dump(dumped);
    String recordText = dumped.toString("UTF-8");
    assertNotNull(recordText);
    assertTrue(recordText.contains("Motorola"));
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreWarcTester.java

示例3: testJob

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Runs a DeduplicationCDXExtractionBatchJob over the metadata test file and
 * checks that it emits more than two CDX lines, each of which adapts to a
 * search result with a non-null mime type.
 *
 * @throws IOException if flushing the in-memory output fails
 */
public void testJob() throws IOException {
    File metadataInput = new File(TestInfo.WORKING_DIR, METADATA_FILENAME);
    assertTrue("file should exist", metadataInput.isFile());

    BatchLocalFiles batch = new BatchLocalFiles(new File[]{metadataInput});
    DeduplicationCDXExtractionBatchJob cdxJob = new DeduplicationCDXExtractionBatchJob();
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    batch.run(cdxJob, sink);
    sink.flush();

    String[] cdxLines = sink.toString().split("\\n");
    assertTrue("Expect some results", cdxLines.length > 2);

    CDXLineToSearchResultAdapter lineAdapter = new CDXLineToSearchResultAdapter();
    for (int i = 0; i < cdxLines.length; i++) {
        CaptureSearchResult adapted = lineAdapter.adapt(cdxLines[i]);
        assertNotNull("Expect a mime type for every result", adapted.getMimeType());
    }
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:18,代码来源:DeduplicationCDXExtractionBatchJobTester.java

示例4: testAdaptLine

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Converts two deduplication crawl-log lines to CDX lines, parses them back
 * into search results and checks the ARC file name and HTTP code of each.
 */
public void testAdaptLine() {
    final String fileMessage = "Should get the arcfilename back out of the cdx line";
    final String codeMessage = "Should get the right http code out of the cdx line";

    DeduplicateToCDXAdapterInterface dedupAdapter = new DeduplicateToCDXAdapter();

    // First crawl-log sample.
    String firstCdx = dedupAdapter.adaptLine(DEDUP_CRAWL_STRING);
    CDXLineToSearchResultAdapter cdxAdapter = new CDXLineToSearchResultAdapter();
    CaptureSearchResult first = cdxAdapter.adapt(firstCdx);
    assertEquals(fileMessage,
            "1-1-20090513141823-00008-sb-test-har-001.statsbiblioteket.dk.arc",
            first.getFile());
    assertEquals(codeMessage, "200", first.getHttpCode());

    // Second crawl-log sample.
    String secondCdx = dedupAdapter.adaptLine(DEDUP_CRAWL_STRING2);
    CaptureSearchResult second = cdxAdapter.adapt(secondCdx);
    assertEquals(fileMessage,
            "118657-119-20110428163750-00001-kb-prod-har-004.kb.dk.arc",
            second.getFile());
    assertEquals(codeMessage, "200", second.getHttpCode());
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:DeduplicateToCDXAdapterTester.java

示例5: testAdaptStream

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Feeds the deduplication crawl log through DeduplicateToCDXAdapter and
 * checks that every produced CDX line adapts to a search result with a
 * non-null mime type, and that more than two lines were produced.
 *
 * @throws IOException if the crawl log cannot be opened or closed
 */
public void testAdaptStream() throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    InputStream is = new FileInputStream(new File(TestInfo.WORKING_DIR, DEDUP_CRAWL_LOG));
    try {
        DeduplicateToCDXAdapterInterface adapter = new DeduplicateToCDXAdapter();
        adapter.adaptStream(is, os);
    } finally {
        // Release the file handle even when the adapter throws; the original
        // never closed this stream.
        is.close();
    }
    os.close();
    String output = os.toString();
    String[] lines = output.split("\n");
    CDXLineToSearchResultAdapter adapter2 = new CDXLineToSearchResultAdapter();
    for (String line: lines) {
        CaptureSearchResult csr = adapter2.adapt(line);
        assertNotNull("Should have a valid mime type for every line, inclding '" + line + "'", 
                csr.getMimeType());
    }
    assertTrue("expect at least 3 lines of output, got " + lines.length, lines.length > 2);
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:DeduplicateToCDXAdapterTester.java

示例6: testRetrieveRedirect

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Retrieves the record for a 302 CDX line through NetarchiveResourceStore
 * and verifies its length, headers, status code and that it can be dumped
 * as UTF-8 text.
 *
 * @throws ResourceNotAvailableException if the store cannot supply the record
 * @throws IOException if dumping the record fails
 */
public void testRetrieveRedirect()
        throws ResourceNotAvailableException, IOException {
    final String line = "netarkivet.dk/ 20090706131100 http://netarkivet.dk/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ http://netarkivet.dk/index-da.php 3311 arcfile_withredirects.arc";
    NetarchiveResourceStore netarchiveStore = new NetarchiveResourceStore();
    CaptureSearchResult searchResult =
            new CDXLineToSearchResultAdapter().adapt(line);

    ArcResource fetched = (ArcResource) netarchiveStore.retrieveResource(searchResult);
    assertNotNull("Should have a resource", fetched);
    assertTrue(fetched.getRecordLength() > 0);
    assertFalse(fetched.getHttpHeaders().isEmpty());
    assertEquals(302, fetched.getStatusCode());

    // The record must be dumpable and decodable.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    fetched.getArcRecord().dump(buffer);
    assertNotNull(buffer.toString("UTF-8"));
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:17,代码来源:NetarchiveResourceStoreTester.java

示例7: testRetrieveResource

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Retrieves the record for a 200 CDX line through NetarchiveResourceStore
 * and verifies its length, headers, status code and that the dumped content
 * contains the word "Motorola".
 *
 * @throws ResourceNotAvailableException if the store cannot supply the record
 * @throws IOException if dumping the record fails
 */
public void testRetrieveResource()
        throws ResourceNotAvailableException, IOException {
    final String line = "ing.dk/ 20090706131100 http://ing.dk/ text/html 200 Z3UM6JX4FCO6VMVTPM6VBNJPN5D6QLO3 - 3619 arcfile_withredirects.arc";
    NetarchiveResourceStore netarchiveStore = new NetarchiveResourceStore();
    CaptureSearchResult searchResult =
            new CDXLineToSearchResultAdapter().adapt(line);

    ArcResource fetched = (ArcResource) netarchiveStore.retrieveResource(searchResult);
    assertNotNull("Should have a resource", fetched);
    assertTrue(fetched.getRecordLength() > 0);
    assertFalse(fetched.getHttpHeaders().isEmpty());
    assertEquals(200, fetched.getStatusCode());

    // Dump the record and look for the expected marker text.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    fetched.getArcRecord().dump(buffer);
    String text = buffer.toString("UTF-8");
    assertNotNull(text);
    assertTrue(text.contains("Motorola"));
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:18,代码来源:NetarchiveResourceStoreTester.java

示例8: adapt

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Adapts an ARC record into a search result by delegating to adaptInner.
 *
 * @param rec the ARC record to adapt
 * @return the adapted search result, or null when adaptInner throws an
 *         IOException (the stack trace is printed in that case)
 */
public CaptureSearchResult adapt(ARCRecord rec) {
    CaptureSearchResult adapted = null;
    try {
        adapted = adaptInner(rec);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return adapted;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:9,代码来源:NetarchiveSuiteARCRecordToSearchResultAdapter.java

示例9: adaptWARCHTTPResponse

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Reads the HTTP status line and headers from a WARC record, stores the
 * status code on the given search result and passes the headers on to the
 * annotater.
 *
 * @param result the search result to fill in; the same instance is returned
 * @param rec the WARC record positioned at the start of the HTTP message
 * @return the search result passed in
 * @throws IOException a RecoverableIOException is thrown when no
 *         end-of-line terminated status line can be read, or when the line
 *         does not start like an HTTP status line
 */
private CaptureSearchResult adaptWARCHTTPResponse(CaptureSearchResult result,
        WARCRecord rec) throws IOException {

    ArchiveRecordHeader header = rec.getHeader();

    // The HTTP message has to be parsed by hand here, since the WARC reader
    // does not do it for us.
    byte[] rawStatus = HttpParser.readRawLine(rec);
    int eolLength = getEolCharsCount(rawStatus);
    if (eolLength <= 0) {
        String seen = (rawStatus == null) ? "(null)" : new String(rawStatus);
        throw new RecoverableIOException("Failed to read http status where one " +
                " was expected: " + seen);
    }

    // Strip the line terminator before decoding the status line.
    String statusText = EncodingUtil.getString(rawStatus, 0,
            rawStatus.length - eolLength, ARCConstants.DEFAULT_ENCODING);
    if (statusText == null || !StatusLine.startsWithHTTP(statusText)) {
        throw new RecoverableIOException("Failed parse of http status line.");
    }
    result.setHttpCode(
            String.valueOf(new StatusLine(statusText).getStatusCode()));

    Header[] httpHeaders = HttpParser.parseHeaders(rec,
            ARCConstants.DEFAULT_ENCODING);
    annotater.annotateHTTPContent(result, rec, httpHeaders, header.getMimetype());

    return result;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:32,代码来源:NetarchiveSuiteWARCRecordToSearchResultAdapter.java

示例10: filterObject

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Excludes a capture when its original URL fully matches any of the
 * configured regular expressions.
 *
 * The filter group is always told that an administrative check was seen,
 * and is told the check passed only when no expression matched. According
 * to the original author, leaving these calls out makes excluded objects
 * show up as "not in the archive" instead of as blocked.
 *
 * @param captureSearchResult the capture to test
 * @return ObjectFilter.FILTER_EXCLUDE on a match, otherwise
 *         ObjectFilter.FILTER_INCLUDE
 */
@Override
public int filterObject(CaptureSearchResult captureSearchResult) {
    filterGroup.setSawAdministrative();

    boolean blocked = false;
    for (Pattern exclusion : regexps) {
        if (exclusion.matcher(captureSearchResult.getOriginalUrl()).matches()) {
            blocked = true;
            break;
        }
    }
    if (blocked) {
        return ObjectFilter.FILTER_EXCLUDE;
    }

    filterGroup.setPassedAdministrative();
    return ObjectFilter.FILTER_INCLUDE;
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:16,代码来源:RegExpExclusionFilter.java

示例11: setUp

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Prepares the fixture: installs the mock JMS connection, rebuilds the
 * working directory from the originals, creates the store under test and
 * builds the CaptureSearchResult instances used by the tests.
 */
@Override
public void setUp() {
    super.setUp();
    // Install the mock JMS connection before anything asks the factory for one.
    JMSConnectionMockupMQ.useJMSConnectionMockupMQ();
    // Start from a clean working directory populated from the originals.
    FileUtils.removeRecursively(TestInfo.WORKING_DIR);
    TestFileUtils.copyDirectoryNonCVS(TestInfo.ORIGINALS_DIR, TestInfo.WORKING_DIR);
    
    // NOTE(review): the unit of this timeout is not visible here - presumably milliseconds; confirm.
    Settings.set(JMSArcRepositoryClient.ARCREPOSITORY_GET_TIMEOUT, "1000");
    // Guard against accidentally running against a real JMS connection.
    assertTrue("Should get a mock connection",
        JMSConnectionFactory.getInstance() instanceof JMSConnectionMockupMQ);
    arc = (ArcRepositoryClient) ArcRepositoryClientFactory.getPreservationInstance();

    netarchiveResourceStore = new NetarchiveResourceStore();

    // Search result pointing at the start of the metadata file.
    metadataResource = new CaptureSearchResult();
    metadataResource.setFile(metadataFile);
    metadataResource.setOffset(0L);

    // Search result pointing at offset 2041 of the upload file.
    uploadResource = new CaptureSearchResult();
    uploadResource.setFile(uploadFile);
    uploadResource.setOffset(2041L);

    // Search result with an original URL set, also backed by the metadata file.
    httpResource = new CaptureSearchResult();
    httpResource.setOriginalUrl("http://www.netarkivet.dk/");
    httpResource.setOffset(0L);
    httpResource.setFile(metadataFile);


    // Left without file or offset set.
    resourceNotAvaliable = new CaptureSearchResult();
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:31,代码来源:NetarchiveResourceStoreWarcTester.java

示例12: getPrefixIterator

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented yet.
 *
 * @param prefix the prefix to iterate from (currently ignored)
 * @return always null
 * @throws ResourceIndexNotAvailableException declared, never thrown by this stub
 */
public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix)
        throws ResourceIndexNotAvailableException {
    // TODO: provide a real implementation; callers currently receive null.
    return null;
}
 
开发者ID:anjackson,项目名称:soldx,代码行数:6,代码来源:SolrSearchResultSource.java

示例13: getPrefixReverseIterator

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented yet.
 *
 * @param prefix the prefix to iterate backwards from (currently ignored)
 * @return always null
 * @throws ResourceIndexNotAvailableException declared, never thrown by this stub
 */
public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix)
        throws ResourceIndexNotAvailableException {
    // TODO: provide a real implementation; callers currently receive null.
    return null;
}
 
开发者ID:anjackson,项目名称:soldx,代码行数:6,代码来源:SolrSearchResultSource.java

示例14: cleanup

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Not implemented yet: the iterator passed in is left untouched.
 *
 * @param c the iterator to clean up (currently ignored)
 * @throws IOException declared, never thrown by this stub
 */
public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException {
    // TODO: provide a real implementation; nothing is released here.
}
 
开发者ID:anjackson,项目名称:soldx,代码行数:6,代码来源:SolrSearchResultSource.java

示例15: retrieveResource

import org.archive.wayback.core.CaptureSearchResult; //导入依赖的package包/类
/**
 * Transforms search result into a resource, according to the ResourceStore
 * interface.
 *
 * The file named by the search result is looked up in the local file cache.
 * If it is missing there, it is fetched through the client into the cache
 * directory and registered in the cache before the resource is created.
 *
 * @param captureSearchResult the search result.
 * @return a valid resource containing metadata and a link to the ARC
 * or warc-record
 * @throws ResourceNotAvailableException if something went wrong fetching
 * the record; the underlying IOException is attached as the cause.
 */
public Resource retrieveResource(CaptureSearchResult captureSearchResult)
        throws ResourceNotAvailableException {
    String arcfile = captureSearchResult.getFile();
    long offset = captureSearchResult.getOffset();

    logger.info("Received request for resource from file '" + arcfile
            + "' at offset '" + offset + "'");

    // Try to lookup the file in the cache
    // make synchronized to disallow more than one using
    // the cache at any one time
    synchronized (fileCache) {
        File wantedFile = fileCache.get(arcfile);
        try {
            if (wantedFile != null && wantedFile.exists()) {
                logger.debug("Found the file '" + arcfile
                        + "' in the cache. ");
                return ResourceFactory.getResource(wantedFile, offset);
            } else {
                logger.debug("The file '" + arcfile
                        + "' was not found in the cache. ");
                // Get file from bitarchive, and place it in the cachedir
                // directory
                File fileFromBitarchive = new File(fileCache.getCacheDir(),
                        arcfile);
                client.getFile(arcfile, replicaUsed, fileFromBitarchive);
                // put into the cache
                fileCache.put(arcfile, fileFromBitarchive);
                logger.info("File '" + arcfile
                        + "' downloaded from archive and put into the cache '"
                        + fileCache.getCacheDir().getAbsolutePath()
                        + "'.");
                return ResourceFactory.getResource(fileFromBitarchive,
                        offset);
            }
        } catch (IOException e) {
            logger.error("Error looking for non existing resource", e);
            // The class name used to be glued directly onto "Throws";
            // a separator keeps the message readable.
            ResourceNotAvailableException failure =
                    new ResourceNotAvailableException(this.getClass()
                            .getName()
                            + ": Throws Exception when accessing "
                            + "CaptureResult given from Wayback.");
            // Keep the original failure reachable for callers. initCause is
            // used because only the message constructor is known here.
            failure.initCause(e);
            throw failure;
        }
    }
}
 
开发者ID:netarchivesuite,项目名称:netarchivesuite-svngit-migration,代码行数:56,代码来源:NetarchiveCacheResourceStore.java


注:本文中的org.archive.wayback.core.CaptureSearchResult类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。