当前位置: 首页>>代码示例>>PHP>>正文


PHP Crawler::crawl方法代码示例

本文整理汇总了PHP中Crawler::crawl方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::crawl方法的具体用法？PHP Crawler::crawl怎么用？PHP Crawler::crawl使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。


在下文中一共展示了Crawler::crawl方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: crawlForNews

 /**
  * Crawl a news website and report how many pages were crawled.
  *
  * Builds a Crawler from the site URL and the time-to-live value, runs it,
  * and returns the element count of whatever getCrawled() yields afterwards.
  *
  * @param mixed $nrOfDaysBack How many days the crawler should go back (counting from today)
  * @param mixed $newsSiteUrl  Root URL of the news site (the seed of the crawler)
  * @param mixed $timeToLive   Second constructor argument of Crawler;
  *                            NOTE(review): unit and exact meaning are not visible here - confirm
  * @param mixed $startDate    Optional; forwarded to crawl() only when truthy
  * @return int Number of entries returned by the crawler's getCrawled()
  */
 public function crawlForNews($nrOfDaysBack, $newsSiteUrl, $timeToLive, $startDate = null)
 {
     $newsCrawler = new Crawler($newsSiteUrl, $timeToLive);

     // Without a (truthy) start date, crawl() is invoked with a single argument.
     if (!$startDate) {
         $newsCrawler->crawl($nrOfDaysBack);
     } else {
         $newsCrawler->crawl($nrOfDaysBack, $startDate);
     }

     $crawledPages = $newsCrawler->getCrawled();

     return count($crawledPages);
 }
开发者ID:Bram9205,项目名称:WebInfo,代码行数:16,代码来源:Main.php

示例2: getHotSpots

 /**
  * Locate the non-white regions of the image and slice each of them out.
  *
  * Walks every pixel (x outer, y inner). A pixel that does not compare equal
  * to white and is not already contained in a collected outline starts a
  * crawl at its coordinates; the outline returned by the crawl is recorded.
  * Afterwards the image is sliced once per recorded outline.
  *
  * @return array Two elements: the ImageCollection of slices, followed by the
  *               CrawlerOutlineCollection the slices were produced from
  */
 public function getHotSpots()
 {
     $crawler = new Crawler($this);
     $outlines = new CrawlerOutlineCollection();
     $dimensions = $this->image->size();

     for ($col = 0; $col < $dimensions[0]; $col++) {
         for ($row = 0; $row < $dimensions[1]; $row++) {
             $current = $this->pixel($col, $row);

             // Second argument 5 is presumably a colour tolerance - TODO confirm.
             $isWhite = $current->color()->compare(ImageColor::white(), 5);

             // White pixels and pixels inside an already-crawled area are skipped;
             // contains() is only consulted for non-white pixels.
             if ($isWhite || $outlines->contains($current)) {
                 continue;
             }

             // Unvisited, non-white pixel: crawl from here and remember the outline.
             $outlines->push($crawler->crawl($col, $row));
         }
     }

     $slices = new ImageCollection();
     foreach ($outlines as $region) {
         $slices->push($this->image->sliceByOutline($region));
     }

     return array($slices, $outlines);
 }
开发者ID:passbolt,项目名称:passbolt_selenium,代码行数:27,代码来源:imagepixelmatrix.php

示例3: crawl

 /**
  * Crawl the given URL while printing progress and collecting image counts.
  *
  * Three listeners are attached to a fresh Crawler before it is started:
  *  - hit:    counts the <img> elements of the page DOM, appends an entry
  *            (href, imgLength, processTime) to $this->report and prints a line
  *  - before: prints a start banner
  *  - after:  prints a finish banner
  *
  * @param mixed $url Passed unchanged to Crawler::crawl()
  * @throws Exception
  */
 public function crawl($url)
 {
     $spider = new Crawler();

     $spider->on($spider::EVENT_HIT_CRAWL, function ($href, DOMDocument $dom) {
         // Only the <img> lookup itself is timed.
         $startedAt = microtime(true);
         $imgLength = $dom->getElementsByTagName('img')->length;
         $processTime = sprintf('%.6F', microtime(true) - $startedAt);

         $this->report[] = [
             'href' => $href,
             'imgLength' => $imgLength,
             'processTime' => $processTime,
         ];

         $line = '  - ' . $href . ' [img: ' . $imgLength . ']' . PHP_EOL;
         $this->show($line);
     });

     $spider->on($spider::EVENT_BEFORE_CRAWL, function () {
         $this->show('Start crawl' . PHP_EOL);
     });

     $spider->on($spider::EVENT_AFTER_CRAWL, function () {
         $this->show('Finish crawl' . PHP_EOL);
     });

     $spider->crawl($url);
 }
开发者ID:kazak,项目名称:test,代码行数:24,代码来源:Application.php

示例4: define

    define('DIR_ROOT', dirname(__FILE__));
}
// Locate the KvzLib checkout unless DIR_KVZLIB has already been defined.
if (!defined('DIR_KVZLIB')) {
    // Candidate locations, tried in order.
    $lookIn = array(
        '/Users/kevin/workspace/kvzlib',
        '/home/kevin/workspace/kvzlib',
        DIR_ROOT . '/ext/kvzlib',
    );

    foreach ($lookIn as $dir) {
        // A candidate qualifies only if it is a directory that contains kvzlib.php.
        if (!is_dir($dir) || !file_exists($dir . '/kvzlib.php')) {
            continue;
        }

        define('DIR_KVZLIB', $dir);
        break;
    }

    // No candidate matched: raise a user-level error listing what was tried.
    if (!defined('DIR_KVZLIB')) {
        trigger_error('KvzLib not found in either: ' . implode(', ', $lookIn), E_USER_ERROR);
    }
}
// Path of the IMDB configuration file, relative to the project root.
define('IMDBPHP_CONFIG', DIR_ROOT . '/config/imdb.php');

// Prepend KvzLib and the project root to the include path. PATH_SEPARATOR is
// used instead of a hard-coded ":" so the joined path is valid on every platform.
ini_set("include_path", implode(PATH_SEPARATOR, array(DIR_KVZLIB, DIR_ROOT, ini_get("include_path"))));

// KvzLib helpers first, then the project's own libraries.
require_once DIR_KVZLIB . '/php/classes/KvzShell.php';
require_once DIR_KVZLIB . '/php/classes/KvzHTML.php';
require_once DIR_KVZLIB . '/php/all_functions.php';
require_once DIR_ROOT . '/libs/crawler.php';
require_once DIR_ROOT . '/libs/movie.php';
require_once DIR_ROOT . '/libs/store.php';
// Resolved through the include path configured above.
require_once 'imdb.class.php';

// Output location of the generated HTML page.
$outDir = '/home/kevin/Dropbox/Public/cinema';
$outFile = 'kijken.html';

// Options handed to the Crawler constructor.
$crawlerOptions = array('dir' => '/data/moviesHD', 'minSize' => '600M', 'cachedir' => DIR_ROOT . '/cache', 'photodir' => $outDir . '/images');
$Crawler = new Crawler($crawlerOptions);
$movies = $Crawler->crawl();

// Hand the crawl result to a Store configured for 'html' output and save it.
$Store = new Store($movies, 'html', array('photovirt' => 'images', 'outputdir' => $outDir, 'outputfile' => $outFile, 'separate_on_dir' => 1));
$Store->save();
// Alternative to save(), left disabled by the original author.
#$Store->output();
开发者ID:joericochuyt,项目名称:kvzlib,代码行数:31,代码来源:moviexplore.php

示例5: extractContent

 /**
  * Run a Crawler built from this object's config against a single URL.
  *
  * @param string $url     Forwarded to Crawler::crawl() as its first argument
  * @param string $rawHTML Optional (defaults to null); forwarded unchanged as
  *                        the second argument
  *
  * @return mixed Whatever Crawler::crawl() returns
  */
 public function extractContent($url, $rawHTML = null)
 {
     $contentCrawler = new Crawler($this->config);

     return $contentCrawler->crawl($url, $rawHTML);
 }
开发者ID:scotteh,项目名称:php-goose,代码行数:10,代码来源:Client.php

示例6: generate

 /**
  * Generate the sitemap entries.
  *
  * Runs the crawler, then appends one entry (loc, priority, changefreq) to
  * $this->urls for every found URL whose status equals STATUS_SUCCESS.
  *
  * @return $this
  */
 public function generate()
 {
     $this->crawler->crawl();

     foreach ($this->crawler->getFoundUrls() as $foundUrl => $crawlStatus) {
         // Loose comparison is intentional here: it mirrors the original check.
         if ($crawlStatus != \Magelight\Sitemap\Models\Crawler::STATUS_SUCCESS) {
             continue;
         }

         $this->urls[] = [
             'loc' => $foundUrl,
             'priority' => $this->getUrlPriority($foundUrl),
             'changefreq' => $this->getUrlChangeFrequency($foundUrl),
         ];
     }

     return $this;
 }
开发者ID:rganin,项目名称:magelight,代码行数:15,代码来源:Sitemap.php


注:本文中的Crawler::crawl方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。