本文整理汇总了PHP中Crawler::crawl方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::crawl方法的具体用法？PHP Crawler::crawl怎么用？PHP Crawler::crawl使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。
在下文中一共展示了Crawler::crawl方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: crawlForNews
/**
 * Crawl a news website and report how many pages were collected.
 *
 * A Crawler is created with the site URL and the time-to-live value, run
 * for the requested number of days, and the size of its "crawled" list is
 * returned.
 *
 * @param mixed $nrOfDaysBack The number of days the crawler should go back
 *                            (counting from today according to the original
 *                            note; presumably an integer - TODO confirm)
 * @param string $newsSiteUrl The root URL of the news site (the seed of the crawler)
 * @param mixed $timeToLive   Forwarded as the second Crawler constructor argument
 *                            (presumably a time-to-live limit - TODO confirm meaning/units)
 * @param mixed $startDate    Optional; forwarded to Crawler::crawl() only when truthy
 * @return int Number of entries returned by Crawler::getCrawled()
 */
public function crawlForNews($nrOfDaysBack, $newsSiteUrl, $timeToLive, $startDate = null)
{
    $crawler = new Crawler($newsSiteUrl, $timeToLive);

    // Without a usable start date, let the crawler apply its own default.
    if (!$startDate) {
        $crawler->crawl($nrOfDaysBack);
    } else {
        $crawler->crawl($nrOfDaysBack, $startDate);
    }

    $crawledPages = $crawler->getCrawled();

    return count($crawledPages);
}
示例2: getHotSpots
/**
 * Find the non-white regions of the image and cut each one out.
 *
 * Every pixel is visited column by column. A pixel that is neither white
 * nor already contained in a collected outline is handed to the crawler as
 * a starting point, and the outline it returns is stored. Afterwards the
 * image is sliced once per outline.
 *
 * @return array Two-element array: the collection of image slices first,
 *               the collection of outlines second
 */
public function getHotSpots()
{
    $crawler = new Crawler($this);
    $outlines = new CrawlerOutlineCollection();
    $dimensions = $this->image->size();

    for ($col = 0; $col < $dimensions[0]; ++$col) {
        for ($row = 0; $row < $dimensions[1]; ++$row) {
            $pixel = $this->pixel($col, $row);

            // White pixels are ignored (the 5 is presumably a tolerance - TODO confirm),
            // as are pixels that belong to an area that was already crawled.
            $isWhite = $pixel->color()->compare(ImageColor::white(), 5);
            if ($isWhite || $outlines->contains($pixel)) {
                continue;
            }

            // New area found: crawl it starting from this coordinate.
            $outlines->push($crawler->crawl($col, $row));
        }
    }

    $hotspots = new ImageCollection();
    foreach ($outlines as $outline) {
        $slice = $this->image->sliceByOutline($outline);
        $hotspots->push($slice);
    }

    return array($hotspots, $outlines);
}
示例3: crawl
/**
 * Crawl the given URL and record an image count for every page hit.
 *
 * Three listeners are registered on a fresh Crawler before it is started:
 * one appends a row to $this->report and prints a line per hit, the other
 * two print a message before and after the crawl.
 *
 * @param mixed $url Passed unchanged to Crawler::crawl() (presumably a URL string)
 *
 * @throws Exception Declared by the original docblock; presumably raised by the Crawler - TODO confirm
 */
public function crawl($url)
{
    $crawler = new Crawler();

    $crawler->on($crawler::EVENT_HIT_CRAWL, function ($href, DOMDocument $dom) {
        // Only the <img> lookup itself is timed.
        $startedAt = microtime(true);
        $imageCount = $dom->getElementsByTagName('img')->length;
        $elapsed = microtime(true) - $startedAt;

        $this->report[] = [
            'href' => $href,
            'imgLength' => $imageCount,
            'processTime' => sprintf('%.6F', $elapsed),
        ];
        $this->show(' - ' . $href . ' [img: ' . $imageCount . ']' . PHP_EOL);
    });

    $crawler->on($crawler::EVENT_BEFORE_CRAWL, function () {
        $this->show('Start crawl' . PHP_EOL);
    });

    $crawler->on($crawler::EVENT_AFTER_CRAWL, function () {
        $this->show('Finish crawl' . PHP_EOL);
    });

    $crawler->crawl($url);
}
示例4: define
define('DIR_ROOT', dirname(__FILE__));
}
// Locate the KvzLib checkout unless DIR_KVZLIB was already defined.
if (!defined('DIR_KVZLIB')) {
// Candidate directories, checked in order; the first match wins.
$lookIn = array('/Users/kevin/workspace/kvzlib', '/home/kevin/workspace/kvzlib', DIR_ROOT . '/ext/kvzlib');
foreach ($lookIn as $dir) {
// A candidate qualifies when it is a directory that contains kvzlib.php.
if (is_dir($dir) && file_exists($dir . '/kvzlib.php')) {
define('DIR_KVZLIB', $dir);
break;
}
}
// None of the candidates matched: raise a user-level error listing them.
// NOTE(review): passing E_USER_ERROR to trigger_error() is deprecated as of PHP 8.4.
if (!defined('DIR_KVZLIB')) {
trigger_error('KvzLib not found in either: ' . implode(', ', $lookIn), E_USER_ERROR);
}
}
// Path of the IMDB configuration file, relative to the project root.
define('IMDBPHP_CONFIG', DIR_ROOT . '/config/imdb.php');
// Put KvzLib and the project root in front of the existing include path.
// NOTE(review): the ':' separator is hard-coded; PATH_SEPARATOR would be portable.
ini_set("include_path", DIR_KVZLIB . ":" . DIR_ROOT . ":" . ini_get("include_path"));
// KvzLib classes and helper functions.
require_once DIR_KVZLIB . '/php/classes/KvzShell.php';
require_once DIR_KVZLIB . '/php/classes/KvzHTML.php';
require_once DIR_KVZLIB . '/php/all_functions.php';
// Project libraries.
require_once DIR_ROOT . '/libs/crawler.php';
require_once DIR_ROOT . '/libs/movie.php';
require_once DIR_ROOT . '/libs/store.php';
// Resolved through the include path configured above.
require_once 'imdb.class.php';
// Destination directory and file name handed to the Store below.
$outDir = '/home/kevin/Dropbox/Public/cinema';
$outFile = 'kijken.html';
// Options handed to the Crawler constructor (presumably: directory to scan, minimum
// file size, cache location and photo output location - TODO confirm against libs/crawler.php).
$crawlerOptions = array('dir' => '/data/moviesHD', 'minSize' => '600M', 'cachedir' => DIR_ROOT . '/cache', 'photodir' => $outDir . '/images');
$Crawler = new Crawler($crawlerOptions);
// Run the crawl; the result is passed straight to the Store.
$movies = $Crawler->crawl();
// Build a Store for the crawl result with the 'html' type and the output settings, then save it.
$Store = new Store($movies, 'html', array('photovirt' => 'images', 'outputdir' => $outDir, 'outputfile' => $outFile, 'separate_on_dir' => 1));
$Store->save();
#$Store->output();
示例5: extractContent
/**
 * Run a Crawler configured with $this->config over a single URL.
 *
 * @param string $url          Address handed to Crawler::crawl()
 * @param string|null $rawHTML Optional markup forwarded as the second argument
 *                             (presumably used instead of fetching $url - TODO confirm)
 * @return mixed Whatever Crawler::crawl() returns (the extracted article)
 */
public function extractContent($url, $rawHTML = null)
{
    return (new Crawler($this->config))->crawl($url, $rawHTML);
}
示例6: generate
/**
 * Generate sitemap
 *
 * Runs the crawler, then appends one entry (loc, priority, changefreq) to
 * $this->urls for every found URL whose status equals STATUS_SUCCESS.
 *
 * @return $this
 */
public function generate()
{
    $this->crawler->crawl();

    $foundUrls = $this->crawler->getFoundUrls();
    foreach ($foundUrls as $url => $status) {
        // Only URLs that were crawled successfully belong in the sitemap.
        if ($status != \Magelight\Sitemap\Models\Crawler::STATUS_SUCCESS) {
            continue;
        }

        $priority = $this->getUrlPriority($url);
        $changeFrequency = $this->getUrlChangeFrequency($url);

        $this->urls[] = [
            'loc' => $url,
            'priority' => $priority,
            'changefreq' => $changeFrequency,
        ];
    }

    return $this;
}