本文整理汇总了PHP中Symfony\Component\DomCrawler\Crawler::filterXpath方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::filterXpath方法的具体用法？PHP Crawler::filterXpath怎么用？PHP Crawler::filterXpath使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Symfony\Component\DomCrawler\Crawler的用法示例。
在下文中一共展示了Crawler::filterXpath方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: search
/**
 * Fetch the search feed for a query and convert each <item> into a Torrent.
 *
 * @param string $query    Search terms, forwarded to makeUrl().
 * @param int    $category Category identifier, forwarded to makeUrl().
 * @return array Torrent objects built from the feed; empty when building the
 *               URL, performing the request or creating the crawler throws.
 */
public function search($query, $category)
{
    // The request is sent with a desktop-browser User-Agent header.
    $userAgent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36';
    $requestOptions = ['headers' => ['User-Agent' => $userAgent]];

    try {
        $response = $this->httpClient->get($this->makeUrl($query, $category), $requestOptions);
        $feed = new Crawler((string) $response->getBody());
    } catch (\Exception $e) {
        return [];
    }

    $torrents = [];

    foreach ($feed->filterXPath('//channel/item') as $feedItem) {
        $torrent = new Torrent();
        $entry = new Crawler($feedItem);

        // Small reader so every field lookup below has the same shape.
        $textAt = function ($xpath) use ($entry) {
            return $entry->filterXPath($xpath)->text();
        };

        $torrent->setSite($this->tag);
        $torrent->setTitle($textAt('//title'));
        $torrent->setSeeders((int) $textAt('//torrent:seeds'));
        $torrent->setLeechers((int) $textAt('//torrent:peers'));
        $torrent->setMagnet($textAt('//torrent:magnetURI'));
        $torrent->setSize($this->formatBytes((int) $textAt('//torrent:contentLength')));
        $torrent->setAge($textAt('//pubDate'));
        $torrent->setCategory($textAt('//category'));

        $torrents[] = $torrent;
    }

    return $torrents;
}
示例2: getTitleFallback
/**
 * Read the text of the <title> element, defaulting to an empty string.
 *
 * @param Crawler $crawler Crawler that is searched for `//title`.
 * @return string The matched title text, or '' when nothing matches.
 */
public function getTitleFallback(Crawler $crawler)
{
    $titleNodes = $crawler->filterXpath('//title');

    return $titleNodes->count() ? $titleNodes->text() : '';
}
示例3: search
/**
 * Run a search against thepiratebay.se and map each result row to a SearchResult.
 *
 * @param string $query Search terms; URL-encoded before being placed in the path.
 * @return SearchResult[] One entry per table row after the header; empty when
 *                        the HTTP client throws a ClientException.
 */
public function search($query)
{
    $searchUrl = 'https://thepiratebay.se/search/' . urlencode($query) . '/0/7/0';

    try {
        $response = $this->httpClient->get($searchUrl);
    } catch (ClientException $e) {
        return [];
    }

    $page = new Crawler((string) $response->getBody());
    $results = [];
    $rowNumber = 0;

    foreach ($page->filter('#searchResult tr') as $row) {
        // Row 0 is the table header, not a search result.
        if (0 === $rowNumber++) {
            continue;
        }

        $result = new SearchResult();
        $rowCrawler = new Crawler($row);

        $result->setName(trim($rowCrawler->filter('.detName')->text()));

        $cells = $rowCrawler->filter('td');
        $result->setSeeders((int) $cells->eq(2)->text());
        $result->setLeechers((int) $cells->eq(3)->text());
        $result->setMagnetUrl($rowCrawler->filterXpath('//tr/td/a')->attr('href'));

        $results[] = $result;
    }

    return $results;
}
示例4: crawl
/**
 * Fetch a URL, then recursively crawl every crawlable link found in its HTML.
 *
 * Any exception raised while fetching, parsing or recursing is caught and the
 * URL is appended to $this->errorUrls.
 *
 * @param string        $url         Address to request.
 * @param callable|null $afterCrawl  When given, called as $afterCrawl($url, $html)
 *                                   right after the page body is read.
 * @param callable|null $beforeCrawl When given, called as $beforeCrawl($normalizedUrl);
 *                                   a falsy return skips that link.
 */
function crawl($url, $afterCrawl = null, $beforeCrawl = null)
{
    $urlHash = md5($url);

    try {
        $this->crawledUrls[] = $urlHash;

        $html = $this->client->request("GET", $url)->getBody()->getContents();
        $this->requestCount++;
        echo $this->requestCount . "\n";

        if (null !== $afterCrawl) {
            $afterCrawl($url, $html);
        }

        // Release the page and its DOM before recursing into child links.
        $pageCrawler = new DomCrawler($html);
        unset($html);
        $hrefs = array_unique($pageCrawler->filterXpath('//a')->extract(['href']));
        unset($pageCrawler);

        foreach ($hrefs as $href) {
            $target = $this->normalizeUrl($url, $href);

            // A limit of 0 means "no limit"; otherwise stop once it is reached.
            $limitReached = $this->maxRequestcount != 0 && $this->requestCount >= $this->maxRequestcount;
            if ($limitReached) {
                return;
            }

            if (!$this->isCrawlable($url, $target)) {
                continue;
            }

            if (null !== $beforeCrawl && !$beforeCrawl($target)) {
                continue;
            }

            $this->crawl($target, $afterCrawl, $beforeCrawl);
        }
    } catch (\Exception $e) {
        $this->errorUrls[] = $url;
    }
}
示例5: load
/**
 * Load the deadline and the list of matches for one gameweek.
 *
 * Stores the gameweek number, parses the deadline from the `.ismStrongCaption`
 * element and appends one GameweekMatch per `.ismFixture` row to $this->matches.
 *
 * @param int|string $gameweek Gameweek identifier, interpolated into the fixtures URL.
 */
public function load($gameweek)
{
    $this->gameweek = $gameweek;
    $crawler = $this->getDom("http://fantasy.premierleague.com/fixtures/{$gameweek}/", ['X-Requested-With' => 'XMLHttpRequest']);

    $deadline = $crawler->filter('.ismStrongCaption')->text();
    // array_pop() takes its argument by reference, so the explode() result has
    // to be held in a variable; passing the call directly raises
    // "Only variables should be passed by reference".
    $deadlineParts = explode(' - ', $deadline);
    $this->deadline_time = $this->parseDate(array_pop($deadlineParts));

    // The team id is embedded in the badge image file name ("...badge_<id>...").
    $badgePattern = '~^.+badge_(\\d+).+$~';

    $games = $crawler->filter('.ismFixture');
    foreach ($games as $g) {
        $gc = new Crawler($g);
        $match = new GameweekMatch();

        $home_team_id = (int) preg_replace($badgePattern, '$1', $gc->filterXpath('//td[3]/img')->attr('src'));
        $away_team_id = (int) preg_replace($badgePattern, '$1', $gc->filterXpath('//td[5]/img')->attr('src'));

        $match->home_team = new TeamSimple();
        $match->home_team->load($home_team_id);
        $match->away_team = new TeamSimple();
        $match->away_team->load($away_team_id);
        $match->start_time = $this->parseDate($gc->filterXpath('//td[1]')->text());

        $this->matches[] = $match;
    }
}
示例6: lookup
/**
 * {@inheritdoc}
 *
 * Fetches the Google Play details page for the given package id and scrapes
 * name, owner, description, cover, tags and screenshots out of the markup.
 *
 * @throws RuntimeException  When symfony/dom-crawler is not installed.
 * @throws NotFoundException When the store answers with HTTP 404.
 */
public function lookup($id)
{
    if (!class_exists('Symfony\\Component\\DomCrawler\\Crawler')) {
        throw new RuntimeException('symfony/dom-crawler is required.');
    }

    $url = sprintf('https://play.google.com/store/apps/details?id=%s', $id);
    $response = $this->exec($url);

    // Read the status first, then close unconditionally so the handle is also
    // released on the 404 path (previously the throw skipped close()).
    $httpCode = $this->getInfo(CURLINFO_HTTP_CODE);
    $this->close();

    if (404 === $httpCode) {
        throw new NotFoundException($id);
    }

    $crawler = new Crawler($response);

    $name = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-title ')]/descendant::div")->text();
    $owner = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-subtitle ') and (contains(concat(' ', normalize-space(@class), ' '), ' primary '))]/descendant::span")->text();
    $description = $crawler->filterXpath('//*[@id="body-content"]/div/div/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div[1]')->text();

    $screenshots = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' details-section ') and (contains(concat(' ', normalize-space(@class), ' '), ' screenshots '))]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' screenshot-container ')]/descendant::img")->each(function ($node, $i) {
        // The first matched image is deliberately left out (yields null here).
        if (0 === $i) {
            return;
        }

        return $node->attr('src');
    });
    // Drops the null placeholder (and any empty src); original keys are kept.
    $screenshots = array_filter($screenshots);

    $tags = array_map('strtolower', (array) $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-subtitle ') and (contains(concat(' ', normalize-space(@class), ' '), ' category '))]/descendant::span")->text());
    $cover = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' details-info ')]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' cover-container ')]/descendant::img")->attr('src');

    return new AppInfo($id, $name, $owner, $description, $cover, $tags, $screenshots, AppInfo::PLATFORM_ANDROID);
}
示例7: populateLeagues
/**
 * Collect the league type headings, then build a UserLeague for every row of
 * every league table and append it to $this->leagues.
 *
 * The n-th league table is given the n-th heading that ends in " leagues"
 * (lower-cased, with that suffix removed) as its type.
 *
 * @param Crawler $crawler Page crawler holding the headings and league tables.
 */
public function populateLeagues($crawler)
{
    $suffixPattern = '~ leagues$~';

    $types = [];
    foreach ($crawler->filterXPath('//*[@class="ismSecondary"]/*[@class="ismTableHeading"]') as $headingNode) {
        $headingText = (new Crawler($headingNode))->text();
        if (!preg_match($suffixPattern, $headingText)) {
            continue;
        }
        $types[] = strtolower(preg_replace($suffixPattern, '', $headingText));
    }

    foreach ($crawler->filterXPath('//*[@class="ismTable ismLeagueTable"]') as $tableIndex => $tableNode) {
        $rows = (new Crawler($tableNode))->filterXpath('//tbody/tr');

        foreach ($rows as $rowNode) {
            $league = new UserLeague();
            $league->type = $types[$tableIndex];
            $league->populate(new Crawler($rowNode));
            $this->leagues[] = $league;
        }
    }
}
示例8: search
/**
 * Search for torrents by scraping the `#searchResult` table.
 *
 * @param string $query    Search terms, forwarded to makeUrl().
 * @param int    $category Category identifier, forwarded to makeUrl().
 * @return array Array of Torrent objects. Empty when the request fails or no
 *               result rows are present.
 */
public function search($query, $category)
{
    // Set single-cell view for torrents (the "lw" cookie), so each result
    // row exposes one value per <td>.
    $requestOptions = ['headers' => ['User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'], 'cookies' => ['lw' => 's']];

    try {
        $url = $this->makeUrl($query, $category);
        $response = $this->httpClient->get($url, $requestOptions);
        $crawler = new Crawler((string) $response->getBody());
    } catch (\Exception $e) {
        // TODO: Log error. Some error has occured.
        return [];
    }

    $items = $crawler->filter('#searchResult tr');
    $torrents = [];
    $firstRow = true;

    foreach ($items as $item) {
        // Ignore the first row.
        if ($firstRow) {
            $firstRow = false;
            continue;
        }

        $torrent = new Torrent();
        $itemCrawler = new Crawler($item);

        // Address every column through eq(), which is zero-based. The previous
        // XPath lookups ("/td[3]/a[0]", "/td[2]", "/td[0]") could never match:
        // XPath positions start at 1, and a path anchored at the document root
        // does not select cells of the current row.
        // Column layout: 0 category, 1 title, 2 age, 3 magnet link, 4 size,
        // 5 seeders, 6 leechers.
        $cells = $itemCrawler->filter('td');

        // Set details for torrent.
        $torrent->setSite($this->tag);
        $torrent->setTitle(trim($cells->eq(1)->text()));
        $torrent->setSeeders((int) $cells->eq(5)->text());
        $torrent->setLeechers((int) $cells->eq(6)->text());
        $torrent->setMagnet($cells->eq(3)->filter('a')->attr('href'));
        $torrent->setSize($cells->eq(4)->text());
        $torrent->setAge($cells->eq(2)->text());
        $torrent->setCategory($cells->eq(0)->text());

        $torrents[] = $torrent;
    }

    return $torrents;
}
示例9: getAllLinks
/**
 * Extract the href of every <a> element in the markup and wrap each in a Url.
 *
 * @param string $html Markup to scan.
 *
 * @return \Spatie\Crawler\Url[] Delivered as the result of collect(...)->map(),
 *                               not as a plain PHP array.
 */
protected function getAllLinks($html)
{
    $hrefs = (new DomCrawler($html))->filterXpath('//a')->extract(['href']);

    return collect($hrefs)->map(function ($href) {
        return Url::create($href);
    });
}
示例10: createAssetsFromDOMElements
/**
 * Build an Asset for every element matched by an XPath expression that carries
 * a non-empty URL attribute.
 *
 * @param string $html         Markup to search.
 * @param string $selector     XPath expression passed to filterXpath().
 * @param string $urlAttribute Name of the attribute holding the asset URL.
 * @param mixed  $type         Passed through to the Asset constructor.
 * @param mixed  $parentPage   Passed through to the Asset constructor.
 * @return array Asset objects, in document order of the matched elements.
 */
protected function createAssetsFromDOMElements($html, $selector, $urlAttribute, $type, $parentPage)
{
    $matches = (new Crawler($html))->filterXpath($selector);
    $assets = [];

    /** @var \DOMElement $node */
    foreach ($matches as $node) {
        $url = $node->getAttribute($urlAttribute);

        // Elements without a usable attribute value produce no asset.
        if (empty($url)) {
            continue;
        }

        if ($this->config->ignoreWhiteSpaces) {
            $url = trim($url);
        }

        $assets[] = new Asset($url, $parentPage, $node->ownerDocument->saveHTML($node), $type);
    }

    return $assets;
}
示例11: findImage
/**
 * Store the URI of the first <img> found in $this->html.
 *
 * @return $this|null The instance, with image_uri set from the first `src`
 *                    value; null when the markup yields no image sources.
 */
public function findImage()
{
    $sources = (new Crawler($this->html))->filterXpath('//img')->extract(['src']);

    if (!$sources) {
        return null;
    }

    $this->image_uri = $this->getUri($sources[0]);

    return $this;
}
示例12: extractAction
/**
 * Read the text of every `//body/p` element and group the texts in pairs.
 *
 * Example: paragraphs "Hello World!", "Hello Crawler!", "Hello World2!" give
 * [["Hello World!", "Hello Crawler!"], ["Hello World2!"]].
 *
 * @param string $html Markup handed to the crawler.
 * @return array List of 2-element arrays of paragraph texts; the last entry
 *               holds a single text when the paragraph count is odd.
 */
public function extractAction($html)
{
    $document = new Crawler();
    $document->add($html);

    // Each extracted row is [text, class]; only the text (index 0) is used.
    $paragraphs = $document->filterXpath('//body/p')->extract(array('_text', 'class'));

    $texts = array();
    foreach ($paragraphs as $paragraph) {
        if (array_key_exists(0, $paragraph)) {
            $texts[] = $paragraph[0];
        }
    }

    return array_chunk($texts, 2);
}
示例13: extractAction
public function extractAction($url)
{
//$html = htmlspecialchars_decode($url);
//print_r($html);
$crawler = new Crawler();
$crawler->add($url);
/*$crawler = $crawler->filter('body')->nextAll();
foreach ($crawler as $domElement) {
$nodeValue = $domElement->nodeValue;
}*/
/*
*
*
filterXpath('//html/body/div/div/form/div/div/table/tbody/tr/td/a/img')
*
*
*/
$readData = $crawler->filterXpath('//html/body/div/div/form/div/div');
/*
$readData0 = $crawler
->filterXpath('//html/body/div/div/form/div/div')
->extract(array('_text', 'class'))
;
print_r($readData0);
*/
$html = '';
foreach ($readData as $domElement) {
$html .= $domElement->ownerDocument->saveHTML($domElement);
}
$crawler = new Crawler();
$crawler->add($html);
// /html/body/div/table
$readData1 = $crawler->filterXpath('//html/body/div/table/tr/th');
$readData11 = $crawler->filterXpath('//html/body/div/table/tr/th')->extract(array('_text', 'class'));
/**** getting ID: to first array $showData1 *****/
$showData1 = array();
$j1 = 0;
foreach ($readData11 as $keyrow => $valuerow) {
if ($keyrow % 2 == 0) {
foreach ($valuerow as $keyid => $valueid) {
if ($keyid % 2 == 0) {
$showData1[$j1] = $valueid;
}
}
$j1++;
}
}
/******** Reading data from table - tr-td - $crawler->add($html);********/
// /html/body/div/table
$readData3 = $crawler->filterXpath('//html/body/div/table/tr/td');
$readData33 = $crawler->filterXpath('//html/body/div/table/tr/td')->extract(array('_text', 'class'));
/**** getting Description: to third array $showData3 *****/
$showData3 = array();
$j3 = 0;
foreach ($readData33 as $keyrow => $valuerow) {
if ($keyrow % 2 == 0) {
foreach ($valuerow as $keydesc => $valuedesc) {
if ($keydesc % 2 == 0) {
$showData3[$j3] = $valuedesc;
}
}
$j3++;
}
}
/*
$html3 = '';
foreach ($readData3 as $domElement) {
$html3 .= $domElement->ownerDocument->saveHTML($domElement);
}
*/
/******** Reading data URL from table - tr-td - $crawler->add($html);********/
// /html/body/div/table
$readData4 = $crawler->filterXpath('//html/body/div/table/tr/td/a/img');
$readData44 = $crawler->filterXpath('//html/body/div/table/tr/td/a/img')->extract(array('src', 'img'));
/**** getting URL: to fourth array $showData4 *****/
$showData4 = array();
$j4 = 0;
foreach ($readData44 as $keyrow => $valuerow) {
foreach ($valuerow as $keyurl => $valueurl) {
if ($keyurl % 2 == 0) {
$showData4[$j4] = $valueurl;
}
}
$j4++;
}
$html4 = '';
foreach ($readData4 as $domElement) {
$html4 .= $domElement->ownerDocument->saveHTML($domElement);
}
/******** Reading data from table - tr-th - input - $crawler->add($html1);********/
// /html/body/div/table
$html1 = '';
foreach ($readData1 as $domElement) {
$html1 .= $domElement->ownerDocument->saveHTML($domElement);
}
$crawler = new Crawler();
$crawler->add($html1);
$readData2 = $crawler->filterXpath('//html/body/th/input');
$readData22 = $crawler->filterXpath('//html/body/th/input')->extract(array('value', 'input'));
/* $reducedSubsetCrawler = $crawler->reduce(function (Crawler $crawler, $i) {
//.........这里部分代码省略.........
示例14: getFirstPageAttributes
/**
 * Extract title, link and price for every product node on the page.
 *
 * @param string $html Markup of the listing page.
 * @return array One array per node matched by self::PRODUCT_XPATH, with keys
 *               'title', 'link' and 'price'. 'price' is the first number found
 *               in the price text (as a string), or null when it has none.
 */
private function getFirstPageAttributes($html)
{
    $crawler = new DomCrawler\Crawler($html);

    return $crawler->filterXpath(self::PRODUCT_XPATH)->each(function (DomCrawler\Crawler $node, $i) {
        $descXpath = '//div[contains(concat(" ", normalize-space(@class), " "), " productInfo ")]/h3/a';
        $priceXpath = '//p[contains(concat(" ", normalize-space(@class), " "), " pricePerUnit ")]';

        // Digits with an optional "." or "," decimal part. The previous pattern
        // tried "[0-9]+[.|,][0-9]" first, which stopped after ONE decimal digit
        // ("12.34" became "12.3") and also accepted a literal "|" as separator.
        $priceRegEx = '/[0-9]+(?:[.,][0-9]+)?/';

        $thisLink = $node->filterXPath($descXpath)->first();
        $thisPriceText = trim($node->filterXPath($priceXpath)->first()->text());

        // Only read the match when there is one, so price text without any
        // number yields null instead of an undefined-offset notice.
        $price = preg_match($priceRegEx, $thisPriceText, $priceMatch) ? $priceMatch[0] : null;

        return array('title' => trim($thisLink->text()), 'link' => $thisLink->attr('href'), 'price' => $price);
    });
}