本文整理汇总了PHP中Symfony\Component\DomCrawler\Crawler::filter方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::filter方法的具体用法？PHP Crawler::filter怎么用？PHP Crawler::filter使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Symfony\Component\DomCrawler\Crawler的用法示例。
在下文中一共展示了Crawler::filter方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: run
/**
 * Crawl one page of search results and append module metadata to results.csv.
 *
 * Every <cite> node of the search page is treated as a URL. Each URL is
 * fetched and its page is scanned for the module name, a Drupal 7 version and
 * a Drupal 8 version (or a "ported to Drupal 8" hint). Rows that have neither
 * a D7 nor a D8 value are skipped. Finally the stored limit is advanced by 10.
 */
public function run()
{
    $c = new Color();
    $csvPath = ROOT . '/lib/results.csv';

    $response = $this->client->request('GET', $this->search . '&start=' . $this->getLimit());
    $crawler = new Crawler();
    $crawler->addContent((string) $response->getBody());

    // Start from the existing CSV content; fall back to an empty string when
    // the file cannot be read so the append below never operates on false.
    $data = file_get_contents($csvPath);
    if ($data === false) {
        $data = '';
    }

    foreach ($crawler->filter('cite') as $url) {
        $row = ['name' => '', 'd7' => '', 'd8' => '', 'url' => ''];
        echo "Found: {$url->nodeValue}" . PHP_EOL;

        // Attempt to make a request to D.O to get details about the module.
        $res = $this->client->request('GET', $url->nodeValue);
        // 400 itself is an error status as well, hence >= rather than >.
        if ($res->getStatusCode() >= 400) {
            echo $c("Unable to fetch data")->red() . PHP_EOL;
            continue;
        }

        $body = (string) $res->getBody();
        if (empty($body)) {
            echo $c('Unable to fetch body')->red() . PHP_EOL;
            continue;
        }

        // Reuse the already materialised body instead of handing over the stream.
        $crawl = new Crawler();
        $crawl->addContent($body);

        // Add the known elements. text() throws on an empty node list, so the
        // name stays empty when the page has no subtitle element.
        $subtitle = $crawl->filter('#page-subtitle');
        if (count($subtitle)) {
            $row['name'] = trim($subtitle->text());
        }
        $row['url'] = trim($url->nodeValue);

        // The help block often has information about the status to D8.
        $helpBlock = $crawl->filter('.help');
        if (count($helpBlock)) {
            if (strpos($helpBlock->text(), 'ported to Drupal 8') !== false) {
                $row['d8'] = 'In progress';
            }
        }

        foreach ($crawl->filter('[data-th="Version"]') as $versionNode) {
            $version = $versionNode->nodeValue;
            if (strpos($version, '7.x') !== false) {
                $row['d7'] = trim($version);
            } elseif (strpos($version, '8.x') !== false) {
                // The original read a misspelled, undefined variable here and
                // therefore always stored an empty D8 version.
                $row['d8'] = trim($version);
            }
        }

        // This module hasn't been ported to D7 - so continue.
        if (empty($row['d7']) && empty($row['d8'])) {
            echo $c('<bg_yellow>This is a Drupal 6 module</bg_yellow>')->colorize() . PHP_EOL;
            continue;
        }

        $data .= implode(',', array_values($row)) . "\n";
        echo $c('Successfully added metadata')->green() . PHP_EOL;
    }

    // Rewrite the whole file in one call; no file handle is left open.
    file_put_contents($csvPath, $data);

    // Increment the limit.
    $limit = $this->getLimit() + 10;
    echo $c('Updating the limit from <yellow>' . $this->getLimit() . '</yellow> to <yellow>' . $limit . '</yellow>')->colorize() . PHP_EOL;
    $this->setLimit($limit);
}
示例2: addProduct
/**
 * Build a product record from one listing node and append it to the results.
 *
 * The product page linked from the listing title is requested to obtain the
 * page size and the text that follows the "Description" heading. The unit
 * price is added to the running total.
 *
 * @param Crawler $node Listing node containing the title link and unit price
 */
private function addProduct(Crawler $node)
{
    /** @var Crawler $titleLink */
    $titleLink = $node->filter('h3 a');

    $item = new \stdClass();
    $item->title = trim($titleLink->text());
    $item->unit_price = $this->getPrice($node->filter('.pricePerUnit'));

    $detailPage = $this->client->request('GET', $titleLink->attr('href'));
    $rawContent = $this->client->getResponse()->getContent();
    $item->size = $this->bytesToKb(strlen($rawContent));

    $item->description = '';
    $heading = $detailPage->filterXPath('//h3[.="Description"]');
    if ($heading->count() > 0) {
        foreach ($heading->siblings() as $neighbour) {
            // Product pages have different structures: stop at the next heading.
            if ($neighbour->tagName == 'h3') {
                break;
            }
            // Collapse runs of whitespace but keep line breaks.
            // @TODO address formatting issues - breaks to new lines
            $chunk = trim(preg_replace("/[^\\S\r\n]+/", " ", $neighbour->nodeValue));
            // Separate chunks with a newline once something has been collected.
            $glue = $item->description != "" ? "\n" : '';
            $item->description .= $glue . $chunk;
        }
    }

    // Keep the running total and the result list in sync.
    $this->total += $item->unit_price;
    $this->results[] = $item;
}
示例3: parse
/**
 * Turn a history page into a list of history entries.
 *
 * Each "spaceit_pad" row is split on spaces; token 1 is used as the progress
 * count and tokens 4 and 6 are passed to Date::formatTime().
 *
 * @param string $contents HTML of the history page
 * @param mixed  $id       Identifier, cast to int before it is stored
 * @param string $type     'anime' builds Anime items, anything else Manga items
 *
 * @return array List of arrays with the keys item, type and time_updated
 */
public static function parse($contents, $id, $type)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    $entries = $page->filter('div[class="spaceit_pad"]');
    $headerText = $page->filter('div[class="normal_header"]')->text();
    $title = preg_replace('/ (\\w+?) Details/', '$2', $headerText);

    $isAnime = $type === 'anime';
    $history = array();

    foreach ($entries as $entry) {
        $entryCrawler = new Crawler($entry);
        $tokens = explode(' ', $entryCrawler->text());

        $record = array();
        $record['item'] = $isAnime ? new Anime() : new Manga();
        $record['item']->setId((int) $id);
        $record['item']->setTitle($title);
        if ($isAnime) {
            $record['item']->setWatchedEpisodes((int) $tokens[1]);
        } else {
            $record['item']->setChaptersRead((int) $tokens[1]);
        }
        $record['type'] = $type;
        $record['time_updated'] = Date::formatTime($tokens[4] . ' ' . $tokens[6]);

        $history[] = $record;
    }

    return $history;
}
示例4: parse
/**
 * Convert a result-list response into Result objects.
 *
 * Every ".box-row" inside a result link becomes one Result carrying the
 * attribute tags, the address, the title, an optional description and an
 * id/url derived from the href of the enclosing link.
 *
 * @inheritdoc
 */
protected function parse(Requests_Response $requests)
{
    $crawler = new Crawler();
    $crawler->addContent($requests->body);
    $r = $crawler->filter("#page > main > section > div > div.result-item-list article a > .box-row");
    $results = array();
    /** @var DOMElement $el */
    foreach ($r as $el) {
        $c = new Crawler();
        $c->add($el);

        $tags = [];
        /** @var DOMElement $z */
        foreach ($c->filter(".box-row ul.box-row-item-attribute-list li") as $z) {
            // Child 1 is used as the label and child 3 as the value.
            if ($z->childNodes !== null && $z->childNodes->length >= 4) {
                $tags[] = $z->childNodes->item(1)->nodeValue . ": " . $z->childNodes->item(3)->nodeValue;
            }
        }

        $addressB = $c->filter(".item-title--street");
        $address = $addressB->text() . " " . $addressB->siblings()->text();
        $tags[] = "Adresse: " . $address;

        $result = new Result();
        $result->setTags($tags);
        $result->setTitle(trim($c->filter("h2")->text()));

        // count() is the supported emptiness check; valid() is only available
        // while Crawler inherits from SplObjectStorage.
        // NOTE(review): "item-description" has no leading dot, so it selects an
        // <item-description> element rather than a class - confirm against the markup.
        $description = $c->filter("item-description p");
        if ($description->count() > 0) {
            $result->setDescription($description->text());
        }

        // The matched row sits inside the <a>, so the href lives on the parent.
        $link = $el->parentNode->attributes->getNamedItem("href")->nodeValue;
        $result->setId($this->getName() . "_" . explode("/", $link)[2]);
        $result->setUrl("http://m.homegate.ch/" . $link);
        $results[] = $result;
    }
    return $results;
}
示例5: parse
/**
 * Convert a listing response into Result objects.
 *
 * One Result is produced per list item matched by the XPath below. The id is
 * taken from the part after "--" in the third path segment of the detail link.
 *
 * @inheritdoc
 */
protected function parse(Requests_Response $requests)
{
    $page = new Crawler();
    $page->addContent($requests->body);
    $listItems = $page->filterXPath('//*[@id="content"]/div/div[2]/div[1]/div[1]/ul/li');

    $found = array();
    /** @var DOMElement $listItem */
    foreach ($listItems as $listItem) {
        $item = new Crawler();
        $item->add($listItem);

        $labels = [];
        /** @var DOMElement $labelNode */
        foreach ($item->filter(".horizontal-separated-list li") as $labelNode) {
            $labels[] = $labelNode->textContent;
        }

        $detailLink = $item->filter(".details a");

        $entry = new Result();
        $entry->setTitle(trim($detailLink->text()));
        $entry->setTags($labels);

        // Path segment 2 carries the numeric id after a "--" separator.
        $relUrl = $detailLink->attr("href");
        $pathSegments = explode("/", parse_url($relUrl)["path"]);
        $slugParts = explode("--", $pathSegments[2]);
        $id = $slugParts[1];

        $entry->setId($this->getName() . "_" . intval($id));
        $entry->setUrl("http://www.anibis.ch/" . $relUrl);
        $entry->setPrice($item->filter(".price")->text());
        $entry->setDescription($item->filter(".details .description")->text());

        $found[] = $entry;
    }

    return $found;
}
示例6: hbo
/**
* Collect course data from a crawled page and hand it to $this->store().
*
* The work is split into named steps ('title', 'teaser', 'price', 'flexible'),
* each passed as a closure to $this->fetch(); how fetch() decides whether and
* when to run a step is not visible here.
*
* @param Crawler $crawler Crawler for the page being processed
* @param Request $request Request passed through to fetch() and store()
*
* Meta fields listed for this scraper (the visible code only writes
* 'price', always with value 0, and 'flexible_course'):
* - price
* - price_per_month
* - flexibel [bool]
* - duration [integer]
* - education_level [string]
* - examination_costs [float]
* - 21_plus_test [bool]
* - recognized_diploma
* - specialized_literature_amount
* - practice_sessions_count
*/
public function hbo(Crawler $crawler, Request $request)
{
$data = new ArrayObject();
// Title step: the trimmed <h1> text also yields the uid and slug.
$this->fetch($request, 'title', function () use($crawler, $data) {
$title = trim($crawler->filter('h1')->text());
$data['title'] = $title;
$data['uid'] = 'nti-' . Str::slug($title);
$data['slug'] = Str::slug($title);
});
// Teaser step: text of the first paragraph matched by the selector.
$this->fetch($request, 'teaser', function () use($crawler, $data) {
$data['teaser'] = trim($crawler->filter('.ParagraafOrder_1 p')->first()->text());
});
// Price step.
// NOTE(review): this still contains debugging calls. dd() is presumably the
// dump-and-die helper, in which case nothing after the first dd() on either
// path runs and the meta entry below is never written - confirm and clean up.
$this->fetch($request, 'price', function () use($crawler, $data, $request) {
try {
// Second cell of the second row of the price table.
$price = $crawler->filter('.old-lesgeld-table tr')->eq(1)->filter('td')->eq(1)->text();
dd($price);
} catch (\Exception $e) {
$this->error('aaarg');
// On failure the row count of the price table is dumped instead.
$price = $crawler->filter('.old-lesgeld-table tr')->count();
$this->info($request->getUri()->getPath());
dd($price);
dd($e->getMessage());
}
// The extracted $price is not used; the stored value is a literal 0.
$data['meta'][] = ['name' => 'price', 'value' => 0];
});
// Flexible step: 0 when the title contains 'Klassikaal', otherwise 1.
// Reads $data['title'], so it relies on the title step having filled it - TODO confirm ordering.
$this->fetch($request, 'flexible', function () use($crawler, $data) {
$data['meta'][] = ['name' => 'flexible_course', 'value' => strstr($data['title'], 'Klassikaal') ? 0 : 1];
});
// Save to the storage
$this->store($request, $data);
}
示例7: search
/**
 * Query the search page and map every result row to a SearchResult.
 *
 * A ClientException during the request yields an empty list.
 *
 * @param string $query
 * @return SearchResult[]
 */
public function search($query)
{
    $url = 'https://thepiratebay.se/search/' . urlencode($query) . '/0/7/0';
    try {
        $response = $this->httpClient->get($url);
    } catch (ClientException $e) {
        return [];
    }

    $page = new Crawler((string) $response->getBody());
    $rows = $page->filter('#searchResult tr');

    $found = [];
    $rowNumber = 0;
    foreach ($rows as $row) {
        $rowNumber++;
        // The first row is the table header.
        if ($rowNumber === 1) {
            continue;
        }

        $rowCrawler = new Crawler($row);
        $cells = $rowCrawler->filter('td');

        $entry = new SearchResult();
        $entry->setName(trim($rowCrawler->filter('.detName')->text()));
        $entry->setSeeders((int) $cells->eq(2)->text());
        $entry->setLeechers((int) $cells->eq(3)->text());
        $entry->setMagnetUrl($rowCrawler->filterXPath('//tr/td/a')->attr('href'));

        $found[] = $entry;
    }

    return $found;
}
示例8: parse
/**
 * Parse an episode list page into Episode objects.
 *
 * @param string $contents HTML of the episode list page
 *
 * @return Episode[] One entry per "episode-list-data" row
 */
public static function parse($contents)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');
    $rows = $crawler->filter('table[class="mt8 episode_list js-watch-episode-list ascend"] tr[class="episode-list-data"]');
    $result = array();
    foreach ($rows as $episodeItem) {
        $row = new Crawler($episodeItem);
        $episode = new Episode();
        $episode->setNumber($row->filter('td[class="episode-number nowrap"]')->text());
        $episode->setTitle($row->filter('td[class="episode-title"] a')->text());

        // MAL does not always provide the air date!
        $date = trim($row->filter('td[class="episode-aired"]')->text());
        if ($date !== 'N/A') {
            // createFromFormat() is a static factory and returns false when the
            // text does not match; only a real DateTime is handed to the episode.
            $airDate = DateTime::createFromFormat('M j, Y', $date);
            if ($airDate !== false) {
                $episode->setAirDate($airDate);
            }
        }

        $extracted = $row->filter('td[class="episode-title"] span[class="di-ib"]');
        // count() must be checked first: text() throws on an empty node list.
        if ($extracted->count() > 0 && $extracted->text() !== '') {
            // Start from a clean array so titles of a previous row cannot leak
            // into an episode that lacks a Japanese title.
            $other_titles = array();
            $parts = explode('(', $extracted->text());
            # English: strip the non-breaking-space bytes around the title.
            $other_titles['english'] = array(trim($parts[0], chr(0xc2) . chr(0xa0)));
            # Japanese: the text inside the parentheses, when present.
            if (count($parts) > 1) {
                $other_titles['japanese'] = array(trim(str_replace(')', '', $parts[1])));
            }
            $episode->setOtherTitles($other_titles);
        }
        $result[] = $episode;
    }
    return $result;
}
示例9: pruneUserComments
/**
 * Interactively delete short user comments from an issue.
 *
 * Comments with an attached file change are skipped. For every other comment
 * whose word count is at most $comment_words the comment is printed and the
 * user is asked whether to delete it.
 *
 * @param Issue           $issue         Issue whose crawler provides the comments
 * @param DoBrowser       $browser       Browser passed on to deleteComment()
 * @param int             $comment_words Maximum word count that is still offered for deletion
 * @param InputInterface  $input
 * @param OutputInterface $output
 */
protected function pruneUserComments(Issue $issue, DoBrowser $browser, $comment_words, InputInterface $input, OutputInterface $output)
{
    $deleted_comments = 0;
    /** @var \DOMElement $comment */
    foreach ($issue->getCrawler()->filter('section.comments div.comment') as $comment) {
        $words = 0;
        // Reset per comment: without this a comment that has no body would
        // print the text of the previous one (or an undefined variable).
        $text = '';
        $crawler = new Crawler($comment);
        if ($crawler->filter('.nodechanges-file-changes')->count() > 0) {
            // Has a file attached ignore.
            continue;
        }
        $comment_body = $crawler->filter('.field-name-comment-body div.field-item');
        if ($comment_body->count()) {
            $text = $comment_body->text();
            $words = str_word_count(trim($text));
        }
        // NOTE(review): the original remark said zero word comments are often
        // issue summary updates and should be ignored for now, but this
        // condition includes them - confirm which is intended.
        if ($words <= $comment_words) {
            $changes = $crawler->filter('.field-name-field-issue-changes div.field-item');
            if ($changes->count()) {
                $output->writeln("Comment issue changes: " . trim($changes->text()));
            }
            $output->writeln("Comment text: " . trim($text));
            if ($this->askConfirmation($input, $output, 'Delete this comment (yes/NO)? ')) {
                $delete_links = $crawler->filter('li.comment-delete a, div.system-message.queued-retesting li.comment-delete a')->extract(array('href'));
                if (isset($delete_links[0])) {
                    $this->deleteComment($delete_links[0], $browser, $output);
                    $deleted_comments++;
                } else {
                    // No delete link on this comment, so there is nothing to request.
                    $output->writeln('No delete link found for this comment.');
                }
            }
            $output->writeln('');
        }
    }
    $output->writeln("Deleted {$deleted_comments} user comments.");
}
示例10: extractMagnetUrl
/**
 * Build a MagnetLink from the first result row of a search page.
 *
 * @param Show $show
 * @param Episode $episode
 * @param Crawler $page
 * @return null|MagnetLink Null when the page has no results, the details link
 *                         is missing or the description line does not match
 */
protected function extractMagnetUrl(Show $show, Episode $episode, Crawler $page)
{
    $nodes = $page->filter(".detName");
    if ($nodes->count() == 0) {
        return null;
    }
    // Row 0 is skipped; row 1 is used as the first result.
    $tr = $page->filter('#searchResult tr')->eq(1);
    $a = $tr->filter('td')->eq(1)->filter('a')->eq(0);
    // A Crawler object is always truthy, so emptiness has to be tested via count().
    if ($a->count() === 0) {
        return null;
    }
    $link = new MagnetLink();
    $link->setDetailsLink("http://{$this->domain}{$a->attr('href')}");
    $link->setLink(trim($tr->filter('a[title="Download this torrent using magnet"]')->attr('href')));
    $link->setTitle(trim($tr->filter('.detName')->eq(0)->text()));
    $desc = $tr->filter('.detDesc')->text();
    // Captures, in order: upload date, size unit token and author.
    if (!preg_match('/^\\S+ (.*), .*\\s(\\S+)\\s.*,.*\\s(.*)$/', $desc, $matches)) {
        return null;
    }
    $link->setUploaded(trim($matches[1]));
    $link->setSize(trim($matches[2]));
    $link->setAuthor(trim($matches[3]));
    $link->setSeeds(trim($tr->filter('td')->eq(2)->text()));
    $link->setLeeches(trim($tr->filter('td')->eq(3)->text()));
    $link->setEpisode($episode);
    $link->setShow($show);
    return $link;
}
示例11: __invoke
/**
 * Middleware handler that answers the file-transfer authorization form.
 *
 * Without the "ftp_credentials" option the request is passed straight to the
 * next handler. Otherwise the response is inspected for the authorization
 * form; when found, the form is re-submitted with the configured credentials.
 *
 * @param RequestInterface $request
 * @param array            $options Request options; "ftp_credentials" must be an FtpCredentials instance when set
 *
 * @return mixed Whatever the next handler returns (a promise when credentials are configured)
 *
 * @throws \RuntimeException When "ftp_credentials" is set to something else
 */
public function __invoke(RequestInterface $request, array $options)
{
    $fn = $this->nextHandler;
    if (empty($options['ftp_credentials'])) {
        return $fn($request, $options);
    } elseif (!$options['ftp_credentials'] instanceof FtpCredentials) {
        throw new \RuntimeException(sprintf('The "ftp_credentials" key must be an instance of "%s".', FtpCredentials::class));
    }
    $credentials = $options['ftp_credentials'];
    return $fn($request, $options)->then(function (ResponseInterface $response) use($credentials, $request, $options) {
        $crawler = new Crawler((string) $response->getBody(), (string) $request->getUri());
        try {
            // Try to find the form.
            $formNode = $crawler->filter('form#authorize-filetransfer-form');
            if (!$formNode->count()) {
                return $response;
            }
            // Form found - do we have the credentials?
            if (!$credentials->present()) {
                throw new FtpCredentialsRequiredException();
            }
            $ftpForm = $formNode->form();
            if (!empty($options['__ftp_credentials_submitted'])) {
                // The form was already submitted and we got it again - must be with an error message.
                $error = null;
                $errorNode = $crawler->filter('p.error');
                if ($errorNode->count()) {
                    $error = $errorNode->text() ?: null;
                }
                throw new FtpCredentialsErrorException($error);
            }
            // connection_settings[authorize_filetransfer_default]:ftp
            // connection_settings[ftp][username]:
            // connection_settings[ftp][password]:
            // connection_settings[ftp][advanced][hostname]:localhost
            // connection_settings[ftp][advanced][port]:21
            // connection_settings[ssh][username]:
            // connection_settings[ssh][password]:
            // connection_settings[ssh][advanced][hostname]:localhost
            // connection_settings[ssh][advanced][port]:22
            $submitValues = $ftpForm->getValues();
            // This is not handled by the form component, but Drupal requires this button to be pressed.
            // The "Continue" string does not have to be translated upon continuing, it's just here for
            // the convenience.
            $submitValues['process_updates'] = 'Continue';
            $submitValues['connection_settings']['authorize_filetransfer_default'] = $credentials->getMethod();
            $submitValues['connection_settings'][$credentials->getMethod()] = [
                'username' => $credentials->getUsername(),
                'password' => $credentials->getPassword(),
                'advanced' => [
                    'hostname' => $credentials->getHost(),
                    'port' => $credentials->getPort(),
                ],
            ];
        } catch (FtpCredentialsRequiredException $e) {
            // Credential problems must reach the caller, not the catch-all below.
            throw $e;
        } catch (FtpCredentialsErrorException $e) {
            throw $e;
        } catch (\Exception $e) {
            // The form was not found - continue.
            return $response;
        }
        // The numeric prefix is passed as an empty string: null is deprecated
        // for this string parameter on newer PHP versions.
        $encodedBody = http_build_query($submitValues, '', '&');
        $newRequest = $request
            ->withMethod($ftpForm->getMethod())
            ->withUri(new Uri($ftpForm->getUri()))
            ->withBody(\GuzzleHttp\Psr7\stream_for($encodedBody))
            ->withHeader('content-type', 'application/x-www-form-urlencoded');
        // Mark the retry so a second appearance of the form is treated as an error.
        $options['__ftp_credentials_submitted'] = true;
        return $this($newRequest, $options);
    });
}
示例12: search
/**
 * Query the search page and map every result row to a SearchResult.
 *
 * Seeder and leecher counts come from the configured options, not from the
 * page. A ClientException during the request yields an empty list.
 *
 * @param string $query
 * @return SearchResult[]
 */
public function search($query)
{
    $url = 'https://eztv.ag/search/' . $this->transformSearchString($query);
    try {
        $response = $this->httpClient->get($url);
    } catch (ClientException $e) {
        return [];
    }

    $page = new Crawler((string) $response->getBody());

    $found = [];
    foreach ($page->filter('tr.forum_header_border') as $row) {
        $rowCrawler = new Crawler($row);

        $entry = new SearchResult();
        $entry->setName(trim($rowCrawler->filter('td')->eq(1)->text()));
        $entry->setSeeders($this->options['seeders']);
        $entry->setLeechers($this->options['leechers']);

        // Both links are optional; set only those present in the row.
        $torrentLink = $rowCrawler->filter('a.download_1');
        if ($torrentLink->count() > 0) {
            $entry->setTorrentUrl($torrentLink->eq(0)->attr('href'));
        }

        $magnetLink = $rowCrawler->filter('a.magnet');
        if ($magnetLink->count() > 0) {
            $entry->setMagnetUrl($magnetLink->eq(0)->attr('href'));
        }

        $found[] = $entry;
    }

    return $found;
}
示例13: isJoomla
/**
 * Check whether the target page carries Joomla markers.
 *
 * The page counts as Joomla when a <meta> content attribute contains
 * "Joomla!" or a <script> src points at the mootools files under
 * /media/system/js/. Any exception while fetching or parsing yields false.
 *
 * @return bool
 */
public function isJoomla()
{
    // Targets that point at a document or image are replaced by the base URL.
    $url = $this->target;
    if (preg_match("/^.*\\.(jpg|JPG|gif|GIF|doc|DOC|pdf|PDF)\$/", $this->target)) {
        $url = $this->getBaseUrlJoomla();
    }

    $fakeHeaders = new FakeHeaders();
    try {
        $httpClient = new Client([
            'defaults' => [
                'headers' => ['User-Agent' => $fakeHeaders->getUserAgent()],
                'proxy' => $this->proxy,
                'timeout' => 30,
            ],
        ]);
        $html = $httpClient->get($url)->getBody()->getContents();
        $page = new Crawler($html);

        foreach ($page->filter('meta') as $metaTag) {
            if (preg_match('/Joomla!/', $metaTag->getAttribute('content'))) {
                return true;
            }
        }

        foreach ($page->filter('script') as $scriptTag) {
            if (preg_match("/(\\/media\\/system\\/js\\/mootools(.js|-core.js))/", $scriptTag->getAttribute('src'))) {
                return true;
            }
        }

        return false;
    } catch (\Exception $e) {
        return false;
    }
}
示例14: handle
/**
 * Handle an incoming request.
 *
 * For non-redirect responses to pjax requests the response body is reduced to
 * the requested container (prefixed with the page title when one exists) and
 * the X-PJAX-URL header is set to the request URI.
 *
 * @param  \Illuminate\Http\Request  $request
 * @param  \Closure  $next
 * @return mixed
 */
public function handle($request, Closure $next)
{
    /** @var $response Response */
    $response = $next($request);

    // Redirects and non-pjax requests pass through untouched.
    if ($response->isRedirection() || !$request->pjax()) {
        return $response;
    }

    $document = new Crawler($response->getContent());
    // The title is needed to update the browser title bar.
    $titleNode = $document->filter('head > title');
    // The container selector is supplied by the client.
    $containerNode = $document->filter($request->header('X-PJAX-CONTAINER'));

    // Only rewrite the content when the container exists.
    if ($containerNode->count() != 0) {
        $titleMarkup = $titleNode->count() != 0
            ? '<title>' . $titleNode->html() . '</title>'
            : '';
        $response->setContent($titleMarkup . $containerNode->html());
    }

    // Updating address bar with the last URL in case there were redirects
    $response->header('X-PJAX-URL', $request->getRequestUri());

    return $response;
}
示例15: register
/**
 * Register the service provider.
 *
 * Installs an "after" callback that, for non-redirect responses to pjax
 * requests, reduces the body to the requested container (prefixed with the
 * page title when one exists) and sets the X-PJAX-URL header.
 *
 * @return void
 */
public function register()
{
    $app = $this->app;
    $this->app->after(function ($request, $response) use($app) {
        // Redirects are left alone.
        if ($response->isRedirection()) {
            return;
        }
        // Only pjax requests are rewritten.
        if (!$request->server->get('HTTP_X_PJAX')) {
            return;
        }

        $document = new Crawler($response->getContent());
        // The title is needed to update the browser title bar.
        $titleNode = $document->filter('head > title');
        // The container selector is supplied by the client.
        $containerNode = $document->filter($request->server->get('HTTP_X_PJAX_CONTAINER'));

        // Only rewrite the content when the container exists.
        if ($containerNode->count() != 0) {
            $titleMarkup = $titleNode->count() != 0
                ? '<title>' . $titleNode->html() . '</title>'
                : '';
            $response->setContent($titleMarkup . $containerNode->html());
        }

        // Updating address bar with the last URL in case there were redirects
        $response->header('X-PJAX-URL', $request->getRequestUri());
    });
}