本文整理汇总了PHP中Symfony\Component\DomCrawler\Crawler类的典型用法代码示例。如果您正苦于以下问题:PHP Crawler类的具体用法?PHP Crawler怎么用?PHP Crawler使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Crawler类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: getPdfHash
public function getPdfHash($html, $cssVersion)
{
$crawler = new Crawler($html);
$layoutWrapper = $crawler->filter('div#layout-main-wrapper');
$htmlHash = $this->getHtmlHash($layoutWrapper);
return md5($htmlHash . $cssVersion);
}
示例2: viewAction
/**
* Display a page in studio and allow to add/edit gadget
* @param $id id of the page to display
*/
public function viewAction($id)
{
//Curent app id
$appid = $this->get('keosu_core.curapp')->getCurApp();
$em = $this->get('doctrine')->getManager();
//Get the page we want to edit
$page = $em->getRepository('KeosuCoreBundle:Page')->find($id);
//Page Template content as String
$templateHtml = file_get_contents(TemplateUtil::getPageTemplateAbsolutePath() . $page->getTemplateId());
//Get all the elements of class "zone" in template dom
$crawler = new Crawler($templateHtml);
$zones = $crawler->filter('.zone')->extract(array('id'));
//Initiate an Array to store all zone in page
$zoneModelList = array();
foreach ($zones as $zone) {
//Look if there is a shared gadget in this zone
$gadget = $em->getRepository('KeosuCoreBundle:Gadget')->findSharedByZoneAndApp($zone, $appid);
//If there is no share gadget we try to find the specific one
if ($gadget == null) {
//Find the gadget associated with page and zone
$gadget = $em->getRepository('KeosuCoreBundle:Gadget')->findOneBy(array('zone' => $zone, 'page' => $id));
}
$zoneModel = array();
$zoneModel['gadget'] = $gadget;
$zoneModel['zoneId'] = $zone;
$zoneModelList[] = $zoneModel;
}
$gadgetList = array();
$packageList = $this->get('keosu_core.package_manager')->getPackageList(PackageManager::TYPE_PACKAGE_GADGET);
foreach ($packageList as $p) {
$gadgetList[] = $p->getName();
}
return $this->render('KeosuCoreBundle:Page:studio.html.twig', array('page' => $page, 'zones' => $zoneModelList, 'templatehtml' => $templateHtml, 'gadgets' => $gadgetList));
}
示例3: register
/**
* Register the service provider.
*
* @return void
*/
public function register()
{
$app = $this->app;
$this->app->after(function ($request, $response) use($app) {
// Only handle non-redirections
if (!$response->isRedirection()) {
// Must be a pjax-request
if ($request->server->get('HTTP_X_PJAX')) {
$crawler = new Crawler($response->getContent());
// Filter to title (in order to update the browser title bar)
$response_title = $crawler->filter('head > title');
// Filter to given container
$response_container = $crawler->filter($request->server->get('HTTP_X_PJAX_CONTAINER'));
// Container must exist
if ($response_container->count() != 0) {
$title = '';
// If a title-attribute exists
if ($response_title->count() != 0) {
$title = '<title>' . $response_title->html() . '</title>';
}
// Set new content for the response
$response->setContent($title . $response_container->html());
}
// Updating address bar with the last URL in case there were redirects
$response->header('X-PJAX-URL', $request->getRequestUri());
}
}
});
}
示例4: execute
protected function execute(InputInterface $input, OutputInterface $output)
{
// read url parameter
$url = $input->getArgument('url');
// read page parameter
$page = $input->getArgument('page');
if (is_numeric($page)) {
$url = $url . "?page=" . $page;
}
// get url content
$pageContent = file_get_contents($url);
if ($pageContent !== false) {
$ph = new PersonHandler();
$ph->setPersonsUrl($url);
// read persons table
$crawler = new Crawler($pageContent);
$rowElements = $crawler->filter('.view-persona .view-content table.views-table tbody tr');
// extract each Person
/* @var $rowElement DOMElement */
foreach ($rowElements as $rowElement) {
$p = new Person();
$ph->readInfo($p, $rowElement);
$output->writeln($p->toCsv());
}
}
}
示例5: productAction
public function productAction()
{
$client = new Client('http://stroyka.by');
$request = $client->get('/');
$request->send();
$resault = $request->getResponse()->getBody(true);
$crawler = new Crawler();
// $crawler->addHTMLContent($resault);
// $res = $request->getResponse()->getH;
// var_dump($res);exit();
// $text = utf8_decode($resault);
$crawler = new Crawler($resault);
$resault = $crawler->filter('ul.b-categories')->html();
//
//
// $crowler2 = new Crawler($resault);
//
// $resault = $crowler2->filter('span.b-categories__name');
//
// $nodeValues = $crawler->filter('span.b-categories__name')->each(function (Crawler $node, $i) {
// return utf8_decode($node->text());
// });
// var_dump(mb_detect_encoding($nodeValues[0]));exit();
// foreach($nodeValues as $nv) {
// $category = new Category();
// $category->setName($nv);
// $this->getDoctrine()->getManager()->persist($category);
// }
// $this->getDoctrine()->getManager()->flush();
// var_dump($nodeValues);
// var_dump($resault);
// exit;
return $this->render('AcmeAppBundle:Default:product.html.twig', array('result' => $resault));
}
示例6: execute
protected function execute(InputInterface $input, OutputInterface $output)
{
$httpClient = $this->getHttpClientHelper();
$site_name = $input->getArgument('site-name');
$version = $input->getArgument('version');
if ($version) {
$release_selected = $version;
} else {
// Getting Module page header and parse to get module Node
$output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.getting-releases')) . '</info>');
// Page for Drupal releases filter by Drupal 8
$project_release_d8 = 'https://www.drupal.org/node/3060/release?api_version%5B%5D=7234';
// Parse release module page to get Drupal 8 releases
try {
$html = $httpClient->getHtml($project_release_d8);
} catch (\Exception $e) {
$output->writeln('[+] <error>' . $e->getMessage() . '</error>');
return;
}
$crawler = new Crawler($html);
$releases = [];
foreach ($crawler->filter('span.file a') as $element) {
if (strpos($element->nodeValue, ".tar.gz") > 0) {
$release_name = str_replace('.tar.gz', '', str_replace('drupal-', '', $element->nodeValue));
$releases[$release_name] = $element->nodeValue;
}
}
if (empty($releases)) {
$output->writeln('[+] <error>' . $this->trans('commands.module.site.new.no-releases') . '</error>');
return;
}
// List module releases to enable user to select his favorite release
$questionHelper = $this->getQuestionHelper();
$question = new ChoiceQuestion('Please select your favorite release', array_combine(array_keys($releases), array_keys($releases)), 0);
$release_selected = $questionHelper->ask($input, $output, $question);
}
$release_file_path = 'http://ftp.drupal.org/files/projects/drupal-' . $release_selected . '.tar.gz';
// Destination file to download the release
$destination = tempnam(sys_get_temp_dir(), 'drupal.') . "tar.gz";
try {
// Start the process to download the zip file of release and copy in contrib folter
$output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.downloading'), $release_selected) . '</info>');
$httpClient->downloadFile($release_file_path, $destination);
$output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.extracting'), $release_selected) . '</info>');
$zippy = Zippy::load();
$archive = $zippy->open($destination);
$archive->extract('./');
try {
$fs = new Filesystem();
$fs->rename('./drupal-' . $release_selected, './' . $site_name);
} catch (IOExceptionInterface $e) {
$output->writeln('[+] <error>' . sprintf($this->trans('commands.site.new.messages.error-copying'), $e->getPath()) . '</error>');
}
$output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.downloaded'), $release_selected, $site_name) . '</info>');
} catch (\Exception $e) {
$output->writeln('[+] <error>' . $e->getMessage() . '</error>');
return;
}
return true;
}
示例7: setUp
public function setUp()
{
$html = file_get_contents(__DIR__ . '/../Fixtures/category-page-product-node.html');
$crawler = new Crawler();
$crawler->addHtmlContent($html, 'ISO-8859-1');
$this->SUT = new SUT($crawler);
}
示例8: process
public function process()
{
$crawler = $this->client->request($this->plan['method'], $this->plan['uri']);
if (isset($this->plan['selector'])) {
$selection = $crawler->filter($this->plan['selector']);
} elseif (isset($this->plan['xpath'])) {
$selection = $crawler->filterXPath($this->plan['path']);
}
if ($this->plan['images']) {
$images = $selection->filterXPath('//img');
if (iterator_count($images) > 1) {
foreach ($images as $image) {
$crawler = new Crawler($image);
$info = parse_url($this->plan['uri']);
$url = $info['scheme'] . '://' . $info['host'] . '/' . $crawler->attr('src');
if (strpos($crawler->attr('src'), 'http') === 0) {
$url = $info['scheme'] . '://' . $info['host'] . '/' . $this->plan['path'] . $crawler->attr('src');
}
copy($url, SCRYPHP_STORAGE_PATH_IMG . DIRECTORY_SEPARATOR . substr(strrchr($url, "/"), 1));
}
}
}
file_put_contents(SCRYPHP_STORAGE_PATH_TXT . DIRECTORY_SEPARATOR . time() . uniqid(time(), true) . '.txt', $selection->text());
return $selection->text();
}
示例9: getSingleFromScreenRant
/**
* Scrapes single news item from screenrant
*
* @param string $url
* @return string
*/
public function getSingleFromScreenRant($url)
{
$text = '';
$item = $this->curl($url);
$crawler = new Crawler($item);
$html = $crawler->filter('div[itemprop="articleBody"] p')->each(function (Crawler $node, $i) {
$ht = trim($node->html());
//filter out unneeded html
if (strpos($ht, 'contentjumplink')) {
return false;
}
if (strpos($ht, 'type="button"')) {
return false;
}
if (strpos($ht, 'type="hidden"')) {
return false;
}
if (strpos($ht, 'AD BLOCK')) {
return false;
}
if (strpos($ht, 'src=')) {
preg_match('/.*?<img src="(.*?)"/', $ht, $m);
if (isset($m[1])) {
return "<img src='{$m[1]}' class='img-responsive'/>";
}
}
return '<p>' . preg_replace('/<a.*?>(.*?)<\\/a>/', '$1', $ht) . '</p>';
});
return trim(implode('', $html));
}
示例10: handle
/**
* Handle an incoming request.
*
* @param \Illuminate\Http\Request $request
* @param \Closure $next
* @return mixed
*/
public function handle($request, Closure $next)
{
/** @var $response Response */
$response = $next($request);
// Only handle non-redirections and must be a pjax-request
if (!$response->isRedirection() && $request->pjax()) {
$crawler = new Crawler($response->getContent());
// Filter to title (in order to update the browser title bar)
$response_title = $crawler->filter('head > title');
// Filter to given container
$response_container = $crawler->filter($request->header('X-PJAX-CONTAINER'));
// Container must exist
if ($response_container->count() != 0) {
$title = '';
// If a title-attribute exists
if ($response_title->count() != 0) {
$title = '<title>' . $response_title->html() . '</title>';
}
// Set new content for the response
$response->setContent($title . $response_container->html());
}
// Updating address bar with the last URL in case there were redirects
$response->header('X-PJAX-URL', $request->getRequestUri());
}
return $response;
}
示例11: extractMagnetUrl
/**
* @param Show $show
* @param Episode $episode
* @param Crawler $page
* @return null|string
*/
protected function extractMagnetUrl(Show $show, Episode $episode, Crawler $page)
{
$nodes = $page->filter(".detName");
if ($nodes->count() == 0) {
return null;
}
$tr = $page->filter('#searchResult tr')->eq(1);
$link = new MagnetLink();
$a = $tr->filter('td')->eq(1)->filter('a')->eq(0);
if (!$a) {
return null;
}
$link->setDetailsLink("http://{$this->domain}{$a->attr('href')}");
$link->setLink(trim($tr->filter('a[title="Download this torrent using magnet"]')->attr('href')));
$link->setTitle(trim($tr->filter('.detName')->eq(0)->text()));
$desc = $tr->filter('.detDesc')->text();
if (!preg_match('/^\\S+ (.*), .*\\s(\\S+)\\s.*,.*\\s(.*)$/', $desc, $matches)) {
return null;
}
$link->setUploaded(trim($matches[1]));
$link->setSize(trim($matches[2]));
$link->setAuthor(trim($matches[3]));
$link->setSeeds(trim($tr->filter('td')->eq(2)->text()));
$link->setLeeches(trim($tr->filter('td')->eq(3)->text()));
$link->setEpisode($episode);
$link->setShow($show);
return $link;
}
示例12: filterCountryDetails
/**
* @param $codeCountry
*
* @return Crawler
*/
private function filterCountryDetails($codeCountry)
{
$htmlCountry = file_get_contents('https://galaxy.esn.org/section/' . $codeCountry . "/");
$crawlerCountry = new Crawler($htmlCountry);
$countriesElement = $crawlerCountry->filter('div.scrinfo');
return $countriesElement;
}
示例13: getProductsForUrl
/**
* @param string $url The url to scrape.
* @return \Slice\CliApp\ScrapeResults The results of the scrape task.
*/
public function getProductsForUrl($url)
{
//Grab the remote document contents
$rawHTML = $this->downloader->download($url);
//Drop it into a DOM crawler
$crawler = new Crawler();
$crawler->addContent($rawHTML);
try {
//Use xPath to find all of the product li elements
$productList = $crawler->filterXPath($this->productListXpath);
} catch (\InvalidArgumentException $e) {
//Convert into a Scrape Exception for easy handling by the command
throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
}
//If there are none the page isn't supported
if (sizeof($productList) == 0) {
throw new ScrapeException($this->configValues['error_msg']['no_products']);
}
//Loop over each product li
$productList->each(function ($liCrawler, $i) {
try {
//Find the product detail page url from the link
$productURL = $liCrawler->filterXPath($this->pdpLinkXpath)->attr('href');
} catch (\InvalidArgumentException $e) {
//Convert into a Scrape Exception for easy handling by the command
throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
}
$product = $this->pdpParser->parseUrl($productURL);
//Populate the final results container
$this->results->addProduct($product);
});
return $this->results;
}
示例14: isJoomla
public function isJoomla()
{
$baseUrlJoomla = $this->target;
$validExtension = preg_match("/^.*\\.(jpg|JPG|gif|GIF|doc|DOC|pdf|PDF)\$/", $this->target, $m);
if ($validExtension) {
$baseUrlJoomla = $this->getBaseUrlJoomla();
}
$header = new FakeHeaders();
try {
$client = new Client(['defaults' => ['headers' => ['User-Agent' => $header->getUserAgent()], 'proxy' => $this->proxy, 'timeout' => 30]]);
$body = $client->get($baseUrlJoomla)->getBody()->getContents();
$crawler = new Crawler($body);
$arrLinksMeta = $crawler->filter('meta');
foreach ($arrLinksMeta as $keyLinkMeta => $valueLinkMeta) {
$validJoomlaMeta = preg_match('/Joomla!/', $valueLinkMeta->getAttribute('content'), $m, PREG_OFFSET_CAPTURE);
if ($validJoomlaMeta) {
return true;
}
}
$arrLinksScript = $crawler->filter('script');
foreach ($arrLinksScript as $keyLinkScript => $valueLinkScript) {
$validJoomlaScript = preg_match("/(\\/media\\/system\\/js\\/mootools(.js|-core.js))/", $valueLinkScript->getAttribute('src'), $m, PREG_OFFSET_CAPTURE);
if ($validJoomlaScript) {
return true;
}
}
} catch (\Exception $e) {
return false;
}
return false;
}
示例15: search
/**
* @param string $query
* @return SearchResult[]
*/
public function search($query)
{
try {
$response = $this->httpClient->get('http://kickasstorrents.to/usearch/' . urlencode($query) . '/');
} catch (\GuzzleHttp\Exception\ClientException $e) {
return [];
}
$crawler = new Crawler((string) $response->getBody());
$items = $crawler->filter('#mainSearchTable tr');
$results = [];
$i = 0;
foreach ($items as $item) {
// Ignores advertisement and header
if ($i < 2) {
$i++;
continue;
}
$itemCrawler = new Crawler($item);
$name = $itemCrawler->filter('.cellMainLink')->text();
if (!stristr($name, $query)) {
continue;
}
$data = json_decode(str_replace("'", '"', $itemCrawler->filter('div[data-sc-params]')->attr('data-sc-params')));
$result = new SearchResult();
$result->setName($name);
$result->setSeeders((int) $itemCrawler->filter('td:nth-child(5)')->text());
$result->setLeechers((int) $itemCrawler->filter('td:nth-child(6)')->text());
$result->setMagnetUrl($data->magnet);
$results[] = $result;
}
return $results;
}