当前位置: 首页>>代码示例>>PHP>>正文


PHP DomCrawler\Crawler类代码示例

本文整理汇总了PHP中Symfony\Component\DomCrawler\Crawler的典型用法代码示例。如果您正苦于以下问题：PHP Crawler类的具体用法？PHP Crawler怎么用？PHP Crawler使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Crawler类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: getPdfHash

 /**
  * Build the hash used to identify the PDF rendering of a page.
  *
  * Only the markup matched by `div#layout-main-wrapper` is passed to
  * getHtmlHash(); the resulting value is concatenated with the CSS version
  * and hashed with MD5.
  *
  * @param mixed $html       Markup handed to the Crawler constructor.
  * @param mixed $cssVersion Value appended to the HTML hash before hashing.
  *
  * @return string MD5 hex digest of the HTML hash followed by $cssVersion.
  */
 public function getPdfHash($html, $cssVersion)
 {
     $dom = new Crawler($html);
     $wrapperHash = $this->getHtmlHash($dom->filter('div#layout-main-wrapper'));

     return md5($wrapperHash . $cssVersion);
 }
开发者ID:thierrymarianne,项目名称:MttBundle,代码行数:7,代码来源:PdfHashingLib.php

示例2: viewAction

 /**
  * Render the studio view of a page so gadgets can be added or edited.
  *
  * The page template is read from disk and every element carrying the class
  * "zone" is collected. For each zone the gadget shared for that zone and the
  * current app is looked up first; when that lookup yields nothing, the gadget
  * bound to this page and zone is used instead.
  *
  * @param mixed $id Identifier of the page to display.
  *
  * @return mixed Whatever render() returns for KeosuCoreBundle:Page:studio.html.twig.
  */
 public function viewAction($id)
 {
     // Application currently being edited
     $currentAppId = $this->get('keosu_core.curapp')->getCurApp();
     $entityManager = $this->get('doctrine')->getManager();

     // Page to edit and the raw HTML of its template
     $page = $entityManager->getRepository('KeosuCoreBundle:Page')->find($id);
     $templatePath = TemplateUtil::getPageTemplateAbsolutePath() . $page->getTemplateId();
     $templateHtml = file_get_contents($templatePath);

     // Ids of all ".zone" elements found in the template
     $templateDom = new Crawler($templateHtml);
     $zoneIds = $templateDom->filter('.zone')->extract(array('id'));

     $zoneModelList = array();
     foreach ($zoneIds as $zoneId) {
         // A gadget shared across the app takes precedence ...
         $gadget = $entityManager->getRepository('KeosuCoreBundle:Gadget')->findSharedByZoneAndApp($zoneId, $currentAppId);
         if (null == $gadget) {
             // ... otherwise fall back to the gadget specific to this page and zone
             $gadget = $entityManager->getRepository('KeosuCoreBundle:Gadget')->findOneBy(array('zone' => $zoneId, 'page' => $id));
         }
         $zoneModelList[] = array('gadget' => $gadget, 'zoneId' => $zoneId);
     }

     // Names of every installed gadget package
     $gadgetNames = array();
     $gadgetPackages = $this->get('keosu_core.package_manager')->getPackageList(PackageManager::TYPE_PACKAGE_GADGET);
     foreach ($gadgetPackages as $package) {
         $gadgetNames[] = $package->getName();
     }

     $viewData = array(
         'page' => $page,
         'zones' => $zoneModelList,
         'templatehtml' => $templateHtml,
         'gadgets' => $gadgetNames,
     );

     return $this->render('KeosuCoreBundle:Page:studio.html.twig', $viewData);
 }
开发者ID:predever,项目名称:keosu,代码行数:38,代码来源:ManageGadgetsController.php

示例3: register

 /**
  * Register the service provider.
  *
  * Installs an "after" callback that, for non-redirect responses to PJAX
  * requests, replaces the response body with the page <title> (when present)
  * followed by the inner HTML of the element selected by the
  * X-PJAX-Container request header, and sets the X-PJAX-URL response header
  * to the request URI.
  *
  * @return void
  */
 public function register()
 {
     $this->app->after(function ($request, $response) {
         // Only non-redirect responses to PJAX requests are rewritten
         if ($response->isRedirection() || !$request->server->get('HTTP_X_PJAX')) {
             return;
         }

         $containerSelector = $request->server->get('HTTP_X_PJAX_CONTAINER');
         // An absent/empty container header would be an invalid CSS selector for
         // Crawler::filter(); in that case the body is left untouched.
         if (is_string($containerSelector) && $containerSelector !== '') {
             $crawler = new Crawler($response->getContent());
             // Title is forwarded so the browser title bar can be updated
             $responseTitle = $crawler->filter('head > title');
             $responseContainer = $crawler->filter($containerSelector);

             // Container must exist, otherwise the full page is returned as is
             if ($responseContainer->count() != 0) {
                 $title = '';
                 if ($responseTitle->count() != 0) {
                     $title = '<title>' . $responseTitle->html() . '</title>';
                 }
                 $response->setContent($title . $responseContainer->html());
             }
         }

         // Updating address bar with the last URL in case there were redirects
         $response->header('X-PJAX-URL', $request->getRequestUri());
     });
 }
开发者ID:simonstamm,项目名称:laravel-pjax,代码行数:34,代码来源:LaravelPjaxServiceProvider.php

示例4: execute

 /**
  * Download the persons listing and print every table row as a CSV line.
  *
  * Reads the "url" and "page" arguments; a numeric page is appended to the
  * URL as "?page=<n>". Nothing is printed when the download fails.
  *
  * @param InputInterface  $input  Source of the "url" and "page" arguments.
  * @param OutputInterface $output Receives one CSV line per person row.
  *
  * @return void
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $url = $input->getArgument('url');
     $page = $input->getArgument('page');
     if (is_numeric($page)) {
         $url .= "?page=" . $page;
     }

     $pageContent = file_get_contents($url);
     if ($pageContent === false) {
         // Download failed: nothing to parse
         return;
     }

     $personHandler = new PersonHandler();
     $personHandler->setPersonsUrl($url);

     // Rows of the persons table
     $dom = new Crawler($pageContent);
     $rows = $dom->filter('.view-persona .view-content table.views-table tbody tr');

     /* @var $row DOMElement */
     foreach ($rows as $row) {
         $person = new Person();
         $personHandler->readInfo($person, $row);
         $output->writeln($person->toCsv());
     }
 }
开发者ID:nicoladarold,项目名称:addresscrawler,代码行数:26,代码来源:AddressesDumpCommand.php

示例5: productAction

 /**
  * Fetch the stroyka.by home page and render its category list.
  *
  * The inner HTML of the first `ul.b-categories` element is passed to the
  * product template under the key "result".
  *
  * NOTE(review): html() is called on the filtered node list without checking
  * that it is non-empty; confirm how a page without that list should be handled.
  *
  * @return mixed Whatever render() returns for AcmeAppBundle:Default:product.html.twig.
  */
 public function productAction()
 {
     $client = new Client('http://stroyka.by');
     $request = $client->get('/');
     $request->send();
     $body = $request->getResponse()->getBody(true);

     $crawler = new Crawler($body);
     $categoriesHtml = $crawler->filter('ul.b-categories')->html();

     return $this->render('AcmeAppBundle:Default:product.html.twig', array('result' => $categoriesHtml));
 }
开发者ID:Tugart,项目名称:parser,代码行数:34,代码来源:DefaultController.php

示例6: execute

 /**
  * Download a Drupal release and unpack it into a directory named after the site.
  *
  * When no "version" argument is supplied, the Drupal 8 release listing on
  * drupal.org is scraped for ".tar.gz" links and the user is asked to choose
  * one. The chosen archive is downloaded to a temporary file, extracted into
  * the current directory, and the extracted folder is renamed to the site name.
  *
  * @param InputInterface  $input  Source of the "site-name" and "version" arguments.
  * @param OutputInterface $output Receives progress and error messages.
  *
  * @return bool|null True once the download/extract sequence has run (even if
  *                   the final rename reported an I/O error); null when the
  *                   release listing could not be fetched, contained no
  *                   releases, or the download/extraction threw.
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $httpClient = $this->getHttpClientHelper();
     $site_name = $input->getArgument('site-name');
     $version = $input->getArgument('version');

     if (!$version) {
         // No explicit version: scrape the release listing and let the user choose
         $output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.getting-releases')) . '</info>');

         // Drupal release listing filtered on Drupal 8
         $releaseListingUrl = 'https://www.drupal.org/node/3060/release?api_version%5B%5D=7234';
         try {
             $listingHtml = $httpClient->getHtml($releaseListingUrl);
         } catch (\Exception $e) {
             $output->writeln('[+] <error>' . $e->getMessage() . '</error>');
             return;
         }

         // Map "<release name>" => "<archive file name>" for every .tar.gz link
         $listing = new Crawler($listingHtml);
         $releases = [];
         foreach ($listing->filter('span.file a') as $anchor) {
             $fileName = $anchor->nodeValue;
             if (strpos($fileName, ".tar.gz") > 0) {
                 $releases[str_replace(array('drupal-', '.tar.gz'), '', $fileName)] = $fileName;
             }
         }
         if (empty($releases)) {
             $output->writeln('[+] <error>' . $this->trans('commands.module.site.new.no-releases') . '</error>');
             return;
         }

         // Offer the release names as choices, the first one being the default
         $questionHelper = $this->getQuestionHelper();
         $releaseNames = array_keys($releases);
         $question = new ChoiceQuestion('Please select your favorite release', array_combine($releaseNames, $releaseNames), 0);
         $release_selected = $questionHelper->ask($input, $output, $question);
     } else {
         $release_selected = $version;
     }

     $release_file_path = 'http://ftp.drupal.org/files/projects/drupal-' . $release_selected . '.tar.gz';
     // Temporary file receiving the downloaded archive
     $destination = tempnam(sys_get_temp_dir(), 'drupal.') . "tar.gz";

     try {
         $output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.downloading'), $release_selected) . '</info>');
         $httpClient->downloadFile($release_file_path, $destination);

         $output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.extracting'), $release_selected) . '</info>');
         $archive = Zippy::load()->open($destination);
         $archive->extract('./');

         // Rename the extracted "drupal-<release>" folder to the requested site name
         try {
             $filesystem = new Filesystem();
             $filesystem->rename('./drupal-' . $release_selected, './' . $site_name);
         } catch (IOExceptionInterface $e) {
             $output->writeln('[+] <error>' . sprintf($this->trans('commands.site.new.messages.error-copying'), $e->getPath()) . '</error>');
         }

         $output->writeln('[+] <info>' . sprintf($this->trans('commands.site.new.messages.downloaded'), $release_selected, $site_name) . '</info>');
     } catch (\Exception $e) {
         $output->writeln('[+] <error>' . $e->getMessage() . '</error>');
         return;
     }

     return true;
 }
开发者ID:GoZOo,项目名称:DrupalConsole,代码行数:60,代码来源:SiteNewCommand.php

示例7: setUp

 /**
  * Build the subject under test from the product-node HTML fixture.
  *
  * The fixture is added to an empty crawler with the ISO-8859-1 charset and
  * the crawler is handed to the SUT constructor.
  */
 public function setUp()
 {
     $fixtureHtml = file_get_contents(__DIR__ . '/../Fixtures/category-page-product-node.html');

     $productNode = new Crawler();
     $productNode->addHtmlContent($fixtureHtml, 'ISO-8859-1');

     $this->SUT = new SUT($productNode);
 }
开发者ID:blowski,项目名称:sainsburys-crawler,代码行数:7,代码来源:CategoryPageProductNodeTest.php

示例8: process

 /**
  * Execute the scraping plan and return the text of the selected nodes.
  *
  * Keys read from $this->plan:
  *  - 'method', 'uri': passed to the client to request the page;
  *  - 'selector' (CSS) or 'xpath': chooses the nodes to extract;
  *  - 'images' (optional): when truthy, every <img> inside the selection is
  *    copied into SCRYPHP_STORAGE_PATH_IMG.
  *
  * The selection text is also written to a uniquely named .txt file under
  * SCRYPHP_STORAGE_PATH_TXT.
  *
  * @return string Text of the selection.
  *
  * @throws \InvalidArgumentException When the plan defines neither a
  *                                   'selector' nor an 'xpath'.
  */
 public function process()
 {
     $crawler = $this->client->request($this->plan['method'], $this->plan['uri']);

     if (isset($this->plan['selector'])) {
         $selection = $crawler->filter($this->plan['selector']);
     } elseif (isset($this->plan['xpath'])) {
         // Use the key that was actually tested (previously read 'path')
         $selection = $crawler->filterXPath($this->plan['xpath']);
     } else {
         // Without this guard $selection would be undefined further down
         throw new \InvalidArgumentException('The plan must define either a "selector" or an "xpath".');
     }

     if (!empty($this->plan['images'])) {
         $info = parse_url($this->plan['uri']);
         $siteRoot = $info['scheme'] . '://' . $info['host'] . '/';

         // Iterating directly also covers a selection holding exactly one image
         foreach ($selection->filterXPath('//img') as $image) {
             $imageCrawler = new Crawler($image);
             $src = $imageCrawler->attr('src');
             if ($src === null || $src === '') {
                 // No source to download
                 continue;
             }
             // Absolute sources are used as they are; anything else is taken
             // relative to the site root
             $url = strpos($src, 'http') === 0 ? $src : $siteRoot . $src;
             copy($url, SCRYPHP_STORAGE_PATH_IMG . DIRECTORY_SEPARATOR . substr(strrchr($url, "/"), 1));
         }
     }

     $text = $selection->text();
     file_put_contents(SCRYPHP_STORAGE_PATH_TXT . DIRECTORY_SEPARATOR . time() . uniqid(time(), true) . '.txt', $text);

     return $text;
 }
开发者ID:siad007,项目名称:scryphp,代码行数:25,代码来源:Plan.php

示例9: getSingleFromScreenRant

 /**
  * Scrapes single news item from screenrant
  *
  * Every <p> inside the element marked itemprop="articleBody" is visited:
  * paragraphs containing jump links, button/hidden inputs or ad markers are
  * dropped, paragraphs containing an image are reduced to a responsive <img>
  * tag, and all other paragraphs are kept with their links unwrapped.
  *
  * @param  string $url Address of the article to fetch.
  * @return string Concatenated HTML of the kept paragraphs and images.
  */
 public function getSingleFromScreenRant($url)
 {
     $item = $this->curl($url);
     $crawler = new Crawler($item);

     // Paragraphs containing any of these fragments are not article content
     $unwanted = array('contentjumplink', 'type="button"', 'type="hidden"', 'AD BLOCK');

     $html = $crawler->filter('div[itemprop="articleBody"] p')->each(function (Crawler $node, $i) use ($unwanted) {
         $ht = trim($node->html());

         // strpos() returns 0 for a match at the very start, so the result
         // must be compared against false rather than used as a boolean
         foreach ($unwanted as $marker) {
             if (strpos($ht, $marker) !== false) {
                 return false;
             }
         }

         if (strpos($ht, 'src=') !== false) {
             preg_match('/.*?<img src="(.*?)"/', $ht, $m);
             if (isset($m[1])) {
                 return "<img src='{$m[1]}' class='img-responsive'/>";
             }
         }

         // Keep the paragraph but replace each link by its inner content
         return '<p>' . preg_replace('/<a.*?>(.*?)<\\/a>/', '$1', $ht) . '</p>';
     });

     // Dropped paragraphs are false and contribute an empty string here
     return trim(implode('', $html));
 }
开发者ID:samirios1,项目名称:niter,代码行数:36,代码来源:NewsScraper.php

示例10: handle

 /**
  * Handle an incoming request.
  *
  * For non-redirect responses to PJAX requests the body is replaced by the
  * page <title> (when present) followed by the inner HTML of the element
  * selected by the X-PJAX-CONTAINER request header, and the X-PJAX-URL
  * response header is set to the request URI.
  *
  * @param  \Illuminate\Http\Request  $request
  * @param  \Closure  $next
  * @return mixed
  */
 public function handle($request, Closure $next)
 {
     /** @var $response Response */
     $response = $next($request);

     // Only handle non-redirections and must be a pjax-request
     if ($response->isRedirection() || !$request->pjax()) {
         return $response;
     }

     $containerSelector = $request->header('X-PJAX-CONTAINER');
     // An absent/empty container header would be an invalid CSS selector for
     // Crawler::filter(); in that case the body is left untouched.
     if (is_string($containerSelector) && $containerSelector !== '') {
         $crawler = new Crawler($response->getContent());
         // Title is forwarded so the browser title bar can be updated
         $responseTitle = $crawler->filter('head > title');
         $responseContainer = $crawler->filter($containerSelector);

         // Container must exist, otherwise the full page is returned as is
         if ($responseContainer->count() != 0) {
             $title = '';
             if ($responseTitle->count() != 0) {
                 $title = '<title>' . $responseTitle->html() . '</title>';
             }
             $response->setContent($title . $responseContainer->html());
         }
     }

     // Updating address bar with the last URL in case there were redirects
     $response->header('X-PJAX-URL', $request->getRequestUri());

     return $response;
 }
开发者ID:michaelotto126,项目名称:shokes,代码行数:33,代码来源:PjaxMiddleware.php

示例11: extractMagnetUrl

 /**
  * Build a MagnetLink from the first result row of a search page.
  *
  * The row at index 1 of `#searchResult tr` is used. Null is returned when
  * the page has no `.detName` nodes, when that row has no link in its second
  * cell, or when the row description does not match the expected
  * "uploaded / size / author" pattern.
  *
  * @param Show $show       Show the link is attached to.
  * @param Episode $episode Episode the link is attached to.
  * @param Crawler $page    Search result page.
  * @return null|MagnetLink
  */
 protected function extractMagnetUrl(Show $show, Episode $episode, Crawler $page)
 {
     $nodes = $page->filter(".detName");
     if ($nodes->count() == 0) {
         return null;
     }

     $tr = $page->filter('#searchResult tr')->eq(1);
     $a = $tr->filter('td')->eq(1)->filter('a')->eq(0);
     // eq() always returns a Crawler object, so emptiness has to be tested
     // through count(); a plain truthiness check never fails
     if ($a->count() == 0) {
         return null;
     }

     $link = new MagnetLink();
     $link->setDetailsLink("http://{$this->domain}{$a->attr('href')}");
     $link->setLink(trim($tr->filter('a[title="Download this torrent using magnet"]')->attr('href')));
     $link->setTitle(trim($tr->filter('.detName')->eq(0)->text()));

     // Captures, in order: upload date, size unit/value token, author
     $desc = $tr->filter('.detDesc')->text();
     if (!preg_match('/^\\S+ (.*), .*\\s(\\S+)\\s.*,.*\\s(.*)$/', $desc, $matches)) {
         return null;
     }
     $link->setUploaded(trim($matches[1]));
     $link->setSize(trim($matches[2]));
     $link->setAuthor(trim($matches[3]));

     $link->setSeeds(trim($tr->filter('td')->eq(2)->text()));
     $link->setLeeches(trim($tr->filter('td')->eq(3)->text()));
     $link->setEpisode($episode);
     $link->setShow($show);

     return $link;
 }
开发者ID:kipelovets,项目名称:rss,代码行数:34,代码来源:ThePirateBayClient.php

示例12: filterCountryDetails

 /**
  * Download a country's section page from galaxy.esn.org and return its
  * `div.scrinfo` elements.
  *
  * @param $codeCountry Country code inserted into the section URL.
  *
  * @return Crawler Nodes matching `div.scrinfo` on the downloaded page.
  */
 private function filterCountryDetails($codeCountry)
 {
     $sectionUrl = 'https://galaxy.esn.org/section/' . $codeCountry . "/";
     $sectionPage = new Crawler(file_get_contents($sectionUrl));

     return $sectionPage->filter('div.scrinfo');
 }
开发者ID:donatienthorez,项目名称:sf_mobilIT_backEnd,代码行数:12,代码来源:ImportCountriesReader.php

示例13: getProductsForUrl

 /**
  * Scrape a product listing page and collect every product found on it.
  *
  * The page is downloaded, the product list items are located with
  * $this->productListXpath, and for each item the detail-page link found with
  * $this->pdpLinkXpath is handed to the PDP parser. Parsed products are added
  * to $this->results.
  *
  * @param string $url The url to scrape.
  * @return \Slice\CliApp\ScrapeResults The results of the scrape task.
  *
  * @throws ScrapeException When an XPath lookup raises an
  *                         InvalidArgumentException or no product is found.
  */
 public function getProductsForUrl($url)
 {
     // Load the remote document into a fresh crawler
     $document = new Crawler();
     $document->addContent($this->downloader->download($url));

     try {
         $productItems = $document->filterXPath($this->productListXpath);
     } catch (\InvalidArgumentException $e) {
         // Re-thrown as a ScrapeException so the command can handle it uniformly
         throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
     }

     // An empty list means the page layout is not supported
     if (count($productItems) == 0) {
         throw new ScrapeException($this->configValues['error_msg']['no_products']);
     }

     $productItems->each(function ($productItem, $index) {
         try {
             // Detail page address taken from the item's link
             $detailUrl = $productItem->filterXPath($this->pdpLinkXpath)->attr('href');
         } catch (\InvalidArgumentException $e) {
             throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
         }
         $this->results->addProduct($this->pdpParser->parseUrl($detailUrl));
     });

     return $this->results;
 }
开发者ID:slice-beans,项目名称:cli-app,代码行数:37,代码来源:WebsiteScraper.php

示例14: isJoomla

 /**
  * Tell whether the target looks like a Joomla site.
  *
  * When the target ends in a document/image extension, the URL returned by
  * getBaseUrlJoomla() is fetched instead. The page counts as Joomla when a
  * <meta> tag's content mentions "Joomla!" or a <script> src points at the
  * mootools files under /media/system/js/.
  *
  * @return bool True on a match; false otherwise, including when the request
  *              or parsing throws.
  */
 public function isJoomla()
 {
     $probeUrl = $this->target;
     if (preg_match("/^.*\\.(jpg|JPG|gif|GIF|doc|DOC|pdf|PDF)\$/", $this->target)) {
         // Target is a file: probe the site base URL instead
         $probeUrl = $this->getBaseUrlJoomla();
     }

     $fakeHeaders = new FakeHeaders();
     try {
         $httpClient = new Client(['defaults' => ['headers' => ['User-Agent' => $fakeHeaders->getUserAgent()], 'proxy' => $this->proxy, 'timeout' => 30]]);
         $markup = $httpClient->get($probeUrl)->getBody()->getContents();
         $dom = new Crawler($markup);

         // Generator-style meta tags
         foreach ($dom->filter('meta') as $metaTag) {
             if (preg_match('/Joomla!/', $metaTag->getAttribute('content'))) {
                 return true;
             }
         }

         // Script files shipped with Joomla
         foreach ($dom->filter('script') as $scriptTag) {
             if (preg_match("/(\\/media\\/system\\/js\\/mootools(.js|-core.js))/", $scriptTag->getAttribute('src'))) {
                 return true;
             }
         }
     } catch (\Exception $e) {
         return false;
     }

     return false;
 }
开发者ID:aszone,项目名称:hacking,代码行数:31,代码来源:Joomla.php

示例15: search

 /**
  * Search kickasstorrents.to and return the rows whose name contains the query.
  *
  * The first two rows of `#mainSearchTable` are skipped. For each remaining
  * row the name, seeders, leechers and the magnet URL (taken from the row's
  * `data-sc-params` attribute) are copied into a SearchResult.
  *
  * @param string $query
  * @return SearchResult[] Empty when the HTTP client raises a ClientException.
  */
 public function search($query)
 {
     try {
         $response = $this->httpClient->get('http://kickasstorrents.to/usearch/' . urlencode($query) . '/');
     } catch (\GuzzleHttp\Exception\ClientException $e) {
         return [];
     }

     $page = new Crawler((string) $response->getBody());
     $rows = $page->filter('#mainSearchTable tr');

     $found = [];
     $position = 0;
     foreach ($rows as $row) {
         // Ignores advertisement and header
         if ($position++ < 2) {
             continue;
         }

         $rowCrawler = new Crawler($row);
         $name = $rowCrawler->filter('.cellMainLink')->text();
         if (!stristr($name, $query)) {
             continue;
         }

         // The attribute holds single-quoted JSON; swap the quotes before decoding
         $rawParams = $rowCrawler->filter('div[data-sc-params]')->attr('data-sc-params');
         $params = json_decode(str_replace("'", '"', $rawParams));

         $entry = new SearchResult();
         $entry->setName($name);
         $entry->setSeeders((int) $rowCrawler->filter('td:nth-child(5)')->text());
         $entry->setLeechers((int) $rowCrawler->filter('td:nth-child(6)')->text());
         $entry->setMagnetUrl($params->magnet);
         $found[] = $entry;
     }

     return $found;
 }
开发者ID:xurumelous,项目名称:torrent-scraper,代码行数:36,代码来源:KickassTorrentsAdapter.php


注:本文中的Symfony\Component\DomCrawler\Crawler类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。