当前位置: 首页>>代码示例>>PHP>>正文


PHP Crawler::addHtmlContent方法代码示例

本文整理汇总了PHP中Symfony\Component\DomCrawler\Crawler::addHtmlContent方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::addHtmlContent方法的具体用法？PHP Crawler::addHtmlContent怎么用？PHP Crawler::addHtmlContent使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Symfony\Component\DomCrawler\Crawler的用法示例。


在下文中一共展示了Crawler::addHtmlContent方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: process

 /**
  * Scrape the product listing held in $this->html.
  *
  * Every node matching `ul.productLister > li` becomes one row containing
  * the product title, the size and description obtained from the linked
  * product page, and the formatted unit price. The unit prices are summed
  * into a running total. The rows and the total are stored on the object
  * ($this->items, $this->total) and also returned.
  *
  * @return array ['items' => rows keyed by node position, 'total' => total formatted by number_format() with 2 decimals]
  * @throws Exception presumably raised by checkIfContentIsEmpty() when no HTML is set -- confirm against that helper
  */
 public function process()
 {
     // Check if HTML content is already set
     $this->checkIfContentIsEmpty($this->html);
     $items = [];
     $total = 0;
     // A closure declared inside a method is bound to $this automatically, so
     // no Closure::bindTo() call is needed (bindTo() returns a *new* closure;
     // calling it and discarding the result has no effect).
     $prepareItems = function (Crawler $nodeCrawler, $i) use (&$items, &$total) {
         // Title text and product-page URL both come from the same anchor
         $titleAnchor = $nodeCrawler->filter('h3 > a');
         $link = $titleAnchor->attr('href');
         $price = $nodeCrawler->filter('p.pricePerUnit')->text();
         $descriptionPage = $this->fetch($link);
         //prepare items array
         $items[$i]['title'] = trim($titleAnchor->text());
         $items[$i]['size'] = $this->sizeOf($descriptionPage);
         $items[$i]['unit_price'] = $this->format($price);
         $items[$i]['description'] = $this->getDescriptionFor($descriptionPage);
         $total += $items[$i]['unit_price'];
     };
     $this->domCrawler->addHtmlContent($this->html);
     $this->domCrawler->filter('ul.productLister > li')->each($prepareItems);
     $this->items = $items;
     $this->total = number_format($total, 2);
     return ['items' => $this->items, 'total' => $this->total];
 }
开发者ID:dilipgurung,项目名称:sainsburys-page-scraper,代码行数:35,代码来源:Scraper.php

示例2: setUp

 /**
  * Build the fixtures shared by the tests: the CSS selectors under test,
  * a selector provider and a crawler loaded with the HTML returned by
  * getValidHtml().
  */
 public function setUp()
 {
     $this->selectorProvider = new SelectorProvider();

     // Load the fixture document into a fresh crawler before exposing it
     $fixtureCrawler = new Crawler();
     $this->crawler = $fixtureCrawler;
     $fixtureCrawler->addHtmlContent($this->getValidHtml());

     // Going by the property names: one selector that should match items, one that should not
     $this->itemsCssSelector = '.list-group .list-group-item';
     $this->noItemsCssSelector = '.not-existing-class';
 }
开发者ID:mkocztorz,项目名称:data-scraper,代码行数:8,代码来源:ExtractListTest.php

示例3: setUp

 /**
  * Prepare three Css selector objects (a list selector, one named as
  * producing no result, and one built from an empty string) plus a crawler
  * loaded with the HTML returned by getValidHtml().
  */
 public function setUp()
 {
     $listCss = ".list-group .list-group-item";
     $missingCss = ".non-existing";
     $blankCss = "";

     $this->listSelector = new Css($listCss);
     $this->emptyResultSelector = new Css($missingCss);
     $this->emptySelector = new Css($blankCss);

     $fixtureCrawler = new Crawler();
     $this->crawler = $fixtureCrawler;
     $fixtureCrawler->addHtmlContent($this->getValidHtml());
 }
开发者ID:mkocztorz,项目名称:data-scraper,代码行数:8,代码来源:CssTest.php

示例4: testAddHtmlContent

 /**
  * addHtmlContent() must turn an HTML string into crawlable nodes; the
  * second document also checks that a link URI is resolved against the
  * document's <base href>.
  *
  * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
  */
 public function testAddHtmlContent()
 {
     $failureMessage = '->addHtmlContent() adds nodes from an HTML string';
     $charset = 'UTF-8';
     $crawler = new Crawler();

     // First document: a single div carrying a class attribute
     $crawler->addHtmlContent('<html><div class="foo"></html>', $charset);
     $divClass = $crawler->filter('div')->attr('class');
     $this->assertEquals('foo', $divClass, $failureMessage);

     // Second document (added to the same crawler): <base> plus a relative link
     $crawler->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', $charset);
     $baseHref = $crawler->filter('base')->attr('href');
     $this->assertEquals('http://symfony.com', $baseHref, $failureMessage);
     $contactUri = $crawler->filter('a')->link()->getUri();
     $this->assertEquals('http://symfony.com/contact', $contactUri, $failureMessage);
 }
开发者ID:RogerWebb,项目名称:symfony,代码行数:12,代码来源:CrawlerTest.php

示例5: setUp

 /**
  * Prepare the regex patterns, CSS selectors, selector provider and crawler
  * (loaded with the HTML returned by getValidHtml()) used by the tests.
  */
 public function setUp()
 {
     // Regex fixtures; the property names describe the scenario each one stands for
     $this->validCorrectPattern = '/user-(?P<value>\\d+)/';
     $this->validNoMatchPattern = '/NO-MATCH_STRING-(?P<value>\\d+)/';
     $this->validPatternWrongParam = '/NO-MATCH_STRING-(?P<wrong>\\d+)/';
     $this->invalidPattern = '/$%#$>\\d+)))/';

     $this->selectorProvider = new SelectorProvider();

     $fixtureCrawler = new Crawler();
     $this->crawler = $fixtureCrawler;
     $fixtureCrawler->addHtmlContent($this->getValidHtml());

     //will select first
     $this->itemCssSelector = '.list-group .list-group-item';
     $this->noItemsCssSelector = '.not-existing-class';
 }
开发者ID:mkocztorz,项目名称:data-scraper,代码行数:13,代码来源:ExtractAttributePatternTest.php

示例6: extract

 /**
  * Build a WatchLink for a URL: fetch the page, load it into the shared
  * crawler, and fill in the name, description and image via the
  * extractTitle()/extractDescription()/extractImage() helpers. Each given
  * tag is resolved through the tag repository and attached to the link.
  *
  * @param string $url  address of the page to describe
  * @param array  $tags tags to attach; each one is passed to findOrCreate()
  *
  * @return WatchLink
  */
 public function extract(string $url, array $tags) : WatchLink
 {
     $link = new WatchLink();
     $link->setUrl($url);

     // Empty the shared crawler before loading the newly fetched page
     $this->crawler->clear();
     $pageHtml = $this->fetcher->fetch($url);
     $this->crawler->addHtmlContent($pageHtml);

     $name = $this->extractTitle();
     $link->setName($name);
     $description = $this->extractDescription();
     $link->setDescription($description);
     $image = $this->extractImage();
     $link->setImage($image);

     foreach ($tags as $tagLabel) {
         $tagEntity = $this->tagRepository->findOrCreate($tagLabel);
         $link->addTag($tagEntity);
     }

     return $link;
 }
开发者ID:un-zero-un,项目名称:Veilleur,代码行数:20,代码来源:WatchLinkMetadataExtractor.php

示例7: filterPrice

 /**
  * Filter the price present on each country's price page, and return the price.
  *
  * The price is read from a fixed XPath location inside the page's table
  * markup, so it depends on the exact layout of the scraped page.
  *
  * @param string $content HTML of the price page
  *
  * @return string the trimmed text of the first matching cell, or an empty
  *                string when the XPath matches nothing
  */
 public function filterPrice($content)
 {
     $crawler = new Crawler();
     $crawler->addHtmlContent($content);
     // Crawler::extract() takes a single list of attribute names; '_text'
     // selects the node text. Passing an array works on every DomCrawler
     // version, whereas a bare string is rejected by versions that declare
     // the parameter as `array`.
     $prices = $crawler
         ->filterXPath("html/body/div[1]/div[3]/div/div/div[3]/div[4]/div/table/tr[1]/td[2]")
         ->extract(['_text']);
     // No match yields an empty list; return '' explicitly instead of
     // reading a missing index and trimming null.
     return isset($prices[0]) ? trim($prices[0]) : '';
 }
开发者ID:anthonybieber,项目名称:Scraper,代码行数:14,代码来源:BudgetYourTripParser.php

示例8: transform

 /**
  * Turn a category page into a JSON document listing its products.
  *
  * The category page is loaded, every product node on it is visited, the
  * linked product page is loaded as well, and a Product entity is filled
  * from both. The JSON contains the collected products under "results"
  * and the sum of their unit prices divided by 100 under "total".
  *
  * @param string $category_page_url URL of the category page to load
  * @param bool   $pretty_print_json when truthy, encode with JSON_PRETTY_PRINT
  *
  * @return string result of json_encode()
  */
 public function transform($category_page_url, $pretty_print_json = false)
 {
     /** loads the initial category page into a Crawler */
     $category_crawler = new Crawler();
     $category_crawler->addHtmlContent($this->page_manager->getPage($category_page_url), 'ISO-8859-1');
     $category_page = new CategoryPage($category_crawler);
     $product_collection = new ProductCollection();

     /** @todo handle cases where HTML structure throws out the crawler more elegantly */
     $collect_product = function (Crawler $category_page_product_node, $i) use($product_collection) {
         try {
             $node = new CategoryPageProductNode($category_page_product_node);
             $product_url = $node->getProductHref();

             /** loads the product page */
             $product_crawler = new Crawler();
             $product_crawler->addHtmlContent($this->page_manager->getPage($product_url), 'ISO-8859-1');
             $product_page = new ProductPage($product_crawler);

             /** gets the content from either the product or category page and saves it in the product entity */
             $product = new Product();
             $product
                 ->setTitle($node->getTitle())
                 ->setDescription($product_page->getDescription())
                 ->setUnitPrice($node->getUnitPrice())
                 ->setSize($this->page_manager->getSizeOfPage($product_url));

             $product_collection->addProduct($product);
         } catch (\InvalidArgumentException $ex) {
             // Intentionally ignored: a product that raises InvalidArgumentException is left out of the result
         }
     };

     /** loops through all the products on the category page */
     $category_page->getProducts()->each($collect_product);

     /** Combines the results with the total of all the unit prices */
     $payload = [
         'results' => $product_collection->toArray(),
         // NOTE(review): the division by 100 looks like a minor-unit to major-unit conversion -- confirm
         'total' => $product_collection->getSumOfUnitPrices() / 100,
     ];
     $flags = $pretty_print_json ? JSON_PRETTY_PRINT : 0;

     return json_encode($payload, $flags);
 }
开发者ID:blowski,项目名称:sainsburys-crawler,代码行数:27,代码来源:CategoryPageToJsonTransformer.php

示例9: actionTrypostdata

 /**
  * Simulate the worldjournal (wjlife.com) AJAX call that fetches classified
  * listings, then print the href of every link found under ".catDesc a"
  * in the returned HTML via print_r().
  *
  * The request is a POST to the theme's classified-core.php endpoint with
  * a fixed category ("03-ny-help-wanted"), page number 0 and a page size
  * of 40.
  */
 public function actionTrypostdata()
 {
     $hostname = 'www.wjlife.com';
     // NOTE: "optionVaules" (sic) is sent as a request key below, so the spelling must stay as is
     $optionVaules = ["relation" => "AND", "0" => ["relation" => "AND", "0" => ["key" => "wj_order_id"]]];
     //all help wanted
     $currentURL = "/cls_category/03-ny-help-wanted/";
     //temp page number
     $pno = 0;
     $queryObject = [
         "keyword" => "",
         "pagesize" => 40,
         "pno" => $pno,
         "optionVaules" => $optionVaules,
         "currentURL" => "http://" . $hostname . $currentURL,
         "currentCatId" => 327,
         "currentStateId" => 152,
     ];
     //language: chinese simplified
     $wjlang = "zh-cn";
     // The current timestamp is appended as "t" (presumably a cache-buster -- confirm)
     $requestUrl = "http://" . $hostname . "/wp-content/themes/wjlife/includes/classified-core.php?regions=state_ny&variant=" . $wjlang . "&t=" . time();
     $serverParameters = [
         'HTTP_X-Requested-With' => 'XMLHttpRequest',
         'contentType' => 'application/x-www-form-urlencoded;charset=utf-8',
     ];
     $client = new Client();
     $crawler = $client->request("POST", $requestUrl, $queryObject, [], $serverParameters);
     // To print this HTML with the correct encoding, pass it through utf8_decode() first
     $rowHtml = $crawler->html();
     // Re-parse the returned fragment on its own so it can be queried with CSS selectors
     $subCrawler = new Crawler();
     $subCrawler->addHtmlContent($rowHtml);
     $linkArray = $subCrawler->filter(".catDesc a")->each(function ($node, $index) {
         return $node->attr('href');
     });
     print_r($linkArray);
 }
开发者ID:njuljsong,项目名称:scrapeAds,代码行数:29,代码来源:ScrapeController.php

示例10: setUp

 /**
  * Create the subject under test from the category-page product-node
  * fixture, parsed as ISO-8859-1.
  */
 public function setUp()
 {
     $fixturePath = __DIR__ . '/../Fixtures/category-page-product-node.html';
     $fixtureHtml = file_get_contents($fixturePath);

     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent($fixtureHtml, 'ISO-8859-1');

     $this->SUT = new SUT($fixtureCrawler);
 }
开发者ID:blowski,项目名称:sainsburys-crawler,代码行数:7,代码来源:CategoryPageProductNodeTest.php

示例11: inlineImages

 /**
  * Embed every <img class="inline-image"> of the message body into the
  * message itself and rewrite the matching src attributes to point at the
  * embedded copy.
  *
  * A src starting with exactly one slash is treated as a local file below
  * $this->web_directory; any other src is handed to Swift_Image::fromPath()
  * unchanged. Each distinct file is embedded once. Both the single-quoted
  * and the double-quoted form of the attribute are replaced. The body is
  * only rewritten when at least one image was found.
  *
  * @param  Swift_Message $message message whose body is rewritten in place
  */
 protected function inlineImages(Swift_Message $message)
 {
     $body = $message->getBody();
     $crawler = new Crawler();
     $crawler->addHtmlContent($body);

     // resolved image path => value returned by $message->embed()
     $embedded = array();
     // attribute text found in the body => attribute text to substitute
     $substitutions = array();

     $inlineImageNodes = $crawler->filterXPath("//img[contains(concat(' ',normalize-space(@class), ' '), ' inline-image ')]");
     foreach ($inlineImageNodes as $imageNode) {
         $src = $imageNode->getAttribute('src');
         $doubleQuotedAttr = 'src="' . $src . '"';

         // This src was already handled for an earlier image
         if (isset($substitutions[$doubleQuotedAttr])) {
             continue;
         }

         // if starting with one slash, use local file
         $path = preg_match('#^/[^/]#', $src)
             ? $this->web_directory . parse_url($src, PHP_URL_PATH)
             : $src;

         if (!isset($embedded[$path])) {
             $embedded[$path] = $message->embed(Swift_Image::fromPath($path));
         }

         $embeddedAttr = 'src="' . $embedded[$path] . '"';
         $substitutions['src=\'' . $src . '\''] = $embeddedAttr;
         $substitutions[$doubleQuotedAttr] = $embeddedAttr;
     }

     if (!$substitutions) {
         return;
     }

     $rewritten = str_replace(array_keys($substitutions), array_values($substitutions), $body);
     $message->setBody($rewritten);
 }
开发者ID:wemakecustom,项目名称:swiftmailer-twig-bundle,代码行数:33,代码来源:TwigSwiftHelper.php

示例12: setLaundryState

 /**
  * Query a laundry place's "/LaundryState" page and record its state on
  * the given array.
  *
  * On success the array receives:
  *  - 'html':      the crawler's HTML after img src attributes were
  *                 rewritten to absolute URLs on 129.241.126.11
  *  - 'busy':      number of "bgColor=Red" occurrences in the response body
  *  - 'available': number of "bgColor=Green" occurrences in the response body
  * When any \Exception is thrown, all three keys are set to
  * self::NETWORK_ERROR instead.
  *
  * libxml's internal error handling is switched on for the duration of
  * the call; the caller's previous setting is always restored and the
  * buffered libxml errors are cleared, including on the failure path.
  *
  * @param array $laundryPlace passed by reference; 'url' is read, the keys above are written
  */
 public function setLaundryState(&$laundryPlace)
 {
     // NOTE(review): placeholder credentials -- they should come from configuration
     $user = 'youruser';
     $pass = 'yourpassword';
     // Remember the caller's libxml mode so it can be put back afterwards
     $previousLibxmlMode = libxml_use_internal_errors(true);
     try {
         $client = new Client($laundryPlace['url']);
         $request = $client->get('/LaundryState', [], ['auth' => [$user, $pass, 'Digest'], 'timeout' => 1.5, 'connect_timeout' => 1.5]);
         $response = $request->send();
         $body = $response->getBody();
         $crawler = new Crawler();
         $crawler->addContent($body);
         // Point every image at the device's address instead of a relative path
         foreach ($crawler->filter('img') as $img) {
             $resource = $img->getAttribute('src');
             $img->setAttribute('src', 'http://129.241.126.11/' . trim($resource, '/'));
         }
         // NOTE(review): looks like leftover experimentation (see the commented-out <link> below) -- confirm before removing
         $crawler->addHtmlContent('<h1>foobar</h1>');
         //'<link href="http://129.241.126.11/pic/public_n.css" type="text/css">');
         $laundryPlace['html'] = $crawler->html();
         // The machine states are counted straight from the raw markup
         preg_match_all('/bgColor=Green/', $body, $greenMatches);
         preg_match_all('/bgColor=Red/', $body, $redMatches);
         $laundryPlace['busy'] = count($redMatches[0]);
         $laundryPlace['available'] = count($greenMatches[0]);
     } catch (\Exception $e) {
         $laundryPlace['available'] = self::NETWORK_ERROR;
         $laundryPlace['busy'] = self::NETWORK_ERROR;
         $laundryPlace['html'] = self::NETWORK_ERROR;
     } finally {
         // Runs on success and failure alike: drop buffered parse errors
         // and restore whatever error mode was active before this call
         libxml_clear_errors();
         libxml_use_internal_errors($previousLibxmlMode);
     }
 }
开发者ID:kcisek,项目名称:sit-washing,代码行数:30,代码来源:MieleService.php

示例13: testAddHtmlContent

    /**
     * addHtmlContent() must turn an HTML string into crawlable nodes.
     *
     * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
     */
    public function testAddHtmlContent()
    {
        $markup = '<html><div class="foo"></html>';

        $crawler = new Crawler();
        $crawler->addHtmlContent($markup, 'UTF-8');

        // The div must be reachable and expose its class attribute
        $divClass = $crawler->filter('div')->attr('class');
        $this->assertEquals('foo', $divClass, '->addHtmlContent() adds nodes from an HTML string');
    }
开发者ID:nacef,项目名称:symfony,代码行数:10,代码来源:CrawlerTest.php

示例14: test_it_extracts_description

 /**
  * The description read from the product-page fixture (parsed as
  * ISO-8859-1) must equal "Apricots".
  */
 public function test_it_extracts_description()
 {
     $fixturePath = __DIR__ . '/../Fixtures/product-page.html';
     $fixtureHtml = file_get_contents($fixturePath);

     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent($fixtureHtml, 'ISO-8859-1');

     $productPage = new SUT($fixtureCrawler);
     $description = $productPage->getDescription();

     $this->assertEquals("Apricots", $description);
 }
开发者ID:blowski,项目名称:sainsburys-crawler,代码行数:8,代码来源:ProductPageTest.php

示例15: scrap

 /**
  * Scrape the og:title meta tag from the page at the given URL.
  *
  * The page is requested with GET through $this->request and parsed with
  * the shared $this->crawler. The crawler is emptied before each page is
  * loaded so that nodes from an earlier call cannot be returned for a
  * later URL.
  *
  * @param  string $url address of the page to inspect
  * @return string the content of the first <meta property="og:title"> under
  *                <head>; 'Unable to parse' when there is none; or the
  *                exception message when the request or parsing throws
  */
 public function scrap($url)
 {
     $title = 'Unable to parse';
     $this->request->setMethod(HTTP_METH_GET);
     $this->request->setUrl($url);
     try {
         $response = $this->request->send();
         // addHtmlContent() appends to the crawler's node list; without this
         // reset, getNode(0) below would keep returning the meta tag of the
         // first page ever scraped with this instance.
         $this->crawler->clear();
         $this->crawler->addHtmlContent($response->getBody());
         $subCrawler = $this->crawler->filterXPath('//head/meta[@property="og:title"]');
         $meta = $subCrawler->getNode(0);
         if ($meta) {
             $title = $meta->getAttribute('content');
         }
     } catch (Exception $e) {
         // Failures are reported to the caller through the returned string
         $title = $e->getMessage();
     }
     return $title;
 }
开发者ID:shitfSign,项目名称:threads,代码行数:23,代码来源:Worker.php


注:本文中的Symfony\Component\DomCrawler\Crawler::addHtmlContent方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。