当前位置: 首页>>代码示例>>PHP>>正文


PHP DomDocument::loadHTML方法代码示例

本文整理汇总了PHP中DomDocument::loadHTML方法的典型用法代码示例。如果您正苦于以下问题：PHP DomDocument::loadHTML方法的具体用法？PHP DomDocument::loadHTML怎么用？PHP DomDocument::loadHTML使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DomDocument的用法示例。


在下文中一共展示了DomDocument::loadHTML方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: __construct

 /**
  * Build an HTMLDoc around a chunk of markup.
  *
  * The markup is parsed into a DOMDocument kept on the instance, together
  * with a DOMXPath bound to that document. libxml's internal error handling
  * is switched on before parsing and is left on afterwards.
  *
  * @param string $html The HTML to parse
  */
 public function __construct($html)
 {
     $document = new \DOMDocument();
     $this->dom = $document;

     // Collect parser complaints internally rather than raising PHP warnings
     libxml_use_internal_errors(true);
     $document->loadHTML($html);

     $this->xp = new \DOMXPath($document);
 }
开发者ID:ringmaster,项目名称:microsite2,代码行数:11,代码来源:HTMLDoc.php

示例2: __construct

 /**
  * Constructor
  *
  * Parses the given markup into the internal DOM document. libxml parse
  * errors are buffered while loading so that malformed HTML does not raise
  * PHP warnings; afterwards the buffer is emptied and the libxml
  * error-handling mode that was active before the call is put back.
  *
  * @param string $html HTML markup to load
  * @return void
  */
 public function __construct($html)
 {
     // libxml_use_internal_errors() hands back the mode that was active;
     // keep it so the process-wide setting is restored, not forced to false.
     $previousMode = libxml_use_internal_errors(true);
     $this->_document = new DomDocument();
     $this->_document->preserveWhiteSpace = false;
     $this->_document->loadHTML($html);
     // Disabling the mode discards buffered errors as a side effect; clear
     // them explicitly so they are also dropped when the restored mode is on.
     libxml_clear_errors();
     libxml_use_internal_errors($previousMode);
 }
开发者ID:real34,项目名称:i18n,代码行数:14,代码来源:html_tokenizer.php

示例3: getHtmlDocument

 /**
  * Parse an HTML string into a DOMDocument.
  *
  * An input that PHP considers empty yields a fresh, empty document without
  * any parsing. Otherwise libxml's internal error handling is switched on
  * (and left on) and the input is loaded; on PHP 5.4.0 and later the
  * LIBXML_NONET option is passed to loadHTML().
  *
  * @static
  * @access public
  * @param  string $input HTML content
  * @return DOMDocument
  */
 public static function getHtmlDocument($input)
 {
     $document = new DomDocument();

     if (!empty($input)) {
         libxml_use_internal_errors(true);

         // The options argument of loadHTML() only exists from PHP 5.4.0 on
         $acceptsOptions = version_compare(PHP_VERSION, '5.4.0', '>=');
         if ($acceptsOptions) {
             $document->loadHTML($input, LIBXML_NONET);
         } else {
             $document->loadHTML($input);
         }
     }

     return $document;
 }
开发者ID:indigo423,项目名称:blog.no42.org,代码行数:22,代码来源:XmlParser.php

示例4: get_body_length

/**
 * Return the length, in bytes, of the visible text of an HTML fragment.
 *
 * Elements hidden through an inline "display: none" style are removed from
 * the parsed document before the markup is stripped, so their content does
 * not count towards the result.
 *
 * @param string $body HTML fragment to measure
 * @return int Byte length of the serialized document once tags are stripped;
 *             0 when $body is empty or contains only whitespace
 */
function get_body_length($body)
{
    $string = trim($body);
    // DomDocument doesn't like empty strings
    if (!strlen($string)) {
        return 0;
    }

    // loadHTML() is an instance method; calling it statically is an Error on PHP 8
    $dom = new DOMDocument();

    // The input is rarely valid HTML: buffer the parser's complaints for the
    // duration of the load, then restore the caller's libxml error mode.
    $previousMode = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($body);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    if (!$loaded) {
        // Parsing failed outright; fall back to measuring the stripped input
        return strlen(strip_tags($body));
    }

    /*
     * Checking every possible syntax of the style attribute with XPath alone
     * is impractical, so fetch each element carrying a style attribute and
     * test the attribute with a regexp. The declaration is matched with or
     * without a trailing semicolon, with optional whitespace around the
     * colon, and regardless of letter case.
     */
    $hiddenPattern = '/(?:^|;)\s*display\s*:\s*none\s*(?:!\s*important\s*)?(?:;|$)/i';
    $xpath = new DOMXPath($dom);
    foreach ($xpath->query('//*[@style]') as $node) {
        if ($node->parentNode !== null && preg_match($hiddenPattern, $node->getAttribute('style'))) {
            // Hidden, remove it from its parent
            $node->parentNode->removeChild($node);
        }
    }

    // What is left in the document is only the visible content
    return strlen(strip_tags($dom->saveHTML()));
}
开发者ID:rabuzarus,项目名称:friendica-addons,代码行数:27,代码来源:showmore.php

示例5: getPreview

 /**
  * Return a preview made of the leading top-level elements of the body.
  *
  * The preview is computed once and kept in $this->preview; later calls
  * return that stored value whatever $elements is. The body is wrapped in a
  * UTF-8 HTML skeleton, parsed, and the first $elements element children of
  * <body> are serialized with saveXML() and concatenated.
  *
  * @param int|null $elements Number of top-level elements to keep; null means 2
  * @return string Preview markup with "&#13;" entities removed
  */
 public function getPreview($elements)
 {
     if (isset($this->preview)) {
         return $this->preview;
     }

     $limit = isset($elements) ? $elements : 2;

     libxml_use_internal_errors(true);
     $document = new DomDocument();
     $document->preserveWhiteSpace = false;
     $document->loadHTML(
         '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>'
         . $this->Body
         . '</body></html>'
     );
     $document->normalize();

     $bodyChildren = $document->getElementsByTagName("body")->item(0)->childNodes;
     $taken = 0;
     $this->preview = '';
     foreach ($bodyChildren as $child) {
         // Only element nodes count; text and comment nodes are skipped
         if ($child->nodeType !== XML_ELEMENT_NODE) {
             continue;
         }
         $this->preview .= $document->saveXML($child);
         $taken++;
         if ($taken === $limit) {
             break;
         }
     }

     // Carriage returns in the XML prevent the markup from validating
     $this->preview = str_replace('&#13;', '', $this->preview);

     return $this->preview;
 }
开发者ID:chriswells0,项目名称:cwa-blog,代码行数:30,代码来源:BlogPost.php

示例6: truncatehtml

 /**
  * Shorten an HTML fragment at top-level element boundaries.
  *
  * The fragment is wrapped in a <div> and parsed. Top-level non-text nodes
  * are then copied, in order, into a new document until the accumulated
  * byte length of their decoded text reaches $minimum. Text nodes sitting
  * directly at the top level are not copied.
  *
  * @param string $html    UTF-8 HTML fragment
  * @param int    $minimum Text length at which copying stops
  * @return string Entity-decoded markup of the copied nodes
  */
 public function truncatehtml($html, $minimum)
 {
     $source = new \DomDocument();
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
     $source->loadHTML('<div>' . $encoded . '</div>');

     // Drop the DOCTYPE, then replace <html> by the wrapper <div> found at
     // html > body > div so that the wrapper becomes the document element.
     $source->removeChild($source->firstChild);
     $source->replaceChild($source->firstChild->firstChild->firstChild, $source->firstChild);

     // Running length of the displayed text (markup excluded)
     $textLength = 0;
     $target = new \DomDocument();

     foreach ($source->documentElement->childNodes as $child) {
         if ($child->nodeType == XML_TEXT_NODE) {
             continue;
         }

         // Deep-copy the node into the output document
         $copy = $target->importNode($child, true);
         $target->appendChild($copy);

         $textLength += strlen(html_entity_decode($copy->nodeValue));
         if ($textLength >= $minimum) {
             // Enough text gathered
             break;
         }
     }

     return html_entity_decode($target->saveHTML());
 }
开发者ID:sitobcn82,项目名称:blog,代码行数:27,代码来源:TruncateHtmlExtension.php

示例7: initializeDomDocument

 /**
  * Parse the stored HTML and hand the resulting document to the page.
  *
  * @return $this
  */
 public function initializeDomDocument()
 {
     $document = new \DomDocument();
     $document->loadHTML($this->html);

     $page = $this->getPage();
     $page->setDocument($document);

     return $this;
 }
开发者ID:JINCHUNGEUN,项目名称:page-scraper,代码行数:10,代码来源:PageBuilder.php

示例8: getMetaTags

 /**
  * Download a page and collect its <meta> tags into an associative array.
  *
  * Each tag is keyed by its "name" attribute, falling back to "property";
  * its value is the "content" attribute, falling back to "value". Tags
  * lacking either a key or a value are skipped, and a later tag overwrites
  * an earlier one that has the same key.
  *
  * @param string $url Address of the page to fetch
  * @return array|null Map of meta key => value; null when the response body
  *                    was empty. If an exception interrupts the work, whatever
  *                    had been collected up to that point (possibly null).
  */
 public static function getMetaTags($url)
 {
     $rc = null;
     try {
         $settings = array(CURLOPT_URL => $url);
         $contents = self::runCurl($settings);
         if (!empty($contents)) {
             // Collect parser complaints internally (the mode is left enabled)
             libxml_use_internal_errors(true);
             $doc = new \DomDocument();
             $doc->loadHTML($contents);
             $metas = $doc->getElementsByTagName('meta');
             $rc = array();
             foreach ($metas as $meta) {
                 $name = $meta->getAttribute('name');
                 if (empty($name)) {
                     $name = $meta->getAttribute('property');
                 }
                 $content = $meta->getAttribute('content');
                 if (empty($content)) {
                     $content = $meta->getAttribute('value');
                 }
                 if (!empty($name) && !empty($content)) {
                     $rc[$name] = $content;
                 }
             }
         }
         return $rc;
     } catch (\Exception $e) {
         // Fully qualified on purpose: inside a namespace a bare "Exception"
         // names a namespaced class and would never match the global one.
         // Best-effort lookup: return what was gathered before the failure.
         return $rc;
     }
 }
开发者ID:chiasean,项目名称:saywut,代码行数:31,代码来源:Core.php

示例9: fillInHtml

 /**
  * Fill in a form inside an HTML page and return the resulting markup.
  *
  * The page is parsed into a DOM document created with the charset taken
  * from the "sf_charset" setting (default "UTF-8"), passed to fillInDom(),
  * and the document that call returns is serialized back to HTML.
  *
  * @param string $html     HTML page containing the form
  * @param string $formName Forwarded to fillInDom()
  * @param string $formId   Forwarded to fillInDom()
  * @param array  $values   Forwarded to fillInDom()
  * @return string Serialized HTML of the filled-in document
  */
 public function fillInHtml($html, $formName, $formId, $values)
 {
     $charset = sfConfig::get('sf_charset', 'UTF-8');
     $document = new DomDocument('1.0', $charset);

     // Parser warnings about malformed markup are silenced
     @$document->loadHTML($html);

     $filled = $this->fillInDom($document, $formName, $formId, $values);

     return $filled->saveHTML();
 }
开发者ID:DBezemer,项目名称:server,代码行数:7,代码来源:sfFillInForm.class.php

示例10: exec

 /**
  * Run the extraction over an HTML page.
  *
  * Clears the contents of every <script> element, parses the page into a
  * DOM document, records document-wide figures (text, text length, counts)
  * on the instance and finally calls extract() on the <body> node.
  *
  * @param string $html HTML source of the page (converted from UTF-8 before parsing)
  */
 public function exec($html)
 {
     mb_language('Japanese');
     // 1. Pre-processing
     // Remove script text.
     // If a script contains a closing tag inside a string literal, DomDocument
     // treats the script source as #text, so the contents of script elements
     // are removed.
     // Removing them with a regular expression caused a segmentation fault
     // (stack overflow?), so simple_html_dom is used to clear them instead.
     // simple_html_dom's MAX_FILE_SIZE limit was hit, so its source was edited
     // to raise the limit to three times the default.
     $simpleHtml = str_get_html($html);
     foreach ($simpleHtml->find('script') as $script) {
         $script->innertext = '';
     }
     $html = $simpleHtml->outertext;
     // Trim (currently disabled)
     //		$html = preg_replace('/(\s| )+/mi', ' ', $html);
     // 2. Build the DOM
     $doc = new DomDocument("1.0", "utf-8");
     // Parser warnings are silenced; non-ASCII characters are turned into
     // HTML entities before loading
     @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
     // NOTE(review): item(0) is null when no <body> element exists; the next
     // line would then fail - confirm inputs always produce a body
     $node = $doc->getElementsByTagName('body')->item(0);
     $this->preProcessedInput = $node->textContent;
     // 3. Initialise properties
     $this->domXPath = new DomXPath($doc);
     // Silenced because the page may have no <title> element
     $this->title = @$doc->getElementsByTagName('title')->item(0)->textContent;
     $text = $this->scan($node);
     $this->textAll = $text;
     // presumably domCount is updated by scan() above; verify against scan()
     $this->domCountAll = $this->domCount;
     // presumably scores for Japanese full stops (kuten) and commas (toten) - TODO confirm
     $this->pancutuationCountAll = $this->calcKutenScore($text) + $this->calcTotenScore($text);
     $this->textLengthAll = mb_strlen($text);
     // Very low starting score and no selected node before extraction begins
     $this->highScore = -1000000;
     $this->extracedNode = null;
     // 4. Run
     $this->extract($node);
 }
开发者ID:gammodoking,项目名称:kindle.server,代码行数:39,代码来源:ContentExtractor.php

示例11: returnXPathObject

/**
 * Build an XPath evaluator for a piece of HTML.
 *
 * @param string $item HTML markup to parse
 * @return DOMXPath XPath object bound to the parsed document
 */
function returnXPathObject($item)
{
    $document = new DomDocument();

    // Warnings about malformed markup are silenced
    @$document->loadHTML($item);

    return new DOMXPath($document);
}
开发者ID:shresthasumit55,项目名称:scraper,代码行数:7,代码来源:scraper.php

示例12: hal_parse

/**
 * Fetch a page and return the markup of its <div id="res_script"> element.
 *
 * The URL is cleaned (HTML entities decoded, surrounding quotes and spaces
 * trimmed) and only accepted when its host resolves to 193.48.96.10. The
 * page is then downloaded, parsed, and the first matching element is
 * serialized.
 *
 * @param string $url Address of the page to fetch
 * @return string|null Serialized element, or null when the URL is rejected,
 *                     nothing could be downloaded, or no matching element exists
 */
function hal_parse($url)
{
    $url = trim(html_entity_decode($url), "\"' ");
    $infos = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and leaves out
    // 'host' for relative ones; neither can pass the address check below.
    if (!is_array($infos) || empty($infos['host'])) {
        spip_log("Url invalid", _LOG_ERREUR);
        return;
    }
    // Only a host resolving to this fixed address is accepted
    // (presumably the HAL server - confirm)
    $ip = gethostbyname($infos['host']);
    if ($ip !== '193.48.96.10') {
        spip_log("Url invalid", _LOG_ERREUR);
        return;
    }
    spip_log(sprintf("[hal_parse] init_http(%s)", $url), _LOG_DEBUG);
    $content = recuperer_page($url);
    spip_log(sprintf("[hal_parse] init_http(%s): Done", $url), _LOG_DEBUG);
    // loadHTML() rejects an empty source (ValueError on PHP 8), so stop here
    // when the download produced nothing usable.
    if (!is_string($content) || $content === '') {
        spip_log("No content fetched", _LOG_ERREUR);
        return;
    }
    $dom = new DomDocument('1.0', 'UTF-8');
    $dom->preserveWhiteSpace = false;
    $str = mb_convert_encoding($content, "HTML-ENTITIES");
    // Parser warnings about malformed markup are silenced
    @$dom->loadHTML($str);
    $xpath = new DOMXpath($dom);
    $entries = $xpath->query('//div[@id="res_script"]');
    if ($entries->length === 0) {
        spip_log("No tag found ...", _LOG_ERREUR);
        return;
    }
    return $dom->saveXML($entries->item(0));
}
开发者ID:RBoisselet,项目名称:spip_webpage_from_hal,代码行数:25,代码来源:webpage_from_hal_fonctions.php

示例13: getProductComments

 /**
  * Scrape the customer comments from a product page.
  *
  * Every element whose class attribute equals "comment" becomes one entry.
  * The author is read from the first <b>, the text from the first <p> and
  * the date from the first <span> inside it; a missing child yields null
  * for that field. The rating is always 0.
  *
  * Also sends a "Content-type: text/html; charset=utf-8" header when headers
  * have not been sent yet.
  *
  * @param string $url Product page to read; when empty, the page
  *                    http://ormatek.com/products/980 is used
  * @return array|null List of arrays with the keys id, name, date, rating and
  *                    text; null when the page could not be downloaded or
  *                    holds no comments
  */
 public static function getProductComments($url)
 {
     if (!headers_sent()) {
         header('Content-type: text/html; charset=utf-8');
     }

     // Honour the caller's URL; fall back to the fixed page only when none is given
     if (empty($url)) {
         $url = "http://ormatek.com/products/980";
     }

     $doc = file_get_contents($url);
     // loadHTML() cannot parse an empty source, so a failed or empty download ends here
     if ($doc === false || $doc === '') {
         return null;
     }
     $doc = mb_convert_encoding($doc, 'HTML-ENTITIES', "UTF-8");

     $dom = new DomDocument();
     // Collect parser complaints internally (the mode is left enabled)
     libxml_use_internal_errors(true);
     $dom->loadHTML($doc);

     $xpath = new DomXPath($dom);
     $nodes = $xpath->query(".//*[@class='comment']");
     // query() yields a DOMNodeList (never an array), or false for a bad expression
     if ($nodes === false || $nodes->length === 0) {
         return null;
     }

     // Text of the first descendant with the given tag name, or null when absent
     $firstText = function ($node, $tag) {
         $match = $node->getElementsByTagName($tag)->item(0);
         return $match === null ? null : $match->nodeValue;
     };

     $param = array();
     $i = 0;
     foreach ($nodes as $node) {
         $i++;
         $param[] = array(
             'id' => $i,
             'name' => $firstText($node, "b"),
             'date' => $firstText($node, "span"),
             'rating' => 0,
             'text' => $firstText($node, "p"),
         );
     }

     return $param;
 }
开发者ID:evgrishin,项目名称:mh16014,代码行数:31,代码来源:ormatekgrabber.php

示例14: capi_mkfeedtitle

/**
 * Expand the title template of a feed entry.
 *
 * The serialized title_data of the entry is decoded into an array (an empty
 * array when missing or not an array). Every "{key}" placeholder in
 * title_template is replaced by the matching title_data value and "{actor}"
 * by the $_SN entry for the feed's uid; the result is passed through
 * mktarget(). The "touser" value is run through capi_fhtml() first.
 *
 * @param array $feed Feed row with at least title_data, title_template and uid
 * @return array The feed with title_data decoded and title_template expanded
 */
function capi_mkfeedtitle($feed)
{
    global $_SGLOBAL, $_SN, $_SCONFIG;
    // NOTE(review): unserialize() on data that is not fully trusted can
    // instantiate arbitrary classes - confirm where title_data comes from
    $feed['title_data'] = empty($feed['title_data']) ? array() : unserialize($feed['title_data']);
    if (!is_array($feed['title_data'])) {
        $feed['title_data'] = array();
    }
    //title
    $searchs = $replaces = array();
    foreach (array_keys($feed['title_data']) as $key) {
        if ($key === "touser") {
            // Store the processed value on $feed itself so that it is the one
            // substituted below; assigning it to an unrelated variable would
            // throw the result away.
            $feed["title_data"]["touser"] = capi_fhtml($feed["title_data"]["touser"]);
        }
        $searchs[] = '{' . $key . '}';
        $replaces[] = $feed['title_data'][$key];
    }
    $searchs[] = '{actor}';
    // NOTE(review): $actors is never assigned in this function, so the $_SN
    // lookup is always taken - confirm whether a list of actors was intended
    $replaces[] = empty($actors) ? $_SN[$feed['uid']] : implode(lang('dot'), $actors);
    $feed['title_template'] = mktarget(str_replace($searchs, $replaces, $feed['title_template']));
    return $feed;
}
开发者ID:NaturalWill,项目名称:UCQA,代码行数:27,代码来源:function_capi.php

示例15: __construct

 /**
  * Build a page object from a raw response.
  *
  * Stores the URL, response and browser, lets parseResponse() process the
  * response, then parses the page as XML when the Content-Type header
  * contains the word "xml" and as HTML otherwise. The page title and a
  * PGForm object for every <form> element are collected, URLs are converted
  * when the browser asks for it, and setParser() is called last.
  *
  * @param string $url      Address the response came from
  * @param string $response Raw response handed to parseResponse()
  * @param object $browser  Owning browser; its convertUrls flag is read
  */
 function __construct($url, $response, $browser)
 {
     $this->url = $url;
     $this->html = $response;
     $this->parseResponse($response);

     $isXml = false;
     if (isset($this->headers['Content-Type']) && preg_match('/\\bxml\\b/i', $this->headers['Content-Type'])) {
         $isXml = true;
     }
     $this->is_xml = $isXml;
     $this->browser = $browser;

     // Parser warnings are silenced for both document kinds
     $this->dom = new DOMDocument();
     if (!$this->is_xml) {
         @$this->dom->loadHTML($this->html);
     } else {
         @$this->dom->loadXML($this->html);
     }
     $this->xpath = new DOMXPath($this->dom);

     $titleNode = $this->xpath->query('//title')->item(0);
     $this->title = $titleNode ? $titleNode->nodeValue : '';

     // NOTE(review): $this->forms is reset here while the PGForm objects are
     // appended to $this->_forms - confirm both properties are intended
     $this->forms = array();
     foreach ($this->xpath->query('//form') as $formElement) {
         $this->_forms[] = new PGForm($formElement, $this);
     }

     if ($browser->convertUrls) {
         $this->convertUrls();
     }
     $this->setParser($this->html, $this->is_xml);

     // Reclaim cyclic references where the collector is available
     if (function_exists('gc_collect_cycles')) {
         gc_collect_cycles();
     }
 }
开发者ID:byjg,项目名称:pgbrowser,代码行数:27,代码来源:PGPage.php


注:本文中的DomDocument::loadHTML方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。