PHP DomDocument::loadHTML方法代码示例

本文整理汇总了PHP中DomDocument::loadHTML方法的典型用法代码示例。如果您正苦于以下问题：PHP DomDocument::loadHTML方法的具体用法？PHP DomDocument::loadHTML怎么用？PHP DomDocument::loadHTML使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DomDocument的用法示例。

在下文中一共展示了DomDocument::loadHTML方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: __construct

 /**
  * Create a HTMLDoc object by parsing an HTML string.
  *
  * libxml internal error handling is switched on before parsing and is left
  * enabled afterwards, so malformed markup is recorded in the libxml error
  * buffer instead of being raised as PHP warnings.
  *
  * An empty value is never handed to the parser: DOMDocument::loadHTML()
  * rejects an empty source (warning on PHP 5/7, ValueError on PHP 8). In that
  * case the object wraps an empty document and XPath queries match nothing.
  *
  * @param string $html The HTML to parse
  */
 public function __construct($html)
 {
     $this->dom = new \DOMDocument();
     libxml_use_internal_errors(true);
     // Skip the parser for empty input so construction cannot fail on it.
     if ((string) $html !== '') {
         $this->dom->loadHTML($html);
     }
     $this->xp = new \DOMXPath($this->dom);
 }

开发者ID:ringmaster，项目名称:microsite2，代码行数:11，代码来源:HTMLDoc.php

示例2: __construct

 /**
  * Constructor: parse the given HTML into the internal DOM document.
  *
  * Parse errors are collected by libxml (not raised as PHP warnings) while
  * loading and are discarded afterwards. The libxml error-handling mode that
  * was active before the call is restored, rather than being forced off.
  *
  * An empty value is not passed to the parser, because
  * DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8);
  * the document is then simply left empty.
  *
  * @param string $html
  */
 public function __construct($html)
 {
     // Remember the caller's setting so it can be put back afterwards.
     $previous = libxml_use_internal_errors(true);
     $this->_document = new DomDocument();
     $this->_document->preserveWhiteSpace = false;
     if ((string) $html !== '') {
         $this->_document->loadHTML($html);
     }
     // Drop whatever the parser reported, then restore the previous mode.
     libxml_clear_errors();
     libxml_use_internal_errors($previous);
 }

开发者ID:real34，项目名称:i18n，代码行数:14，代码来源:html_tokenizer.php

示例3: getHtmlDocument

 /**
  * Parse an HTML string into a DomDocument.
  *
  * Input that empty() considers empty is not parsed; a fresh, empty document
  * is returned instead. Otherwise libxml internal error handling is switched
  * on (and left on) before parsing. On PHP 5.4.0 and later the LIBXML_NONET
  * option is passed to the parser; older versions get the one-argument call.
  *
  * @static
  * @access public
  * @param  string $input HTML content
  * @return DOMDocument
  */
 public static function getHtmlDocument($input)
 {
     $document = new DomDocument();
     if (!empty($input)) {
         libxml_use_internal_errors(true);
         if (version_compare(PHP_VERSION, '5.4.0', '<')) {
             // The options argument is only passed on 5.4.0 and later.
             $document->loadHTML($input);
         } else {
             $document->loadHTML($input, LIBXML_NONET);
         }
     }
     return $document;
 }

开发者ID:indigo423，项目名称:blog.no42.org，代码行数:22，代码来源:XmlParser.php

示例4: get_body_length

/**
 * Return the length of the visible text of an HTML fragment.
 *
 * Elements whose inline style hides them (display: none) are removed before
 * the markup is serialised and stripped of tags. The result is the byte
 * length (strlen) of what remains, so it is an approximation meant for
 * comparisons rather than an exact character count.
 *
 * @param string $body HTML fragment
 * @return int 0 for an empty or whitespace-only fragment
 */
function get_body_length($body)
{
    $string = trim($body);
    // DomDocument doesn't like empty strings
    if (!strlen($string)) {
        return 0;
    }
    // loadHTML() is an instance method: calling it statically is a fatal
    // error on PHP 8. Parser complaints about invalid markup are collected
    // by libxml instead of being silenced with "@", and the previous libxml
    // mode is restored right after loading.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($body);
    libxml_use_internal_errors($previous);
    $xpath = new DOMXPath($dom);
    /*
     * Checking any possible syntax of the style attribute with xpath is impossible
     * So we just get any element with a style attribute, and check them with a regexp
     */
    $xr = $xpath->query('//*[@style]');
    foreach ($xr as $node) {
        // The declaration may end with ";" or be the last one in the
        // attribute; property and value are matched case-insensitively.
        if (preg_match('/display\s*:\s*none\s*(?:;|$)/i', $node->getAttribute('style'))) {
            // Hidden, remove it from its parent
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }
    // Now we can get the body of our HTML DomDocument, it contains only what is visible
    $string = strip_tags($dom->saveHTML());
    return strlen($string);
}

开发者ID:rabuzarus，项目名称:friendica-addons，代码行数:27，代码来源:showmore.php

示例5: getPreview

 /**
  * Build (once) and return a markup preview of the post body.
  *
  * The body is wrapped in a minimal UTF-8 HTML page, parsed, and the first
  * top-level elements inside <body> are serialised with saveXML() and
  * concatenated. The result is cached in $this->preview, so later calls
  * return the cached value whatever $elements is.
  *
  * @param int|null $elements How many top-level elements to include; null means 2.
  *                           The count is compared strictly (===) against an
  *                           integer counter.
  * @return string
  */
 public function getPreview($elements)
 {
     // Serve the cached preview when one has already been built.
     if (isset($this->preview)) {
         return $this->preview;
     }
     if (!isset($elements)) {
         $elements = 2;
     }

     libxml_use_internal_errors(true);
     $document = new DomDocument();
     $document->preserveWhiteSpace = false;
     $document->loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $this->Body . '</body></html>');
     $document->normalize();

     $markup = '';
     $taken = 0;
     foreach ($document->getElementsByTagName("body")->item(0)->childNodes as $child) {
         // Only element nodes count towards the preview; text, comments etc. are skipped.
         if ($child->nodeType !== XML_ELEMENT_NODE) {
             continue;
         }
         $markup .= $document->saveXML($child);
         if (++$taken === $elements) {
             break;
         }
     }

     // Carriage returns in the XML prevent the markup from validating. -- cwells
     $this->preview = str_replace('&#13;', '', $markup);
     return $this->preview;
 }

开发者ID:chriswells0，项目名称:cwa-blog，代码行数:30，代码来源:BlogPost.php

示例6: truncatehtml

 /**
  * Truncate an HTML fragment at top-level element boundaries.
  *
  * The fragment is wrapped in a <div> and parsed; the wrapper's non-text
  * children are then copied one by one into a new document until the
  * accumulated text length (strlen of the entity-decoded node value)
  * reaches $minimum. Top-level text nodes are skipped.
  *
  * @param string $html    UTF-8 HTML fragment
  * @param int    $minimum Text length at which copying stops
  * @return string Entity-decoded HTML of the copied elements
  */
 public function truncatehtml($html, $minimum)
 {
     $source = new \DomDocument();
     $source->loadHTML('<div>' . mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8') . '</div>');

     // Remove DOCTYPE, HTML and BODY so the wrapper <div> becomes the root.
     $source->removeChild($source->firstChild);
     $wrapper = $source->firstChild->firstChild->firstChild;
     $source->replaceChild($wrapper, $source->firstChild);

     $target = new \DomDocument();
     // Displayed text length so far (without markup).
     $textLength = 0;
     foreach ($source->documentElement->childNodes as $child) {
         if ($child->nodeType == XML_TEXT_NODE) {
             continue;
         }
         // Copy the original node, with its subtree, to the output document.
         $copy = $target->importNode($child, true);
         $target->appendChild($copy);
         $textLength += strlen(html_entity_decode($copy->nodeValue));
         if ($textLength >= $minimum) {
             // The minimum has been reached.
             break;
         }
     }

     return html_entity_decode($target->saveHTML());
 }

开发者ID:sitobcn82，项目名称:blog，代码行数:27，代码来源:TruncateHtmlExtension.php

示例7: initializeDomDocument

 /**
  * Parse $this->html into a new DOM document and hand it to the page
  * returned by getPage() via setDocument().
  *
  * @return $this
  */
 public function initializeDomDocument()
 {
     $document = new \DomDocument();
     $document->loadHTML($this->html);

     $page = $this->getPage();
     $page->setDocument($document);

     return $this;
 }

开发者ID:JINCHUNGEUN，项目名称:page-scraper，代码行数:10，代码来源:PageBuilder.php

示例8: getMetaTags

 /**
  * Fetch a page and collect its <meta> tags as a name => content map.
  *
  * The key is the tag's "name" attribute, falling back to "property"; the
  * value is its "content" attribute, falling back to "value". Tags for which
  * either side is empty are skipped, and a later tag with the same key
  * overwrites an earlier one.
  *
  * This is best-effort: any exception raised while fetching or parsing is
  * swallowed and whatever was collected so far is returned.
  *
  * @param string $url Address of the page to inspect
  * @return array|null Map of meta tags, or null when nothing could be fetched
  */
 public static function getMetaTags($url)
 {
     $rc = null;
     try {
         $settings = array(CURLOPT_URL => $url);
         $contents = self::runCurl($settings);
         if (!empty($contents)) {
             // Collect parser complaints quietly while loading, then put the
             // previous libxml mode back so this helper does not change
             // global error handling for the rest of the request.
             $previous = libxml_use_internal_errors(true);
             $doc = new \DomDocument();
             $doc->loadHTML($contents);
             libxml_use_internal_errors($previous);

             $rc = array();
             foreach ($doc->getElementsByTagName('meta') as $meta) {
                 $name = $meta->getAttribute('name');
                 if (empty($name)) {
                     $name = $meta->getAttribute('property');
                 }
                 $content = $meta->getAttribute('content');
                 if (empty($content)) {
                     $content = $meta->getAttribute('value');
                 }
                 if (!empty($name) && !empty($content)) {
                     $rc[$name] = $content;
                 }
             }
         }
     } catch (\Exception $e) {
         // Fully qualified so the global Exception class is matched whatever
         // namespace this file is in; an unqualified name inside a namespace
         // would refer to a class that does not exist and never match.
     }
     return $rc;
 }

开发者ID:chiasean，项目名称:saywut，代码行数:31，代码来源:Core.php

示例9: fillInHtml

 /**
  * Parse an HTML string, pass the document through fillInDom() and return
  * the resulting markup.
  *
  * The document is created with the charset configured under "sf_charset"
  * (UTF-8 when unset). Warnings raised while parsing are suppressed.
  *
  * @param string $html     Markup to parse
  * @param string $formName Forwarded unchanged to fillInDom()
  * @param string $formId   Forwarded unchanged to fillInDom()
  * @param array  $values   Forwarded unchanged to fillInDom()
  * @return string HTML serialisation of the document returned by fillInDom()
  */
 public function fillInHtml($html, $formName, $formId, $values)
 {
     $charset = sfConfig::get('sf_charset', 'UTF-8');
     $document = new DomDocument('1.0', $charset);
     @$document->loadHTML($html);

     $filled = $this->fillInDom($document, $formName, $formId, $values);

     return $filled->saveHTML();
 }

开发者ID:DBezemer，项目名称:server，代码行数:7，代码来源:sfFillInForm.class.php

示例10: exec

 /**
  * Run the extraction pipeline on an HTML page.
  *
  * Steps, in order: empty every <script> element, parse the cleaned markup
  * into a DOM, reset the per-run properties of this object from the <body>
  * node, then call extract() on that node.
  *
  * @param string $html Source markup; converted from UTF-8 to HTML entities before parsing
  */
 public function exec($html)
 {
     mb_language('Japanese');
     // 1. Preprocess
     // Remove script text.
     // If a string literal inside a script contains a closing tag, DomDocument
     // treats the script source as #text, so the contents of script elements
     // are removed.
     // Removing them with a regular expression causes a segmentation fault
     // (stack overflow?), so simple_html_dom is used to strip the script contents.
     // The MAX_FILE_SIZE limit was hit, so the library source was edited to
     // triple the default.
     $simpleHtml = str_get_html($html);
     foreach ($simpleHtml->find('script') as $script) {
         $script->innertext = '';
     }
     $html = $simpleHtml->outertext;
     // Trim (disabled)
     //		$html = preg_replace('/(\s|　)+/mi', ' ', $html);
     // 2. Build the DOM
     $doc = new DomDocument("1.0", "utf-8");
     @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
     $node = $doc->getElementsByTagName('body')->item(0);
     $this->preProcessedInput = $node->textContent;
     // 3. Initialise properties
     $this->domXPath = new DomXPath($doc);
     // "@" hides the error raised when the page has no <title> element.
     $this->title = @$doc->getElementsByTagName('title')->item(0)->textContent;
     $text = $this->scan($node);
     $this->textAll = $text;
     // NOTE(review): domCount is presumably updated by scan() above - confirm.
     $this->domCountAll = $this->domCount;
     $this->pancutuationCountAll = $this->calcKutenScore($text) + $this->calcTotenScore($text);
     $this->textLengthAll = mb_strlen($text);
     $this->highScore = -1000000;
     $this->extracedNode = null;
     // 4. Run
     $this->extract($node);
 }

开发者ID:gammodoking，项目名称:kindle.server，代码行数:39，代码来源:ContentExtractor.php

示例11: returnXPathObject

/**
 * Parse an HTML string and return a DOMXPath bound to the resulting document.
 *
 * Invalid markup never raises PHP warnings: parser errors are collected by
 * libxml while loading, and the libxml error-handling mode that was active
 * before the call is restored afterwards.
 *
 * An empty value is not passed to the parser, because
 * DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8);
 * the returned XPath object is then bound to an empty document.
 *
 * @param string $item HTML source
 * @return DOMXPath
 */
function returnXPathObject($item)
{
    $xmlPageDom = new DOMDocument();
    if ((string) $item !== '') {
        $previous = libxml_use_internal_errors(true);
        $xmlPageDom->loadHTML($item);
        libxml_use_internal_errors($previous);
    }
    return new DOMXPath($xmlPageDom);
}

开发者ID:shresthasumit55，项目名称:scraper，代码行数:7，代码来源:scraper.php

示例12: hal_parse

/**
 * Fetch a page and return the markup of its <div id="res_script"> element.
 *
 * The URL is only fetched when its host resolves to 193.48.96.10
 * (NOTE(review): presumably the address of the HAL server - confirm).
 * Every failure path logs through spip_log() and returns null.
 *
 * @param string $url Page address; may be HTML-entity encoded and wrapped in quotes
 * @return string|null XML serialisation of the first matching <div>, or null
 */
function hal_parse($url)
{
    $url = trim(html_entity_decode($url), "\"' ");
    $infos = parse_url($url);
    // parse_url() returns false for malformed URLs and omits 'host' for
    // relative ones; treat both as an invalid URL without touching DNS.
    if (!is_array($infos) || empty($infos['host'])) {
        spip_log("Url invalid", _LOG_ERREUR);
        return;
    }
    $ip = gethostbyname($infos['host']);
    if ($ip !== '193.48.96.10') {
        spip_log("Url invalid", _LOG_ERREUR);
        return;
    }
    spip_log(sprintf("[hal_parse] init_http(%s)", $url), _LOG_DEBUG);
    $content = recuperer_page($url);
    spip_log(sprintf("[hal_parse] init_http(%s): Done", $url), _LOG_DEBUG);
    // An empty or failed download cannot contain the tag, and
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8).
    if (!is_string($content) || $content === '') {
        spip_log("No tag found ...", _LOG_ERREUR);
        return;
    }
    $dom = new DomDocument('1.0', 'UTF-8');
    $dom->preserveWhiteSpace = false;
    $str = mb_convert_encoding($content, "HTML-ENTITIES");
    @$dom->loadHTML($str);
    $xpath = new DOMXpath($dom);
    $entries = $xpath->query('//div[@id="res_script"]');
    if ($entries->length == 0) {
        spip_log("No tag found ...", _LOG_ERREUR);
        return;
    }
    return $dom->saveXML($entries->item(0));
}

开发者ID:RBoisselet，项目名称:spip_webpage_from_hal，代码行数:25，代码来源:webpage_from_hal_fonctions.php

示例13: getProductComments

 /**
  * Scrape the reader comments from a product page.
  *
  * Every element with class="comment" yields one entry holding a running id,
  * the text of its first <b> (name), first <span> (date) and first <p> (text),
  * plus a rating that is always 0. A missing child element yields null for
  * that field.
  *
  * Side effect: a "Content-type: text/html; charset=utf-8" header is sent.
  *
  * @param string $url Page to scrape; when empty, the product page that used to
  *                    be hard-coded here is used as the default
  * @return array|null List of comment entries, or null when the page could not
  *                    be fetched or contains no comments
  */
 public static function getProductComments($url)
 {
     header('Content-type: text/html; charset=utf-8');
     // The parameter used to be overwritten unconditionally; keep the old
     // address only as a fallback.
     if (empty($url)) {
         $url = "http://ormatek.com/products/980";
     }
     $doc = file_get_contents($url);
     if ($doc === false || $doc === '') {
         return null;
     }
     $doc = mb_convert_encoding($doc, 'HTML-ENTITIES', "UTF-8");
     $query = ".//*[@class='comment']";
     $dom = new DomDocument();
     // Collect parser complaints quietly and restore the previous libxml mode.
     $previous = libxml_use_internal_errors(true);
     $dom->loadHTML($doc);
     libxml_use_internal_errors($previous);
     $xpath = new DomXPath($dom);
     $nodes = $xpath->query($query);
     // query() returns a DOMNodeList (or false on error), never an array, so
     // the former is_array() test rejected every result.
     if ($nodes === false || $nodes->length === 0) {
         return null;
     }
     $param = array();
     $i = 0;
     foreach ($nodes as $node) {
         $fields = array();
         foreach (array('name' => 'b', 'text' => 'p', 'date' => 'span') as $field => $tag) {
             $first = $node->getElementsByTagName($tag)->item(0);
             $fields[$field] = $first !== null ? $first->nodeValue : null;
         }
         $i++;
         $param[] = array(
             'id' => $i,
             'name' => $fields['name'],
             'date' => $fields['date'],
             'rating' => 0,
             'text' => $fields['text'],
         );
     }
     return $param;
 }

开发者ID:evgrishin，项目名称:mh16014，代码行数:31，代码来源:ormatekgrabber.php

示例14: capi_mkfeedtitle

/**
 * Expand the title template of a feed entry.
 *
 * $feed['title_data'] is unserialized into an array (an empty array when it
 * is empty or not an array). Each of its keys becomes a "{key}" placeholder
 * replaced in $feed['title_template'], "{actor}" is replaced with the $_SN
 * entry for $feed['uid'], and the result is passed through mktarget().
 *
 * The "touser" value is passed through capi_fhtml() before it is used.
 * NOTE(review): the previous code assigned that result to an undefined
 * $value array, so it was discarded; storing it back into $feed is the
 * presumed intent - confirm against capi_fhtml() and the API consumers.
 *
 * @param array $feed Feed row with 'title_data', 'title_template' and 'uid'
 * @return array The feed with 'title_data' decoded and 'title_template' expanded
 */
function capi_mkfeedtitle($feed)
{
    global $_SGLOBAL, $_SN, $_SCONFIG;
    // NOTE(review): unserialize() must only ever see trusted, locally stored
    // data; it is unsafe on anything a user can influence.
    $feed['title_data'] = empty($feed['title_data']) ? array() : unserialize($feed['title_data']);
    if (!is_array($feed['title_data'])) {
        $feed['title_data'] = array();
    }
    if (array_key_exists('touser', $feed['title_data'])) {
        $feed['title_data']['touser'] = capi_fhtml($feed['title_data']['touser']);
    }
    //title
    $searchs = $replaces = array();
    foreach ($feed['title_data'] as $key => $data) {
        $searchs[] = '{' . $key . '}';
        $replaces[] = $data;
    }
    $searchs[] = '{actor}';
    // No list of actors is ever built in this function, so the actor is
    // always the name registered for the feed's uid.
    $replaces[] = $_SN[$feed['uid']];
    $feed['title_template'] = mktarget(str_replace($searchs, $replaces, $feed['title_template']));
    return $feed;
}

开发者ID:NaturalWill，项目名称:UCQA，代码行数:27，代码来源:function_capi.php

示例15: __construct

 /**
  * Build a page object from a raw HTTP response.
  *
  * The response is handed to parseResponse(), then parsed as XML when the
  * Content-Type header mentions "xml" and as HTML otherwise. The title and
  * the forms found in the document are extracted, URLs are converted when
  * the browser asks for it, and setParser() is called last.
  *
  * An empty body is not passed to the parser: loadHTML()/loadXML() reject an
  * empty source (ValueError on PHP 8, which "@" cannot suppress). The page
  * then has an empty document, an empty title and no forms.
  *
  * @param string $url      Address the response was fetched from
  * @param string $response Raw response
  * @param object $browser  Owning browser; its convertUrls flag is read here
  */
 function __construct($url, $response, $browser)
 {
     $this->url = $url;
     $this->html = $response;
     $this->parseResponse($response);
     $this->is_xml = isset($this->headers['Content-Type']) && preg_match('/\\bxml\\b/i', $this->headers['Content-Type']);
     $this->browser = $browser;
     $this->dom = new DOMDocument();
     if ($this->html !== null && $this->html !== false && $this->html !== '') {
         if ($this->is_xml) {
             @$this->dom->loadXML($this->html);
         } else {
             @$this->dom->loadHTML($this->html);
         }
     }
     $this->xpath = new DOMXPath($this->dom);
     $this->title = ($node = $this->xpath->query('//title')->item(0)) ? $node->nodeValue : '';
     // NOTE(review): `forms` is reset here but the loop below appends to
     // `_forms` - confirm which property the rest of the class reads.
     $this->forms = array();
     foreach ($this->xpath->query('//form') as $form) {
         $this->_forms[] = new PGForm($form, $this);
     }
     if ($browser->convertUrls) {
         $this->convertUrls();
     }
     $this->setParser($this->html, $this->is_xml);
     if (function_exists('gc_collect_cycles')) {
         gc_collect_cycles();
     }
 }

开发者ID:byjg，项目名称:pgbrowser，代码行数:27，代码来源:PGPage.php

注：本文中的DomDocument::loadHTML方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。