本文整理汇总了PHP中DomDocument::loadHTML方法的典型用法代码示例。如果您正苦于以下问题:PHP DomDocument::loadHTML方法的具体用法?PHP DomDocument::loadHTML怎么用?PHP DomDocument::loadHTML使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DomDocument的用法示例。
在下文中一共展示了DomDocument::loadHTML方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: __construct
/**
 * Create a HTMLDoc object.
 *
 * Parses the given markup into a DOMDocument and prepares a DOMXPath bound
 * to it. While parsing, libxml errors are collected internally instead of
 * being raised as PHP warnings; they are then discarded and the libxml
 * error mode that was active before the call is restored.
 *
 * @param string $html The HTML to parse
 */
public function __construct($html)
{
    $this->dom = new \DOMDocument();

    // libxml_use_internal_errors() returns the previous mode; keep it so the
    // process-wide setting is not left switched on as a side effect.
    $previousMode = libxml_use_internal_errors(true);
    $this->dom->loadHTML($html);
    // Nothing here inspects the collected errors; drop them so the buffer
    // does not grow with every document that gets parsed.
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    $this->xp = new \DOMXPath($this->dom);
}
示例2: __construct
/**
 * Constructor.
 *
 * Parses the given HTML into the internal DOMDocument. Parser complaints
 * about malformed markup are captured by libxml rather than emitted as PHP
 * warnings, then discarded. The libxml error mode that was active before
 * the call is restored afterwards (the previous implementation always
 * forced it to "off", which silently changed the caller's configuration).
 *
 * @param string $html Markup to parse
 */
public function __construct($html)
{
    $previousMode = libxml_use_internal_errors(true);

    $this->_document = new DomDocument();
    $this->_document->preserveWhiteSpace = false;
    $this->_document->loadHTML($html);

    // Explicitly clear the buffer: switching the mode back only clears it
    // when the restored mode happens to be "off".
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);
}
示例3: getHtmlDocument
/**
 * Build a DomDocument from an HTML string.
 *
 * An empty input (as judged by empty()) yields a fresh, unloaded document.
 * Otherwise libxml internal error collection is switched on (and left on)
 * and the markup is loaded; on PHP 5.4.0 and later the LIBXML_NONET option
 * is passed to the loader.
 *
 * @static
 * @access public
 * @param  string $input HTML content
 * @return DOMDocument
 */
public static function getHtmlDocument($input)
{
    $document = new DomDocument();

    if (!empty($input)) {
        libxml_use_internal_errors(true);

        // The options argument of loadHTML() is only passed on PHP >= 5.4.0.
        $acceptsOptions = version_compare(PHP_VERSION, '5.4.0', '>=');
        if ($acceptsOptions) {
            $document->loadHTML($input, LIBXML_NONET);
        } else {
            $document->loadHTML($input);
        }
    }

    return $document;
}
示例4: get_body_length
/**
 * Return the length of the visible text contained in an HTML fragment.
 *
 * Elements whose inline style sets "display: none" are removed before the
 * remaining markup is serialized, stripped of tags and measured.
 * The length is measured with strlen(), i.e. in bytes, on the serialized
 * document with its tags stripped.
 *
 * @param string $body HTML fragment
 * @return int 0 when the fragment is empty or whitespace only
 */
function get_body_length($body)
{
    $string = trim($body);
    // DomDocument doesn't like empty strings
    if ($string === '') {
        return 0;
    }

    // loadHTML() is an instance method; calling it statically is a fatal
    // error on PHP 8. Parser warnings for invalid markup are collected by
    // libxml instead of being emitted, then discarded.
    $dom = new DOMDocument();
    $previousMode = libxml_use_internal_errors(true);
    $dom->loadHTML($body);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    $xpath = new DOMXPath($dom);
    /*
     * Checking every possible syntax of the style attribute with XPath is
     * not practical, so fetch every element carrying a style attribute and
     * test the value with a regular expression.
     */
    $styled = $xpath->query('//*[@style]');
    foreach ($styled as $node) {
        // Case-insensitive, tolerant of whitespace around the colon, and no
        // longer requires a trailing semicolon ("display:none" is valid CSS).
        $hidden = preg_match('/display\s*:\s*none\b/i', $node->getAttribute('style'));
        if ($hidden && $node->parentNode !== null) {
            // Hidden, remove it from its parent
            $node->parentNode->removeChild($node);
        }
    }

    // What is left in the document is only what is visible.
    return strlen(strip_tags($dom->saveHTML()));
}
示例5: getPreview
/**
 * Return a preview made of the first top-level elements of the body.
 *
 * The body ($this->Body) is wrapped in a minimal UTF-8 HTML page, parsed,
 * and the first $elements element children of <body> are serialized as XML.
 * The result is cached in $this->preview, so later calls return the cached
 * value regardless of the argument.
 *
 * @param int|null $elements Number of elements to include; null means 2
 * @return string Serialized markup of the preview
 */
public function getPreview($elements)
{
    if (!isset($this->preview)) {
        // Cast so the strict comparison below also stops for numeric strings
        // (e.g. a value coming from a template or request parameter).
        $limit = isset($elements) ? (int) $elements : 2;

        libxml_use_internal_errors(true);
        $dom = new DomDocument();
        $dom->preserveWhiteSpace = false;
        $dom->loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $this->Body . '</body></html>');
        $dom->normalize();
        $nodes = $dom->getElementsByTagName("body")->item(0)->childNodes;

        $elementCount = 0;
        $this->preview = '';
        foreach ($nodes as $node) {
            // Only element nodes count; text and comment nodes are skipped.
            if ($node->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            $this->preview .= $dom->saveXML($node);
            $elementCount++;
            if ($elementCount === $limit) {
                break;
            }
        }

        // Carriage returns in the XML prevent the markup from validating.
        // saveXML() serializes them as the character reference "&#13;", so
        // strip that (and any raw CR). Removing plain spaces, as the code
        // previously did, glued words and attributes together.
        $this->preview = str_replace(array('&#13;', "\r"), '', $this->preview);
    }
    return $this->preview;
}
示例6: truncatehtml
/**
 * Copy top-level elements of an HTML fragment until a minimum amount of
 * text has been collected.
 *
 * The fragment is wrapped in a <div>, parsed, and the wrapper's element
 * children are imported one by one into a new document. Copying stops after
 * the element that brings the accumulated text length up to $minimum.
 * Top-level text nodes are not copied.
 *
 * @param string $html    UTF-8 HTML fragment
 * @param int    $minimum Minimum text length (measured with strlen)
 * @return string Serialized markup with entities decoded
 */
public function truncatehtml($html, $minimum)
{
    $source = new \DomDocument();
    $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
    $source->loadHTML('<div>' . $encoded . '</div>');

    // Strip the scaffolding the parser adds: first the DOCTYPE node, then
    // swap <html> for the wrapper <div> found at html > body > div.
    $source->removeChild($source->firstChild);
    $wrapper = $source->firstChild->firstChild->firstChild;
    $source->replaceChild($wrapper, $source->firstChild);

    // Running total of displayed text (markup excluded).
    $textLength = 0;
    $target = new \DomDocument();

    foreach ($source->documentElement->childNodes as $child) {
        if ($child->nodeType == XML_TEXT_NODE) {
            // bare text directly inside the wrapper is skipped
            continue;
        }

        // Deep-copy the node into the output document.
        $copy = $target->importNode($child, true);
        $target->appendChild($copy);

        $textLength += strlen(html_entity_decode($copy->nodeValue));
        if ($textLength >= $minimum) {
            // enough text collected
            break;
        }
    }

    return html_entity_decode($target->saveHTML());
}
示例7: initializeDomDocument
/**
 * Parse $this->html into a new DomDocument and hand it to the page object
 * returned by getPage().
 *
 * @return $this
 */
public function initializeDomDocument()
{
    $document = new \DomDocument();
    $document->loadHTML($this->html);

    $page = $this->getPage();
    $page->setDocument($document);

    return $this;
}
示例8: getMetaTags
/**
 * Fetch a page and collect its <meta> tags as a name => content map.
 *
 * For each <meta> element the key is taken from the "name" attribute,
 * falling back to "property"; the value is taken from "content", falling
 * back to "value". Tags lacking either a key or a value are skipped; when
 * the same key occurs more than once the last occurrence wins.
 *
 * @param string $url Address of the page to inspect
 * @return array|null Map of meta tags, or null when nothing was fetched
 */
public static function getMetaTags($url)
{
    $rc = null;
    try {
        $settings = array(CURLOPT_URL => $url);
        $contents = self::runCurl($settings);
        if (!empty($contents)) {
            // Collect parser errors internally (real-world pages are rarely
            // valid), then discard them and restore the caller's mode so no
            // process-wide state is leaked.
            $previousMode = libxml_use_internal_errors(true);
            $doc = new \DomDocument();
            $doc->loadHTML($contents);
            libxml_clear_errors();
            libxml_use_internal_errors($previousMode);

            $rc = array();
            foreach ($doc->getElementsByTagName('meta') as $meta) {
                $name = $meta->getAttribute('name');
                if (empty($name)) {
                    $name = $meta->getAttribute('property');
                }
                $content = $meta->getAttribute('content');
                if (empty($content)) {
                    $content = $meta->getAttribute('value');
                }
                if (!empty($name) && !empty($content)) {
                    $rc[$name] = $content;
                }
            }
        }
        return $rc;
    } catch (\Exception $e) {
        // Fully qualified on purpose: inside a namespace an unqualified
        // "Exception" resolves to a namespaced class and would never match.
        // Best effort: return whatever was gathered before the failure.
        return $rc;
    }
}
示例9: fillInHtml
/**
 * Parse an HTML string, fill in the form through fillInDom() and return the
 * resulting markup.
 *
 * The document is created with the charset configured under "sf_charset"
 * (UTF-8 when unset). Parser warnings are silenced.
 *
 * @param string $html     Markup containing the form
 * @param string $formName Passed through to fillInDom()
 * @param string $formId   Passed through to fillInDom()
 * @param mixed  $values   Passed through to fillInDom()
 * @return string Serialized HTML of the document returned by fillInDom()
 */
public function fillInHtml($html, $formName, $formId, $values)
{
    $charset = sfConfig::get('sf_charset', 'UTF-8');

    $document = new DomDocument('1.0', $charset);
    @$document->loadHTML($html);

    return $this->fillInDom($document, $formName, $formId, $values)->saveHTML();
}
示例10: exec
/**
 * Run the extraction over an HTML page.
 *
 * Steps, in order: blank out the contents of every <script> element using
 * simple_html_dom, parse the result into a DomDocument, reset the
 * per-document properties on $this (title, whole-document text and score
 * totals, best-score trackers), then call extract() on the <body> node.
 *
 * NOTE(review): str_get_html() and the <body> lookup are used without
 * checking their results — presumably the input is always a full, parseable
 * page; confirm against callers.
 *
 * @param string $html Page markup, expected to be UTF-8 (it is converted
 *                     from UTF-8 before parsing)
 */
public function exec($html)
{
mb_language('Japanese');
// 1. Pre-processing
// Strip the text inside <script> elements.
// If a string literal inside a script contains a closing tag, DomDocument treats the script source as #text,
// so the characters inside script elements are removed.
// Removing them with a regular expression caused a segmentation fault (stack overflow?), so
// simple_html_dom is used to remove the script contents instead.
// We ran into the MAX_FILE_SIZE limit, so the library source was edited to three times the default.
$simpleHtml = str_get_html($html);
foreach ($simpleHtml->find('script') as $script) {
$script->innertext = '';
}
$html = $simpleHtml->outertext;
// Trim (currently disabled)
// $html = preg_replace('/(\s| )+/mi', ' ', $html);
// 2. Build the DOM
$doc = new DomDocument("1.0", "utf-8");
// Convert to HTML entities before loading; parser warnings are silenced.
@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
$node = $doc->getElementsByTagName('body')->item(0);
$this->preProcessedInput = $node->textContent;
// 3. Initialize properties
$this->domXPath = new DomXPath($doc);
// @ silences the error raised when the page has no <title> element.
$this->title = @$doc->getElementsByTagName('title')->item(0)->textContent;
$text = $this->scan($node);
$this->textAll = $text;
// NOTE(review): $this->domCount is presumably updated by scan() above — confirm.
$this->domCountAll = $this->domCount;
$this->pancutuationCountAll = $this->calcKutenScore($text) + $this->calcTotenScore($text);
$this->textLengthAll = mb_strlen($text);
// Start from a very low score; presumably extract() raises it — confirm.
$this->highScore = -1000000;
$this->extracedNode = null;
// 4. Run the extraction
$this->extract($node);
}
示例11: returnXPathObject
/**
 * Build a DOMXPath over the given HTML markup.
 *
 * @param string $item HTML to parse
 * @return DOMXPath XPath object bound to the parsed document
 */
function returnXPathObject($item)
{
    $document = new DomDocument();
    // Warnings about malformed markup are deliberately silenced.
    @$document->loadHTML($item);

    return new DOMXPath($document);
}
示例12: hal_parse
/**
 * Fetch a page and return the markup of its <div id="res_script"> element.
 *
 * The URL is only fetched when its host resolves to 193.48.96.10; anything
 * else (including a URL without a host) is logged as invalid.
 *
 * @param string $url Page URL, possibly HTML-entity encoded and quoted
 * @return string|null Serialized XML of the element, or null when the URL is
 *                     rejected, the fetch yields nothing, or the element is
 *                     missing
 */
function hal_parse($url)
{
    $url = trim(html_entity_decode($url), "\"' ");

    // parse_url() gives null/false when there is no usable host; check that
    // before resolving instead of indexing a possibly missing array key.
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '' || gethostbyname($host) !== '193.48.96.10') {
        spip_log("Url invalid", _LOG_ERREUR);
        return null;
    }

    spip_log(sprintf("[hal_parse] init_http(%s)", $url), _LOG_DEBUG);
    $content = recuperer_page($url);
    spip_log(sprintf("[hal_parse] init_http(%s): Done", $url), _LOG_DEBUG);

    // A failed or empty fetch cannot contain the element; report it the same
    // way as a page without the element rather than feeding an empty string
    // to loadHTML() (which raises an error on PHP 8).
    if (!is_string($content) || $content === '') {
        spip_log("No tag found ...", _LOG_ERREUR);
        return null;
    }

    $dom = new DomDocument('1.0', 'UTF-8');
    $dom->preserveWhiteSpace = false;
    $str = mb_convert_encoding($content, "HTML-ENTITIES");
    // Warnings about malformed markup are deliberately silenced.
    @$dom->loadHTML($str);

    $xpath = new DOMXpath($dom);
    $entries = $xpath->query('//div[@id="res_script"]');
    if ($entries->length == 0) {
        spip_log("No tag found ...", _LOG_ERREUR);
        return null;
    }

    return $dom->saveXML($entries->item(0));
}
示例13: getProductComments
/**
 * Scrape the comments of a product page.
 *
 * Every element with class "comment" yields one entry; the name, text and
 * date are read from the first <b>, <p> and <span> inside it (null when the
 * tag is missing). The rating is always 0.
 *
 * Fixes over the previous version: the $url argument is honoured instead of
 * being overwritten by a hard-coded address, and the XPath result is no
 * longer tested with is_array() (a DOMNodeList is never an array, so the
 * method used to return null unconditionally).
 *
 * @param string $url Product page URL; when empty, the historical default
 *                    "http://ormatek.com/products/980" is used
 * @return array|null List of arrays with keys id, name, date, rating, text
 *                    (empty when the page has no comments), or null when the
 *                    page could not be fetched or queried
 */
public static function getProductComments($url)
{
    // Kept from the original implementation; skipped once output has
    // started, where header() would only raise a warning.
    if (!headers_sent()) {
        header('Content-type: text/html; charset=utf-8');
    }

    if (empty($url)) {
        $url = "http://ormatek.com/products/980";
    }

    $doc = file_get_contents($url);
    if ($doc === false || $doc === '') {
        return null;
    }
    $doc = mb_convert_encoding($doc, 'HTML-ENTITIES', "UTF-8");

    $dom = new DomDocument();
    // Collect parser errors internally, then discard them and restore the
    // caller's libxml mode.
    $previousMode = libxml_use_internal_errors(true);
    $dom->loadHTML($doc);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    $xpath = new DomXPath($dom);
    $nodes = $xpath->query(".//*[@class='comment']");
    if ($nodes === false) {
        return null;
    }

    $param = array();
    $i = 0;
    foreach ($nodes as $node) {
        // First matching descendant per field; tolerate missing tags.
        $fields = array();
        foreach (array('name' => 'b', 'text' => 'p', 'date' => 'span') as $field => $tag) {
            $first = $node->getElementsByTagName($tag)->item(0);
            $fields[$field] = $first !== null ? $first->nodeValue : null;
        }

        $rating = 0;
        $i++;
        $param[] = array(
            'id' => $i,
            'name' => $fields['name'],
            'date' => $fields['date'],
            'rating' => $rating,
            'text' => $fields['text'],
        );
    }

    return $param;
}
示例14: capi_mkfeedtitle
/**
 * Expand the title template of a feed entry.
 *
 * Each key of the unserialized title_data becomes a "{key}" placeholder that
 * is replaced by its value; "{actor}" is replaced by the joined $actors or,
 * when none are given, by the $_SN entry for the feed's uid. The expanded
 * template is passed through mktarget().
 *
 * Fixes over the previous version: the "touser" value is now actually run
 * through capi_fhtml() (the result used to be written to an undefined
 * $value variable and thrown away, after a DOM parse whose result was never
 * used), and $actors is a real, optional parameter instead of an undefined
 * variable.
 *
 * @param array $feed   Feed row; title_data is a serialized array or empty
 * @param array $actors Optional actor labels for the "{actor}" placeholder
 * @return array The feed with title_data unserialized and title_template expanded
 */
function capi_mkfeedtitle($feed, $actors = array())
{
    global $_SGLOBAL, $_SN, $_SCONFIG;

    // NOTE(review): unserialize() must only ever see trusted, application-
    // written data; confirm title_data cannot be influenced by users.
    $feed['title_data'] = empty($feed['title_data']) ? array() : unserialize($feed['title_data']);
    if (!is_array($feed['title_data'])) {
        $feed['title_data'] = array();
    }

    //title
    $searchs = $replaces = array();
    foreach (array_keys($feed['title_data']) as $key) {
        if ($key === "touser") {
            $feed["title_data"]["touser"] = capi_fhtml($feed["title_data"]["touser"]);
        }
        $searchs[] = '{' . $key . '}';
        $replaces[] = $feed['title_data'][$key];
    }

    $searchs[] = '{actor}';
    $replaces[] = empty($actors) ? $_SN[$feed['uid']] : implode(lang('dot'), $actors);

    $feed['title_template'] = mktarget(str_replace($searchs, $replaces, $feed['title_template']));
    return $feed;
}
示例15: __construct
/**
 * Build a page object from a fetched response.
 *
 * Stores the URL and raw response, lets parseResponse() process the
 * response, decides from the Content-Type header whether the body is XML,
 * parses the body accordingly, and collects the page title and one PGForm
 * per <form> element.
 *
 * @param string $url      Address the response was fetched from
 * @param string $response Raw response
 * @param object $browser  Owning browser; its convertUrls flag decides
 *                         whether convertUrls() is run
 */
function __construct($url, $response, $browser)
{
    $this->url = $url;
    $this->html = $response;
    // NOTE(review): presumably populates $this->headers (read just below)
    // and may rewrite $this->html — confirm in parseResponse().
    $this->parseResponse($response);
    $this->is_xml = isset($this->headers['Content-Type'])
        && preg_match('/\\bxml\\b/i', $this->headers['Content-Type']) === 1;
    $this->browser = $browser;

    // Parser warnings for malformed documents are deliberately silenced.
    $this->dom = new DOMDocument();
    if ($this->is_xml) {
        @$this->dom->loadXML($this->html);
    } else {
        @$this->dom->loadHTML($this->html);
    }
    $this->xpath = new DOMXPath($this->dom);

    $titleNode = $this->xpath->query('//title')->item(0);
    $this->title = $titleNode ? $titleNode->nodeValue : '';

    // The loop fills $this->_forms, so that is the property that has to
    // start out as an empty array; previously only $this->forms was
    // initialised, leaving _forms unset on pages without any form.
    $this->forms = array();
    $this->_forms = array();
    foreach ($this->xpath->query('//form') as $form) {
        $this->_forms[] = new PGForm($form, $this);
    }

    if ($browser->convertUrls) {
        $this->convertUrls();
    }
    $this->setParser($this->html, $this->is_xml);

    if (function_exists('gc_collect_cycles')) {
        gc_collect_cycles();
    }
}