本文整理汇总了PHP中DOMDocument::loadHTML方法的典型用法代码示例。如果您正苦于以下问题：PHP DOMDocument::loadHTML方法的具体用法？PHP DOMDocument::loadHTML怎么用？PHP DOMDocument::loadHTML使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DOMDocument的用法示例。
在下文中一共展示了DOMDocument::loadHTML方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: __construct
/**
 * Constructor.
 *
 * Parses $text into a DOMDocument while libxml's internal error collection
 * is switched on, then puts that libxml setting back the way it was found.
 *
 * @param string $text     The text of the HTML document.
 * @param string $charset  The charset of the HTML document. When null, the
 *                         encoding reported by the parsed document is used,
 *                         falling back to 'iso-8859-1'.
 *
 * @throws Exception  If the DOM extension is not loaded.
 */
public function __construct($text, $charset = null)
{
    if (!extension_loaded('dom')) {
        throw new Exception('DOM extension is not available.');
    }

    // Bug #9616: Make sure we have valid HTML input.
    if (!strlen($text)) {
        $text = '<html></html>';
    }

    /* Collect parse errors internally instead of emitting warnings;
     * remember the previous setting so it can be restored below. */
    $old_error = libxml_use_internal_errors(true);
    $doc = new DOMDocument();

    if (is_null($charset)) {
        /* If no charset given, charset is whatever libxml tells us the
         * encoding should be defaulting to 'iso-8859-1'. */
        $doc->loadHTML($text);
        $this->_origCharset = $doc->encoding ? $doc->encoding : 'iso-8859-1';
    } else {
        /* Convert/try with UTF-8 first. */
        $this->_origCharset = Horde_String::lower($charset);
        $this->_xmlencoding = '<?xml encoding="UTF-8"?>';
        $doc->loadHTML($this->_xmlencoding . Horde_String::convertCharset($text, $charset, 'UTF-8'));

        if ($doc->encoding && Horde_String::lower($doc->encoding) != 'utf-8') {
            /* Convert charset to what the HTML document says it SHOULD
             * be. */
            $doc->loadHTML(Horde_String::convertCharset($text, $charset, $doc->encoding));
            $this->_xmlencoding = '';
        }
    }

    /* Restore the caller's libxml setting. The earlier code disabled
     * internal errors only when they had been enabled beforehand, which
     * inverted the caller's setting in both cases. */
    libxml_use_internal_errors($old_error);

    $this->dom = $doc;
}
示例2: trimFromHTMLString
/**
 * Checks DOMHelper::trimFromHTMLString() against the HTML fixtures stored in
 * the htmlData directory next to this file.
 *
 * Expected and actual markup are compared with all whitespace removed.
 *
 * @test
 */
public function trimFromHTMLString()
{
    $fixtureDir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'htmlData' . DIRECTORY_SEPARATOR;

    // Small local helpers for the normalisation steps repeated below.
    $readFixture = function ($name) use ($fixtureDir) {
        return file_get_contents($fixtureDir . $name);
    };
    $dropNewlines = function ($html) {
        return str_replace("\n", "", $html);
    };
    $dropWhitespace = function ($html) {
        return preg_replace('/\\s+/', '', $html);
    };

    $helper = new DOMHelper();
    $dom = new DOMDocument();
    $input = $readFixture('trimAfterString_input_1.html');

    // When the marker does not exist, the text is returned untouched.
    $expected = $dropNewlines($input);
    $actual = $dropNewlines($helper->trimFromHTMLString($input, "{{XXXXXX}}"));
    $this->assertEquals($dropWhitespace($expected), $dropWhitespace($actual));

    // Wrapper put around the expected fragments so they parse as a UTF-8 document.
    $htmlHead = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"><title>***</title></head><body>';
    $htmlFoot = '</body></html>';

    // Plain removal.
    $actual = $dropNewlines($helper->trimFromHTMLString($input, "{{LIRE_LA_SUITE}}"));
    $dom->loadHTML($htmlHead . $readFixture('trimAfterString_output_1.html') . $htmlFoot);
    $expected = $this->cleanTmpHTML($dropNewlines($dom->saveHTML()), $htmlHead, $htmlFoot);
    $this->assertEquals($dropWhitespace($expected), $dropWhitespace($actual));

    // Removal with a "Lire la suite" button inserted.
    $actual = $dropNewlines($helper->trimFromHTMLString($input, "{{LIRE_LA_SUITE}}", "<button>Lire la suite</button>"));
    $dom->loadHTML($htmlHead . $readFixture('trimAfterString_output_2.html') . $htmlFoot);
    $expected = $this->cleanTmpHTML($dropNewlines($dom->saveHTML()), $htmlHead, $htmlFoot);
    $this->assertEquals($dropWhitespace($expected), $dropWhitespace($actual));

    // Removal with both a text and a "Lire la suite" button inserted.
    $actual = $dropNewlines($helper->trimFromHTMLString($input, "{{LIRE_LA_SUITE}}", "Pour en savoir plus : <button>Lire la suite</button>"));
    $dom->loadHTML($htmlHead . $readFixture('trimAfterString_output_3.html') . $htmlFoot);
    $expected = $this->cleanTmpHTML($dropNewlines($dom->saveHTML()), $htmlHead, $htmlFoot);
    $this->assertEquals($dropWhitespace($expected), $dropWhitespace($actual));
}
示例3: setMarkup
/**
 * Replaces the held DOMDocument with a new one parsed from the given markup.
 *
 * @param string $markup HTML markup to parse
 *
 * @throws \SxCore\Html\Exception\InvalidArgumentException when $markup is not a string
 */
public function setMarkup($markup)
{
    if (is_string($markup)) {
        // A fresh document is stored first, then filled from the markup.
        $this->DOMDocument = new DOMDocument();
        $this->DOMDocument->loadHTML($markup);

        return;
    }

    $actualType = gettype($markup);

    throw new Exception\InvalidArgumentException('Expected string. Got "' . $actualType . '".');
}
示例4: getDom
/**
 * Returns the DOM built from $this->htmlValidatorBodyContent, parsing it on
 * the first call and reusing the stored document afterwards.
 *
 * @return \DOMDocument
 */
private function getDom()
{
    // Already parsed: hand back the stored document.
    if (null !== $this->dom) {
        return $this->dom;
    }

    $this->dom = new \DOMDocument();
    $this->dom->loadHTML($this->htmlValidatorBodyContent);

    return $this->dom;
}
示例5: __construct
/**
 * LinkedCssImporter constructor.
 *
 * Parses the HTML into a DOMDocument and records the directory part of
 * $filePath together with the file system object.
 *
 * @param string $html
 * @param string $filePath
 * @param FileSystem $fileSystem
 * @throws \InvalidArgumentException when DOMDocument::loadHTML() reports failure
 */
public function __construct($html, $filePath, FileSystem $fileSystem)
{
    $this->document = new \DOMDocument();

    $parsed = $this->document->loadHTML($html);
    if (!$parsed) {
        throw new \InvalidArgumentException('Cannot process HTML as a valid document');
    }

    $this->fileSystem = $fileSystem;
    // Only the directory component of the path is kept.
    $this->filePath = pathinfo($filePath, PATHINFO_DIRNAME);
}
示例6: setHtml
/**
 * Loads the given HTML into a new DOMDocument, resets the collected CSS and
 * returns the result of extractCss().
 *
 * Control characters other than tab, line feed and carriage return are
 * removed from the markup before it is parsed.
 *
 * @param string $sHtml
 * @throws \InvalidArgumentException when $sHtml is not a string
 * @return \BoilerAppMessenger\StyleInliner\Processor\CssToInlineStylesProcessor
 */
private function setHtml($sHtml)
{
    if (!is_string($sHtml)) {
        throw new \InvalidArgumentException('Html expects string, "' . gettype($sHtml) . '" given');
    }

    $this->domDocument = new \DOMDocument('1.0', $this->getEncoding());

    $sCleanHtml = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]+/u', '', $sHtml);
    if ($sCleanHtml === null) {
        // With the /u modifier preg_replace() returns null when the subject
        // is not valid UTF-8. The characters being removed are single-byte
        // control codes, so retry byte-wise instead of handing null to the parser.
        $sCleanHtml = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]+/', '', $sHtml);
    }
    if ($sCleanHtml === null) {
        // Last resort: parse the markup as it was given.
        $sCleanHtml = $sHtml;
    }

    $this->domDocument->loadHTML($sCleanHtml);
    $this->css = '';

    return $this->extractCss(null, $this->getBaseDir());
}
示例7: add_block_grids
/**
 * Wraps runs of adjacent "::" paragraphs in block-grid lists.
 *
 * Every <p> whose text starts with "::" is collected. Paragraphs whose
 * previous sibling (as reported by get_real_previous_sibling()) is the
 * previously collected paragraph belong to the same group. For each group a
 * <ul class="medium-block-grid-N takeaways innovate"> is inserted before the
 * group's first paragraph (N being the number of paragraphs in the group),
 * each paragraph is moved into its own <li>, and every "::" is removed from
 * the paragraph text.
 *
 * @param string $content HTML fragment to transform.
 * @return string The serialized markup with doctype/html/head/body tags stripped.
 */
function add_block_grids($content)
{
    // DOMDocument::loadHTML() rejects an empty string, and there is nothing to transform.
    if ((string) $content === '') {
        return '';
    }

    // DOMDocument seems to have problems with the long dash, this fixes it.
    $detected = mb_detect_encoding($content);
    if ($detected !== false) {
        // Detection can fail and return false; in that case the content is left as it is.
        $content = mb_convert_encoding($content, 'utf-8', $detected);
    }
    // Turn every non-ASCII character into a numeric entity so the parser does
    // not depend on guessing the encoding. This replaces the deprecated
    // 'html-entities' target of mb_convert_encoding().
    $content = mb_encode_numericentity($content, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $document = new DOMDocument('1.0', 'utf-8');
    set_error_handler(function () {
        /* ignore errors */
    });
    // The libxml option flags for loadHTML() exist from PHP 5.4 on.
    // version_compare() is used because comparing the version string with a
    // float is not a reliable version test.
    if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
        $document->loadHTML($content, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } else {
        $document->loadHTML($content);
    }
    restore_error_handler();

    $xpath = new DOMXpath($document);
    $blocks = $xpath->query("//p[starts-with(.,'::')]");

    // Collect the paragraphs into groups of directly adjacent blocks.
    $block_groups = array();
    $last_block = null;
    foreach ($blocks as $block) {
        $previous_sibling = get_real_previous_sibling($block);
        if ($last_block && $previous_sibling && $previous_sibling->isSameNode($last_block)) {
            // We're still in the same block group.
            $block_groups[count($block_groups) - 1][] = $block;
        } else {
            // First block of a new group.
            $block_groups[] = array($block);
        }
        $last_block = $block;
    }

    foreach ($block_groups as $group) {
        $count = count($group);
        $ul = $document->createElement('ul');
        $ul->setAttribute('class', "medium-block-grid-{$count} takeaways innovate");
        // Insert the UL before the block group p tags
        $group[0]->parentNode->insertBefore($ul, $group[0]);
        foreach ($group as $block) {
            $li = $document->createElement('li');
            $block->nodeValue = str_replace('::', '', $block->nodeValue);
            // Appending moves the paragraph out of its old position into the list item.
            $li->appendChild($block);
            $ul->appendChild($li);
        }
    }

    // Strip the document scaffolding the serializer may add around the fragment.
    return preg_replace("~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\\s*~i", '', $document->saveHTML());
}
示例8: convert
/**
 * Convert HTML to Apple News Markdown.
 *
 * The fragment is wrapped in a complete HTML document that declares UTF-8,
 * parsed, and the child nodes of its <body> are handed to getBlocks().
 *
 * @param string $html
 *   HTML to convert. Value is not validated, it is caller's responsibility
 *   to validate.
 *
 * @return string|NULL
 *   Markdown representation of the HTML, or NULL if failed. An input that is
 *   empty or whitespace-only yields an empty string.
 */
public function convert($html)
{
    if (preg_match('/^\\s*$/u', $html)) {
        return '';
    }

    // The wrapper previously closed </body> without ever opening it, leaving
    // it to the parser to decide where the body starts. Opening it explicitly
    // keeps the whole fragment inside /html/body.
    $html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body>' . $html . '</body></html>';

    $this->dom = new \DOMDocument();
    if (!$this->dom->loadHTML($html)) {
        return NULL;
    }

    $xp = new \DOMXPath($this->dom);
    $body = $xp->query('/html/body')->item(0);
    if ($body === NULL) {
        // No body element could be located; report failure instead of
        // dereferencing NULL.
        return NULL;
    }

    return implode(self::BLOCK_DELIMITER, $this->getBlocks($body->childNodes));
}
示例9: convert
/**
 * Tries to converts the given HTML into a plain text format
 *
 * Parse errors are collected by libxml instead of being raised as warnings,
 * and are discarded afterwards. The libxml error-handling mode that was
 * active before the call is restored.
 *
 * @return string the HTML converted or empty string if not able to parse
 */
function convert()
{
    $output = '';

    // DOMDocument::loadHTML() refuses an empty string, so there is nothing
    // to parse; honour the documented "empty string" result.
    if ($this->html === null || $this->html === '') {
        return $output;
    }

    $previousMode = libxml_use_internal_errors(true);
    $success = $this->document->loadHTML($this->html);
    libxml_clear_errors();
    // Do not leave the process-wide libxml setting changed behind us.
    libxml_use_internal_errors($previousMode);

    if ($success) {
        $output = trim($this->render($this->document));
        // Post clean up
        $output = $this->postCleanUp($output);
    }

    return $output;
}
示例10: WiziappDOMLoader
/**
 * Old-school Constructor
 *
 * Stores the encoding and, when $html is not empty(), runs it through
 * prepareHTMLString() and parses the result into $this->dom.
 *
 * @param string $html The html block, not a full document
 * @param string $encoding The encoding used for the html block
 */
function WiziappDOMLoader($html = '', $encoding = 'UTF-8')
{
    $this->encoding = $encoding;

    // Nothing to load; note that empty() also treats the string '0' as empty.
    if (empty($html)) {
        return;
    }

    $prepared = $this->prepareHTMLString($html);

    $document = new DOMDocument('1.0', $this->encoding);
    $this->dom = $document;

    // Parser complaints are collected internally and thrown away below.
    libxml_use_internal_errors(true);
    @$document->loadHTML($prepared);
    $document->encoding = $this->encoding;
    libxml_clear_errors();

    // NOTE(review): this flag is assigned after the markup has been parsed —
    // confirm whether that ordering is intended.
    $document->preserveWhiteSpace = false;

    return;
}
示例11: loadHTML
/**
 * Loads HTML into the wrapped DOM document as UTF-8.
 *
 * An XML encoding declaration is put in front of the markup and the
 * document's encoding property is set to UTF-8 after loading.
 *
 * NOTE(review): the catch block only runs if something turns the parser's
 * diagnostics into exceptions (presumably the application's error handler) —
 * confirm.
 *
 * @param string $html
 * @return $this
 * @throws \Exception with code 5 and a reworded message; the original
 *                    exception is attached as the previous exception
 */
public function loadHTML($html = "")
{
    try {
        // The HTML is UTF-8 encoded
        $this->dom->loadHTML('<?xml encoding="UTF-8">' . $html);
        $this->dom->encoding = 'UTF-8';
    } catch (Exception $e) {
        // Reword the parser message for display.
        $search = array('DOMDocument::loadHTML():', 'Entity');
        $replace = array(Yii::t('app', 'Check your code:'), Yii::t('app', 'Form'));
        $message = str_replace($search, $replace, $e->getMessage());
        // Chain the original exception so its trace is not lost.
        throw new Exception($message, 5, $e);
    }

    return $this;
}
示例12: parse
/**
 * Parses a full HTML document.
 *
 * The text is first run through DOMDocument (load, then saveHTML()) to clean
 * up invalid HTML, and the re-serialized markup is fed to HTML5_Tokenizer.
 *
 * @param $text HTML text to parse
 * @param $builder Custom builder implementation
 * @return Parsed HTML as DOMDocument
 */
public static function parse($text, $builder = null)
{
    // Text that passes strict UTF-8 detection gets an encoding declaration
    // in front of it; anything else is loaded unchanged.
    $isUtf8 = mb_detect_encoding($text, "UTF-8", true) == "UTF-8";
    $prefix = $isUtf8 ? '<?xml encoding="UTF-8" ?>' : '';

    // Cleanup invalid HTML
    $doc = new DOMDocument();
    @$doc->loadHTML($prefix . $text);
    $cleaned = $doc->saveHTML();

    $tokenizer = new HTML5_Tokenizer($cleaned, $builder);
    $tokenizer->parse();

    return $tokenizer->save();
}
示例13: testCorrectSetup
/**
 * Adds a valid cloneable field to the form and checks its decorator, the
 * markup it renders and its base-element lookup.
 */
public function testCorrectSetup()
{
    $field = $this->prepareValidCloneableField();
    $this->form->add($field);

    // The decorator exposes a DI container.
    $container = $this->form->get('cloneable_field')->getDecorator()->getDI();
    $this->assertInstanceOf('\\Phalcon\\DI', $container);

    $this->form->get('cloneable_field')->getDecorator()->setTemplateName('jquery');

    // Default rendering: count the fieldsets and inputs under the field element.
    $document = new \DOMDocument('1.0');
    $markup = $this->form->get('cloneable_field')->render();
    $document->loadHTML($markup);

    $fieldsets = $document->getElementById('cloneable_field')->getElementsByTagName('fieldset');
    $this->assertEquals(2, $fieldsets->length);

    $inputs = $document->getElementById('cloneable_field')->getElementsByTagName('input');
    $this->assertEquals(4, $inputs->length);

    // Rendering with an extra attribute: it must show up on the field element.
    $markup = $this->form->get('cloneable_field')->render(['attribute' => 'test']);
    $document->loadHTML($markup);

    $attribute = $document->getElementById('cloneable_field')->attributes->getNamedItem('attribute');
    $this->assertEquals('test', $attribute->value);

    // Base element lookup: 'test3' yields null, 'test2' yields an element.
    $missing = $this->form->get('cloneable_field')->getBaseElement('test3');
    $this->assertNull($missing);

    $present = $this->form->get('cloneable_field')->getBaseElement('test2');
    $this->assertInstanceOf('\\Phalcon\\Forms\\ElementInterface', $present);
}
示例14: __construct
/**
 * Stores the language pair and parses the HTML fragment into $this->doc.
 *
 * The fragment is wrapped in a <div> carrying a reserved id and preceded by
 * a content-type <meta> tag before it is loaded.
 *
 * @param string $html HTML fragment to load
 * @param mixed $sourceLang stored as $this->sourceLang
 * @param mixed $targetLang stored as $this->targetLang
 * @throws \Exception when DOMDocument::loadHTML() reports failure
 */
public function __construct($html, $sourceLang, $targetLang)
{
    $this->sourceLang = $sourceLang;
    $this->targetLang = $targetLang;

    $this->doc = new \DOMDocument();
    $this->doc->strictErrorChecking = false;

    // NOTE(review): errorStart()/errorStop() presumably bracket the parser's
    // error handling — confirm against their definitions.
    $error = $this->errorStart();

    // Setting meta below is a hack to get our DomDocument into utf-8. All other
    // methods tried didn't work.
    $wrapped = '<meta http-equiv="content-type" content="text/html; charset=utf-8"><div id="eggs-n-cereal-dont-ever-use-this-id">' . $html . '</div>';
    $success = $this->doc->loadHTML($wrapped);

    $this->errorStop($error);

    if (!$success) {
        throw new \Exception('Invalid HTML');
    }
}
示例15: __construct
/**
 * Object constructor
 *
 * Parses the HTML and adds these fields to the document: 'title', one field
 * per named <meta> tag in the head, and 'body'. It also records the href
 * values of <a> elements in $this->_links and of <link> elements in the head
 * in $this->_headerLinks, both de-duplicated.
 *
 * @param string $data          HTML source, or the name of the file to read
 *                              it from when $isFile is true
 * @param boolean $isFile       Whether $data is read via file_get_contents()
 * @param boolean $storeContent Whether the body is added as a Text field
 *                              (true) or as an UnStored field (false)
 */
private function __construct($data, $isFile, $storeContent)
{
    $this->_doc = new DOMDocument();
    $this->_doc->substituteEntities = true;

    if ($isFile) {
        $htmlData = file_get_contents($data);
    } else {
        $htmlData = $data;
    }
    // Parser warnings about malformed markup are deliberately silenced.
    @$this->_doc->loadHTML($htmlData);

    // DOMDocument::$actualEncoding is deprecated; `encoding` reports the same
    // value. It is read once here because nothing below re-parses the document.
    $encoding = $this->_doc->encoding;

    $xpath = new DOMXPath($this->_doc);

    $docTitle = '';
    $titleNodes = $xpath->query('/html/head/title');
    foreach ($titleNodes as $titleNode) {
        // title should always have only one entry, but we process all nodeset entries
        $docTitle .= $titleNode->nodeValue . ' ';
    }
    $this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, $encoding));

    // Every <meta name="..."> becomes a field named after the meta tag.
    $metaNodes = $xpath->query('/html/head/meta[@name]');
    foreach ($metaNodes as $metaNode) {
        $this->addField(Zend_Search_Lucene_Field::Text($metaNode->getAttribute('name'), $metaNode->getAttribute('content'), $encoding));
    }

    $docBody = '';
    $bodyNodes = $xpath->query('/html/body');
    foreach ($bodyNodes as $bodyNode) {
        // body should always have only one entry, but we process all nodeset entries
        $this->_retrieveNodeText($bodyNode, $docBody);
    }
    if ($storeContent) {
        $this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, $encoding));
    } else {
        $this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, $encoding));
    }

    // Collect link targets; rel="nofollow" links are skipped when
    // self::$_excludeNoFollowLinks is set.
    $linkNodes = $this->_doc->getElementsByTagName('a');
    foreach ($linkNodes as $linkNode) {
        if (($href = $linkNode->getAttribute('href')) != '' && (!self::$_excludeNoFollowLinks || strtolower($linkNode->getAttribute('rel')) != 'nofollow')) {
            $this->_links[] = $href;
        }
    }
    $this->_links = array_unique($this->_links);

    // Collect the href of every <link> element in the head.
    $linkNodes = $xpath->query('/html/head/link');
    foreach ($linkNodes as $linkNode) {
        if (($href = $linkNode->getAttribute('href')) != '') {
            $this->_headerLinks[] = $href;
        }
    }
    $this->_headerLinks = array_unique($this->_headerLinks);
}