本文整理汇总了PHP中simple_html_dom类的典型用法代码示例。如果您正苦于以下问题:PHP simple_html_dom类的具体用法?PHP simple_html_dom怎么用?PHP simple_html_dom使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了simple_html_dom类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: handleDocumentInfo
/**
 * Crawler callback: prints the requested URL with its status code and, when
 * the document is a received 200 text/html response, downloads every image
 * matched by "ul#ListViewInner li img" into the downloads/ directory.
 *
 * @param object $DocInfo Document descriptor; the properties read here are
 *                        url, http_status_code, received, content_type,
 *                        content and host.
 * @return void
 */
function handleDocumentInfo($DocInfo)
{
    echo "Page requested: " . $DocInfo->url . " (" . $DocInfo->http_status_code . ")" . PHP_EOL;

    $isUsableHtml = $DocInfo->http_status_code == '200'
        && $DocInfo->received
        && $DocInfo->content_type == 'text/html'
        && isset($DocInfo->content);
    if (!$isUsableHtml) {
        return;
    }

    $markup = $DocInfo->content;
    // Read but not used further down; kept so the same properties are touched.
    $sourceHost = $DocInfo->host;
    $sourceUrl = $DocInfo->url;

    $parser = new simple_html_dom();
    $parser->load($markup);

    // Stays empty in this callback; it is still emitted as JSON below.
    $payload = array();

    $imageNodes = $parser->find('ul#ListViewInner li img');
    echo 'Total images' . count($imageNodes) . '' . PHP_EOL;

    $sequence = 0;
    foreach ($imageNodes as $imageNode) {
        echo $imageNode->alt . "','1','" . $imageNode->src . "\n";
        $target = 'downloads/' . '_e__' . $sequence . '.jpg';
        // Rewrites the "l225 " marker in the source URL to "l900" before downloading.
        copy(str_replace("l225 ", "l900", $imageNode->src), $target);
        $sequence++;
    }

    echo json_encode($payload) . PHP_EOL;
    unset($payload);
    unset($parser);
}
示例2: parse
/**
 * Parses the print template of this form: loads it into simple_html_dom,
 * hands all <ic> nodes to refactor(), caches the collected elements and
 * stores the re-rendered markup through FlowFormType.
 *
 * @param bool $isUpdate When true, existing data-id attributes are stripped
 *                       from the template and the item counter starts at 0;
 *                       otherwise the counter starts at the current itemmax.
 * @return void
 */
public function parse($isUpdate = false)
{
    Ibos::import("application.extensions.simple_html_dom", true);

    $template = $isUpdate
        ? preg_replace("/\\s+data-id\\s?=\\s?\"?\\d+\"?/i", "", $this->printmodel)
        : $this->printmodel;
    $maxItem = $isUpdate ? 0 : intval($this->itemmax);

    $collected = array();
    $dom = new simple_html_dom();
    $dom->load($template, true, true, CHARSET);

    $nodes = $dom->find("ic");
    $itemConfig = $this->getItemConfig();
    if (!empty($nodes) && !empty($itemConfig)) {
        // NOTE(review): refactor() presumably updates $maxItem and $collected
        // by reference - confirm against its signature.
        $this->refactor($nodes, $itemConfig, $maxItem, $collected);
    }

    $rendered = $dom->save();
    $this->_cache = $collected;
    CacheUtil::set("form_" . $this->ID, $collected);

    $changes = array("printmodelshort" => $rendered);
    if ($maxItem != $this->itemmax) {
        $changes["itemmax"] = $maxItem;
    }

    $dom->clear();
    FlowFormType::model()->modify($this->ID, $changes);
}
示例3: run_ml
/**
 * Scrapes one search-result page of musiklegal.com, saves every song row to
 * the ScraperWiki SQLite store and then recurses into the page referenced by
 * the "Next" link.
 *
 * Changes compared with the previous version:
 *  - rows with fewer than four <td> cells (e.g. header rows) are skipped
 *    instead of dereferencing a missing cell;
 *  - the next-page offset is initialised, so a page without a "Next" link no
 *    longer reads an undefined variable;
 *  - the search for the "Next" link stops at the first usable link (the old
 *    `continue` had no effect);
 *  - the anchor fragments stripped from the second cell are plain `<a href=`
 *    and `</a>`; the former literals contained `<strong>` highlighting markup
 *    that cannot occur in the scraped text;
 *  - the DOM is cleared before recursing so parsed pages do not pile up.
 *
 * @param int|string $q_num Result offset appended to the search URL.
 * @return void The script is terminated with exit once no next page exists.
 */
function run_ml($q_num = 0)
{
    $baseUrl = 'http://musiklegal.com/search/result/a/';

    $html = scraperWiki::scrape($baseUrl . $q_num);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("tr") as $row) {
        $tds = $row->find("td");
        if (count($tds) < 4) {
            // Not a data row; the record below needs cells 0..3.
            continue;
        }

        $cell = str_replace('<a href="http://musiklegal.com/song/detail/', '', $tds[1]->plaintext);
        $cell = str_replace('</a>', '', $cell);
        $parts = explode('">', $cell);

        $record = array(
            'No' => str_replace('.', '', $tds[0]->plaintext),
            'Code' => $parts[0],
            // No '">' separator in the cell means there is no separate title part.
            'Song Title' => isset($parts[1]) ? $parts[1] : '',
            'Artist' => $tds[2]->plaintext,
            'Album' => $tds[3]->plaintext,
        );

        // Stores results
        scraperwiki::save_sqlite(array("No"), $record);
    }

    // Offset of the following page, taken from the first usable "Next" link.
    $next = 0;
    foreach ($dom->find("a") as $a) {
        if ($a->plaintext == 'Next') {
            $candidate = str_replace($baseUrl, '', $a->href);
            if ($candidate > 0) {
                $next = $candidate;
                break;
            }
        }
    }

    // Release the parsed document before descending into the next page.
    $dom->clear();
    unset($dom);

    if ((int) $next != 0) {
        run_ml($next);
    } else {
        exit;
    }
}
示例4: getMoviesUrl
/**
 * Collects movie URLs from an entry page.
 *
 * Only images under "div.entryBody div.topmore a img" whose alt text starts
 * with "動画" are considered. For each of them the third following sibling
 * of the parent node is inspected: if it is a <span>, everything collected
 * so far is discarded and the scan stops; otherwise the parent <a>'s href is
 * resolved through UriManager and appended.
 *
 * @param simple_html_dom $html Parsed page.
 * @return array Values returned by UriManager::resolve() for each link.
 **/
public function getMoviesUrl($html)
{
    $movie_data = array();
    $manager = new UriManager();

    foreach ($html->find('div.entryBody div.topmore a img') as $movies_el) {
        if (!preg_match('/^動画.+/', $movies_el->getAttribute('alt'))) {
            continue;
        }

        // The link itself lives on the parent <a> tag.
        $parent_el = $movies_el->parentNode();
        if (is_null($parent_el)) {
            continue;
        }

        // Advance up to three siblings; $next_el becomes null when the
        // parent has fewer than three following siblings.
        $next_el = $parent_el;
        for ($i = 0; $i < 3 && !is_null($next_el); $i++) {
            $next_el = $next_el->nextSibling();
        }

        // Guard against the null case: previously this dereferenced null and
        // aborted with a fatal error when the sibling chain was short.
        if (!is_null($next_el) && $next_el->nodeName() == 'span') {
            $movie_data = [];
            break;
        }

        if ($parent_el->nodeName() == 'a') {
            $movie_data[] = $manager->resolve($parent_el->getAttribute('href'));
        }
    }

    return $movie_data;
}
示例5: getProducts
/**
 * Scrapes one product listing page, writes one CSV row per product to the
 * global output handle $o and follows the "p.next" element to the next page.
 *
 * Each row is: product name, product type (1 when the item contains a
 * "p.minimal-price" element, otherwise 0), category, product URL.
 *
 * Changes compared with the previous version: items without a
 * "p.product-name > a" link are skipped instead of dereferencing null, the
 * next-page element is looked up once, recursion only happens when it yields
 * a non-empty href, and the DOM is cleared before recursing.
 *
 * @param string $u   URL of the listing page.
 * @param mixed  $cat Category value written to every row.
 * @return void
 */
function getProducts($u, $cat)
{
    global $o;

    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));

    $nextUrl = null;
    $items = $d->find('li.grid-item');
    if (count($items) > 0) {
        foreach ($items as $p) {
            $prod = $p->find('p.product-name > a', 0);
            if (is_null($prod)) {
                // Grid item without a product link: nothing to record.
                continue;
            }

            $prodname = trim($prod->innertext);
            $prodURL = $prod->href;
            $prodtype = is_null($p->find('p.minimal-price', 0)) ? 0 : 1;

            fputcsv($o, array($prodname, $prodtype, $cat, $prodURL));
            echo $prodname . "\n";
        }

        $next = $d->find('p.next', 0);
        if (!is_null($next)) {
            $nextUrl = $next->href;
        }
    }

    // Free the parsed page before going one level deeper.
    $d->clear();
    unset($d);

    if (is_string($nextUrl) && $nextUrl !== '') {
        getProducts($nextUrl, $cat);
    }
}
示例6: get_data
/**
 * Fetches the draw page at self::URL, extracts the latest draw and stores it
 * in $this->data as ['qihao' => issue number, 'kjsj' => current timestamp,
 * 'code' => drawn numbers without spaces].
 *
 * The script is terminated with exit() when the page cannot be fetched or
 * when the draw is not available yet (placeholder "--" or the status text
 * "开奖中").
 *
 * Fix: the "still drawing" check combined its two comparisons with &&, which
 * can never be true for a single value; it now uses ||.
 *
 * @return void
 */
private function get_data()
{
    include_once 'simplehtmldom_1_5/simple_html_dom.php';
    $simple_html_dom = new \simple_html_dom();

    // Read through the zlib stream wrapper so a compressed body is inflated.
    $data = @file_get_contents("compress.zlib://" . self::URL);
    if (!$data) {
        $this->setLog(false, '重庆时时彩-开奖数据抓取失败');
        exit('重庆时时彩-数据抓取失败,请尽快联系网站管理员' . "\r\n");
    }

    // Convert to UTF-8.
    $encode = mb_detect_encoding($data, array('ASCII', 'UTF-8', 'GB2312', "GBK", 'BIG5'));
    $content = iconv($encode, 'UTF-8', $data);
    $simple_html_dom->load($content);

    $aside = $simple_html_dom->find('div[class=aside]', 0);

    // Issue number.
    $qihao = $aside->find('h3', 0)->find('em', 0)->plaintext;

    // Second table row of the result box; both the numbers and the status
    // cell are read from it.
    $row = $aside->find('div[class=mod-aside mod-aside-xssckj]', 0)
        ->find('div[class=bd]', 0)
        ->find('div[class=kpkjcode]', 0)
        ->find('table', 0)
        ->find('tr', 1);

    // Drawn numbers.
    $code = $row->find('td', 1)->plaintext;
    if ($code == '--') {
        exit('重庆时时彩-等待开奖...' . "\r\n");
    }

    // Status cell: a placeholder or the "drawing in progress" text both mean
    // the result is not final yet.
    $isKaiJiang = $row->find('td', 2)->plaintext;
    if ($isKaiJiang == '--' || $isKaiJiang == '开奖中') {
        exit('重庆时时彩-等待开奖...' . "\r\n");
    }

    $simple_html_dom->clear();

    // Remove the spaces between the drawn numbers.
    $code = str_replace(" ", '', $code);
    // Draw time is recorded as the time of this fetch.
    $kjsj = date('Y-m-d H:i:s');
    $this->data = ['qihao' => $qihao, 'kjsj' => $kjsj, 'code' => $code];
}
示例7: getMoviesUrl
/**
 * Collects movie URLs from an entry page.
 *
 * Two sources are scanned: the src of iframes under
 * "div.ently_body div.ently_text div.video-container", and the href of
 * anchors under "div.ently_outline div.ently_body" whose text contains
 * "リンク" followed by an opening parenthesis. Every URL is passed through
 * UriManager::resolve() and kept only when the result is usable.
 *
 * Fix: the anchor-text pattern was '/リンク(/', which is not a valid regular
 * expression (unclosed group), so preg_match() failed and no anchor was ever
 * collected. The parenthesis is now matched literally, in both its ASCII and
 * full-width form.
 *
 * @param simple_html_dom $html Parsed page.
 * @return array Resolved movie URLs.
 **/
public function getMoviesUrl($html)
{
    $movie_data = array();
    $manager = new UriManager();

    // Embedded players.
    $query = 'div.ently_body div.ently_text div.video-container iframe';
    foreach ($html->find($query) as $movies_el) {
        if (!$movies_el->hasAttribute('src')) {
            continue;
        }
        $url = $manager->resolve($movies_el->getAttribute('src'));
        if ($url !== false) {
            $movie_data[] = $url;
        }
    }

    // Text links labelled "リンク(".
    $query = 'div.ently_outline div.ently_body a';
    foreach ($html->find($query) as $movies_el) {
        $text = $movies_el->plaintext;
        if (preg_match('/リンク[(（]/u', $text) && $movies_el->hasAttribute('href')) {
            $resolve_url = $manager->resolve($movies_el->getAttribute('href'));
            if ($resolve_url) {
                $movie_data[] = $resolve_url;
            }
        }
    }

    return $movie_data;
}
示例8: do_day
/**
 * Debug helper: loads the page at $rec['url'], takes the parent of the first
 * <a name="discs"> anchor and prints its text nodes together with their
 * index and length.
 *
 * @param array $rec Record holding the page address under the 'url' key.
 * @return void
 */
function do_day($rec)
{
    $page = scraperwiki::scrape($rec['url']);
    $doc = new simple_html_dom();
    $doc->load($page);

    $anchors = $doc->find('a[name=discs]');
    $textNodes = $anchors[0]->parent->find('text');

    print $textNodes[10] . "\n";
    $total = count($textNodes);
    print $total . "\n";

    # Index-based loop on purpose: the original author noted that null
    # entries end a foreach early.
    for ($idx = 0; $idx < $total; $idx++) {
        $text = $textNodes[$idx];
        $length = strlen($text);
        # Entries of length 3 are skipped; the original author noted that the
        # DOM object crashes on such a row.
        if ($length != 3) {
            print $idx . " " . $length . "\n";
            print $text . "\n";
        }
    }
}
示例9: compile
/**
 * Turn a template file into PHP template content.
 *
 * The file is read and parsed with simple_html_dom. If an element carrying
 * the j:main attribute exists, only that element is converted with
 * {@see convert()}; otherwise every node whose parent is the document root
 * is converted. The resulting tree is then passed to {@see transform()} and
 * rendered to a string.
 *
 * @param string $template Template file path.
 * @return string PHP template content.
 * @throws InvalidTemplateException If the template cannot be read or parsed.
 */
public function compile($template)
{
    $dom = new \simple_html_dom();
    $this->currentTemplate = $template;

    $source = file_get_contents($template);
    if ($source === false) {
        throw new InvalidTemplateException(tr('Could not read template: %1', $template));
    }

    $parsed = $dom->load($source, true, false);
    if (!$parsed) {
        throw new InvalidTemplateException(tr('Could not parse template: %1', $template));
    }

    $root = new InternalNode();
    $main = $dom->find('[j:main]', 0);
    if (!isset($main)) {
        // No explicit main element: convert every top-level node.
        foreach ($dom->find('*, text') as $node) {
            if ($node->parent->tag == 'root') {
                $root->append($this->convert($node));
            }
        }
    } else {
        $root->append($this->convert($main));
    }

    $this->transform($root);

    return $root->__toString();
}
示例10: scrapPage
/**
 * Posts the GEIPAN search form for one result page and saves every case row
 * (table rows carrying an onclick attribute) through scraperwiki::save().
 *
 * Changes compared with the previous version:
 *  - a failed request stops after printing the cURL error instead of parsing
 *    the boolean false returned by curl_exec();
 *  - rows whose onclick lacks "cas=" or which have fewer than five cells are
 *    skipped instead of producing a bogus ID or dereferencing a missing cell;
 *  - the city is the full title when the title has no "(": previously
 *    strpos() returned false and the last character of the title was cut.
 *
 * @param int|string $page Result page number sent in the "page" field.
 * @return void
 */
function scrapPage($page)
{
    print "Scraping page " . $page;

    $url = "http://www.geipan.fr/index.php?id=202";
    $fields_string = "&no_cache=1&"
        . "tx_geipansearch_pi1%5Bsubmit_form%5D=1&"
        . "tx_geipansearch_pi1%5Btexte_resume%5D=&"
        . "tx_geipansearch_pi1%5Bdate_debut%5D=&"
        . "tx_geipansearch_pi1%5Bdate_fin%5D=&"
        . "no_cache=1&"
        . "tx_geipansearch_pi1%5Bclasse_cas%5D=tous&"
        . "tx_geipansearch_pi1%5Bregion%5D=&"
        . "page=" . $page . "&"
        . "order_by=&"
        . "sens=";

    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // CURLOPT_POST is a boolean switch; the former value 11 was merely truthy.
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);

    $html = curl_exec($curl);
    print curl_error($curl) . "\n";
    if ($html === false) {
        return;
    }

    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("tr") as $tr) {
        if (!isset($tr->attr['onclick'])) {
            continue;
        }

        $onclick = $tr->attr['onclick'];
        $casPos = strpos($onclick, "cas=");
        if ($casPos === false) {
            continue;
        }
        // The case ID is the 13 characters following "cas=".
        $ID = substr($onclick, $casPos + 4, 13);
        print $ID . "\n";

        $tds = $tr->find("td");
        if (count($tds) < 5) {
            continue;
        }

        $title = utf8_encode($tds[0]->plaintext);
        $date = $tds[1]->plaintext;
        $departement = utf8_encode($tds[2]->plaintext);
        $classe = $tds[3]->plaintext;
        $maj = $tds[4]->plaintext;

        // City is the title up to, but excluding, the character before "(".
        $parenPos = strpos($title, "(");
        $city = $parenPos === false ? $title : substr($title, 0, max(0, $parenPos - 1));

        $record = array(
            'ID' => $ID,
            'title' => $title,
            'date' => $date,
            'departement' => $departement,
            'classe' => $classe,
            'maj' => $maj,
            'city' => $city,
        );
        scraperwiki::save(array('ID', 'maj'), $record);
    }
}
示例11: getSearchResults
/**
 * Extracts search results from a parsed result page.
 *
 * Results are read from "div.rc" blocks. When the page contains "div.srg"
 * containers, only one of them is searched: the second one if there are
 * several (the first then holds unrelated links), otherwise the only one.
 * Without any "div.srg" the whole document is searched.
 *
 * Changes compared with the previous version: a result block without a
 * title link is skipped, and a block without a thumbnail link or without an
 * "imgurl=" parameter yields null as image URL instead of dereferencing
 * null or reading an undefined match.
 *
 * @param simple_html_dom $dom Parsed result page.
 * @return array List of [title, link, original image URL or null].
 */
public function getSearchResults(simple_html_dom $dom)
{
    $result = array();

    $groups = $dom->find('div.srg');
    $count = count($groups);
    if ($count) {
        $d = $groups[$count > 1 ? 1 : 0];
    } else {
        $d = $dom;
    }

    foreach ($d->find('div.rc') as $div) {
        // Link to the website.
        $a = $div->find('h3.r a', 0);
        if (is_null($a)) {
            continue;
        }

        // Original image URL, carried in the thumbnail link's imgurl parameter.
        $imageUrl = null;
        $originalImg = $div->find('div.th a', 0);
        if (!is_null($originalImg) && preg_match('/imgurl=(.+?)&/', $originalImg->href, $matches)) {
            $imageUrl = $matches[1];
        }

        $result[] = array(htmlspecialchars_decode($a->plaintext, ENT_QUOTES), $a->href, $imageUrl);
    }

    return $result;
}
示例12: scrape_page
/**
 * Scrapes one result page of asuntojen.hintatiedot.fi using the search
 * parameters held in $GLOBALS (c, s, r, amin, amax, z), saves every table
 * row with more than eight cells to the ScraperWiki SQLite store and, when
 * exactly 50 rows were found, increments $GLOBALS['z'] and scrapes the next
 * page. $GLOBALS['rowTotal'] is incremented for every saved row.
 *
 * @return void
 */
function scrape_page()
{
    $searchUrl = "http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c']
        . "&s=" . $GLOBALS['s']
        . "&r=" . $GLOBALS['r']
        . "&amin=" . $GLOBALS['amin']
        . "&amax=" . $GLOBALS['amax']
        . "&z=" . $GLOBALS['z'];

    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($searchUrl));

    $rowsOnPage = 0;
    foreach ($dom->find("tr") as $tr) {
        $cells = $tr->find("td");
        if (count($cells) <= 8) {
            continue;
        }

        $rowsOnPage++;
        $GLOBALS['rowTotal']++;

        $apartment = array(
            "Uniikkiavain" => $GLOBALS['rowTotal'],
            "Kaupunginosa" => $cells[0]->plaintext,
            "Myyntihinta" => $cells[3]->plaintext,
            "Neliohinta" => $cells[4]->plaintext,
            "Tyyppi" => $cells[1]->plaintext,
            "Koko" => $cells[2]->plaintext,
        );
        // The table name combines the 'c' search parameter with $GLOBALS['time'].
        scraperwiki::save_sqlite(null, $apartment, $GLOBALS['c'] . " " . $GLOBALS['time']);

        print $GLOBALS['rowTotal'] . "\n";
        print $rowsOnPage . ". Sijainti: " . $cells[0]->plaintext . " Hinta: " . $cells[3]->plaintext . " Tyyppi: " . $cells[1]->plaintext . " Koko: " . $cells[2]->plaintext . " Neliöhinta: " . $cells[4]->plaintext . "€" . "\n";
    }

    if ($rowsOnPage != 50) {
        // Any other row count ends the run and prints the totals.
        print "Skrääpiminen suoritettu." . "\n";
        print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
        print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
        return;
    }

    print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
    $GLOBALS['z']++;
    scrape_page();
}
示例13: save
/**
 * Converts an HTML fragment into a Word 2007 document and writes it to
 * "<dir>wordtemp/<unix timestamp>.docx".
 *
 * Fix: the former `$str = explode(".", $h2d_file_uri);` read the target path
 * before it was assigned and never used the result; it has been removed.
 *
 * @param string $html HTML fragment to convert.
 * @param string $dir  Base directory; "wordtemp/" is appended as-is, so it is
 *                     expected to end with a directory separator.
 * @return string Path of the written .docx file.
 */
public function save($html, $dir)
{
    import("@.ORG.htmltodocx.documentation.support_functions");

    $phpword_object = new PHPWord();
    $section = $phpword_object->createSection();

    // The fragment is nested in dummy <html><body> elements so that it can
    // be addressed as the children of a single root.
    $html_dom = new simple_html_dom();
    $html_dom->load('<html><body>' . $html . '</body></html>');
    $html_dom_array = $html_dom->find('html', 0)->children();

    // base_root / base_path for the converter come from the support helper.
    $paths = htmltodocx_paths();

    // Initial converter settings.
    $initial_state = array(
        'phpword_object' => &$phpword_object,
        'base_root' => $paths['base_root'],
        'base_path' => $paths['base_path'],
        'current_style' => array('size' => '11'),
        'parents' => array(0 => 'body'),
        'list_depth' => 0,
        'context' => 'section',
        'pseudo_list' => TRUE,
        'pseudo_list_indicator_font_name' => 'Wingdings',
        'pseudo_list_indicator_font_size' => '7',
        'pseudo_list_indicator_character' => 'l ',
        'table_allowed' => TRUE,
        'treat_div_as_paragraph' => TRUE,
        'style_sheet' => htmltodocx_styles_example(),
    );

    // Convert the HTML and put it into the PHPWord object.
    htmltodocx_insert_html($section, $html_dom_array[0]->nodes, $initial_state);

    // Release the HTML DOM object.
    $html_dom->clear();
    unset($html_dom);

    // Make sure the target folder exists, then write the file.
    $targetDir = $dir . "wordtemp/";
    $h2d_file_uri = $targetDir . time() . ".docx";
    if (!file_exists($targetDir)) {
        $this->createFolders($targetDir);
    }

    $objWriter = PHPWord_IOFactory::createWriter($phpword_object, 'Word2007');
    $objWriter->save($h2d_file_uri);

    return $h2d_file_uri;
}
示例14: handleDocumentInfo
/**
 * Crawler callback: prints the requested URL with its status code and, when
 * the document is a received 200 text/html response, downloads every image
 * matched by ".pinHolder img" into downloads/ and prints the collected
 * title/src pairs as JSON.
 *
 * @param object $DocInfo Document descriptor; the properties read here are
 *                        url, http_status_code, received, content_type,
 *                        content and host.
 * @return void
 */
function handleDocumentInfo($DocInfo)
{
    echo "Page requested: " . $DocInfo->url . " (" . $DocInfo->http_status_code . ")" . PHP_EOL;

    $isUsableHtml = $DocInfo->http_status_code == '200'
        && $DocInfo->received
        && $DocInfo->content_type == 'text/html'
        && isset($DocInfo->content);
    if (!$isUsableHtml) {
        return;
    }

    $markup = $DocInfo->content;
    // Read but not used further down; kept so the same properties are touched.
    $sourceHost = $DocInfo->host;
    $sourceUrl = $DocInfo->url;

    $parser = new simple_html_dom();
    $parser->load($markup);

    $payload = array();
    $imageNodes = $parser->find('.pinHolder img');
    echo 'Total Images ' . count($imageNodes) . PHP_EOL;

    // File names start at the current timestamp (YmdHis) and count upwards.
    $fileNumber = intval(date("YmdHis"));
    foreach ($imageNodes as $imageNode) {
        $payload['items'][] = array("title" => $imageNode->alt, "img" => $imageNode->src);
        copy($imageNode->src, 'downloads/' . $fileNumber . '.jpg');
        $fileNumber++;
    }

    echo json_encode($payload) . PHP_EOL;
    unset($payload);
    unset($parser);
}
示例15: parsing
/**
 * Parses a scraped search-result page into a list of result entries and
 * stores that list in $this->aResult.
 *
 * Each "li[class=s-result-item]" element yields one entry with the keys
 * image, title, price, numPrice and totalPage, as far as the corresponding
 * elements are present. Entries are keyed by the position of the <li> in the
 * match list; elements that yield nothing are left out.
 *
 * Changes compared with the previous version:
 *  - the per-item array is reset for every <li>; before, an item lacking an
 *    image, title or price silently inherited the value of the previous one;
 *  - the total-page lookup does not depend on the item, so it now runs once
 *    instead of once per result.
 *
 * @param string $scrappedData Raw HTML of the result page.
 * @return array The parsed entries (also assigned to $this->aResult).
 */
private function parsing($scrappedData)
{
    $result = [];

    $html = new simple_html_dom();
    $html->load($scrappedData);

    // Total page count: the last "pagnDisabled" span wins, as before.
    $hasTotalPage = false;
    $totalPage = null;
    foreach ($html->find('span[class=pagnDisabled]') as $maxPage) {
        $totalPage = $maxPage->innertext();
        $hasTotalPage = true;
    }

    foreach ($html->find('li[class=s-result-item]') as $key => $innerData) {
        $atmp = [];

        // Image
        foreach ($innerData->find('img[class=s-access-image]') as $img) {
            $atmp['image'] = $img->getAttribute('src');
        }

        // Title
        foreach ($innerData->find('h2[class=s-access-title]') as $title) {
            $atmp['title'] = $title->innertext();
        }

        // Price, plus a numeric form without the first character and commas.
        // NOTE(review): substr(..., 1) presumably drops a one-byte currency
        // symbol - confirm for multi-byte symbols.
        foreach ($innerData->find('span[class=s-price]') as $price) {
            $priceText = $price->innertext();
            $atmp['price'] = $priceText;
            $atmp['numPrice'] = str_replace(",", '', substr($priceText, 1));
        }

        if ($hasTotalPage) {
            $atmp['totalPage'] = $totalPage;
        }

        if (!empty($atmp)) {
            $result[$key] = $atmp;
        }
    }

    return $this->aResult = $result;
}