This article collects typical usage examples of the PHP simple_html_dom::load method. If you are wondering how exactly PHP's simple_html_dom::load is used, how to call it, or what real-world examples look like, the hand-picked code samples below should help. You can also explore further usage examples of the containing class, simple_html_dom.
A total of 15 code examples of the simple_html_dom::load method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better PHP code samples.
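All of the examples share the same basic pattern: create a simple_html_dom object, feed it markup (or a fetched page) with load(), query it with find(), and free the parser with clear(). A minimal sketch of that pattern, using placeholder markup:
$html = new simple_html_dom();
$html->load('<ul><li>first</li><li>second</li></ul>');
foreach ($html->find('li') as $item) {
    echo $item->plaintext . "\n"; // "first", then "second"
}
$html->clear(); // release the internal DOM tree when done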
Example 1: a587_getPlaintext
function a587_getPlaintext($_text, $_remove)
{
global $REX;
foreach (explode(',', $REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']['order']) as $elem) {
switch ($elem) {
case 'selectors':
// remove elements selected by css-selectors
$html = new simple_html_dom();
$html->load($_text);
$html->remove($_remove);
$html->load($html->outertext);
$_text = $html->plaintext;
break;
case 'regex':
// apply the regex replacements configured in the settings (alternating pattern/replacement lines)
if (!empty($REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']['regex'])) {
$regex = array();
$replacement = array();
$odd = true;
foreach (explode("\n", $REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']['regex']) as $line) {
if ($line != '') {
if ($odd) {
$regex[] = trim($line);
} else {
$replacement[] = $line;
}
$odd = !$odd;
}
}
$_text = preg_replace($regex, $replacement, $_text);
}
break;
case 'textile':
// run the text through Textile (rex_a79_textile)
if (!empty($REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']['textile']) and function_exists('rex_a79_textile')) {
$_text = rex_a79_textile($_text);
}
break;
case 'striptags':
// strip HTML-tags
if (!empty($REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']['striptags'])) {
$_text = strip_tags($_text);
}
break;
}
}
return $_text;
}
Example 2: scrape_page
function scrape_page()
{
$row = 0;
$html = scraperWiki::scrape("http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c'] . "&s=" . $GLOBALS['s'] . "&r=" . $GLOBALS['r'] . "&amin=" . $GLOBALS['amin'] . "&amax=" . $GLOBALS['amax'] . "&z=" . $GLOBALS['z']);
$dom = new simple_html_dom();
$dom->load($html);
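// Rows with more than 8 cells are apartment listings; shorter rows are headers or separators and are skipped.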
foreach ($dom->find("tr") as $data) {
$tds = $data->find("td");
if (count($tds) > 8) {
$row++;
$GLOBALS['rowTotal']++;
$apt = array("Uniikkiavain" => $GLOBALS['rowTotal'], "Kaupunginosa" => $tds[0]->plaintext, "Myyntihinta" => $tds[3]->plaintext, "Neliohinta" => $tds[4]->plaintext, "Tyyppi" => $tds[1]->plaintext, "Koko" => $tds[2]->plaintext);
scraperwiki::save_sqlite(null, $apt, $table_name = $GLOBALS['c'] . " " . $GLOBALS['time']);
print $GLOBALS['rowTotal'] . "\n";
print $row . ". Sijainti: " . $tds[0]->plaintext . " Hinta: " . $tds[3]->plaintext . " Tyyppi: " . $tds[1]->plaintext . " Koko: " . $tds[2]->plaintext . " Neliöhinta: " . $tds[4]->plaintext . "€" . "\n";
}
}
if ($row == 50) {
print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
$GLOBALS['z']++;
scrape_page();
} else {
print "Skrääpiminen suoritettu." . "\n";
print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
}
}
Example 3: run_ml
function run_ml($q_num = 0)
{
$html = scraperWiki::scrape("http://musiklegal.com/search/result/a/" . $q_num);
$dom = new simple_html_dom();
$dom->load($html);
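// Each result row: cell 0 holds the running number, cell 1 the code/title link, cells 2 and 3 the artist and album.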
foreach ($dom->find("tr") as $data) {
$tds = $data->find("td");
$temp_data = explode('">', str_replace('</a>', '', str_replace('<a href="http://musiklegal.com/song/detail/', '', $tds[1]->plaintext)));
$record = array('No' => str_replace('.', '', $tds[0]->plaintext), 'Code' => $temp_data[0], 'Song Title' => $temp_data[1], 'Artist' => $tds[2]->plaintext, 'Album' => $tds[3]->plaintext);
/*
* Stores results
*/
scraperwiki::save_sqlite(array("No"), $record);
unset($temp_data);
}
foreach ($dom->find("a") as $a) {
if ($a->plaintext == 'Next') {
$tmp_a = $a->href;
$tmp_a = str_replace('http://musiklegal.com/search/result/a/', '', $tmp_a);
if ($tmp_a > 0) {
continue;
}
}
}
if ((int) $tmp_a != 0) {
run_ml($tmp_a);
} else {
exit;
}
}
Example 4: getUrl
/**
* Get Raw html of webpage
*
* @param bool $usepost
*
* @return bool
*/
private function getUrl($usepost = false)
{
if (isset($this->_trailUrl)) {
$ch = curl_init(self::POPURL . $this->_trailUrl);
} else {
$ch = curl_init(self::IF18);
}
if ($usepost === true) {
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $this->_postParams);
}
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_VERBOSE, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
if (isset($this->cookie)) {
curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookie);
}
curl_setopt_array($ch, newznab\utility\Utility::curlSslContextOptions());
$this->_response = curl_exec($ch);
if (!$this->_response) {
curl_close($ch);
return false;
}
curl_close($ch);
$this->_html->load($this->_response);
return true;
}
Example 5: getCategories
function getCategories($u)
{
global $baseurl, $f, $local;
$path = "";
$d = new simple_html_dom();
$d->load(scraperwiki::scrape($u));
echo "Loaded URL: " . $u . "\n";
if ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]')) {
$breadcrumb = $d->find('div[id=breadcrumb]', 0);
//foreach($breadcrumb as $b) {
//echo "Breadcrumb = " . $b;}
if (!is_null($breadcrumb)) {
foreach ($breadcrumb->children() as $crumb) {
$path .= trim($crumb->innertext) . "/";
}
$path .= trim(strrchr($breadcrumb->innertext, ">"), "> ");
}
foreach ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]', 0)->find('div[class=S2refinementsContainer]', 0)->children() as $div) {
$name = trim(strstr($div->children(0)->innertext, "(", true));
$url = $baseurl . $div->children(0)->href;
$data = array("Name" => $name, "Path" => $path, "URL" => $url);
echo $path . "/" . $name . "\n";
if ($local) {
fputcsv($f, array($name, $path, $url));
} else {
scraperwiki::save_sqlite(array("URL"), $data);
}
getCategories($url);
}
}
}
Example 6: parse
/**
* Parse the fetched response
* @return array|string parsed order information on success
*/
public function parse()
{
require_once dirname(__FILE__) . '/simple_html_dom.php';
$data = $this->requestURL();
if (empty($data) || strlen($data) < 100) {
return $data;
}
// If the scraped content is empty (or suspiciously short), the cookie has most likely expired.
$html = new simple_html_dom();
$html->load($data);
$ymd = $html->find('.time-d');
$his = $html->find('.time-h');
$title = $html->find('.consume-title a');
$trade = $html->find('td.tradeNo p');
$name = $html->find('p.name');
$amount = $html->find('td.amount span');
if (!$trade) {
return 'no_order';
}
$info = array();
foreach ($ymd as $key => $value) {
// keep only the numeric part of the order number
preg_match('/\\d+/', $trade[$key]->innertext, $tradeNo);
// extra checks could be added here, e.g. query the database to see whether this order has already been notified successfully
$info[] = array('time' => trim($ymd[$key]->innertext) . ' ' . trim($his[$key]->innertext), 'title' => trim($title[$key]->innertext), 'trade' => trim($tradeNo[0]), 'name' => trim($name[$key]->innertext), 'amount' => trim(str_replace('+', '', $amount[$key]->innertext)));
}
$html->clear();
return $info;
}
Example 7: getUrl
/**
* Get Raw html of webpage
*
* @param bool $usepost
* @param string $site
*
* @return bool
*/
private function getUrl($usepost = false, $site = "straight")
{
if (isset($this->_trailUrl)) {
$ch = curl_init($this->_whichSite[$site] . $this->_trailUrl);
} else {
$ch = curl_init(self::IF18);
}
if ($usepost === true) {
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $this->_postParams);
}
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_VERBOSE, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
if (isset($this->cookie)) {
curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookie);
}
$this->_response = curl_exec($ch);
if (!$this->_response) {
curl_close($ch);
return false;
}
curl_close($ch);
$this->_html->load($this->_response);
return true;
}
Example 8: scrapeHTML
function scrapeHTML($param, $type)
{
$html = scraperWiki::scrape("http://www.norwegian.no/fly/lavpris/?D_City=CPH&A_City=DUB&TripType=2&D_Day=1&D_Month=201104&R_Day=1&R_Month=201104&AdultCount=1&ChildCount=0&InfantCount=0");
$dom = new simple_html_dom();
$dom->load($html);
// Iterate over table rows and get flight details.
foreach ($dom->find("TR[@HEIGHT='25']") as $data) {
// Flight details.
$tds = $data->find("div");
$airline = removeSpaces($tds[0]->plaintext);
$flight_type = $type;
$flight_num = removeSpaces($tds[1]->plaintext);
$destination = removeSpaces($tds[2]->plaintext);
$time = removeSpaces($tds[3]->plaintext);
$gate = removeSpaces($tds[4]->plaintext);
$remarks = removeSpaces($tds[5]->plaintext);
// Skip header row. Cheesy, but effective.
if ($airline == "Airline") {
continue;
}
// Set the date.
$date = date("m.d.y");
// Build up record to store.
$flight_data = array("date" => $date, "airline" => $airline, "flight_type" => $flight_type, "flight_num" => $flight_num, "destination" => $destination, "time" => $time, "gate" => $gate, "remarks" => $remarks);
// Save the record.
saveData(array("date", "airline", "flight_type", "flight_num"), $flight_data);
}
$dom->clear();
}
Example 9: grep_munich
function grep_munich($url, $table_name)
{
$html = scraperWiki::scrape($url);
$count = 0;
# Use the PHP Simple HTML DOM Parser to extract <td> tags
$dom = new simple_html_dom();
$dom->load($html);
//Drop all old information by dropping the table
scraperwiki::sqliteexecute("drop table if exists " . $table_name);
scraperwiki::sqlitecommit();
$table = $dom->getElementById('flight_info_area');
foreach ($table->find('tr') as $data) {
// Flight details. Read tds or ths
$tds = $data->find("td");
//if there are fewer than 7 columns, continue to the next row
if (sizeof($tds) < 7) {
continue;
}
//print $data->plaintext . "\n";
$flightnr = $tds[1]->plaintext;
$from = $tds[2]->plaintext;
$time = $tds[3]->plaintext;
$expected_time = $tds[4]->plaintext;
//Create date
$date = date("Y-m-d");
//Build an array of flight information
$flight_data = array("date" => $date, "count" => $count, "flightnr" => $flightnr, "from" => $from, "time" => $time, "expected_time" => $expected_time);
//Save the information for this flight
scraperwiki::save_sqlite(array("date", "count"), $flight_data, $table_name);
$count = $count + 1;
}
}
Example 10: scrapeTEDRSS
function scrapeTEDRSS($url, $sector)
{
print $url . " " . $sector . "\n";
// $xml = scraperWiki::scrape($url);
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_TIMEOUT, 20);
// 20 seconds before aborting
// try CURLOPT_CONNECTTIMEOUT (in seconds)
// try CURLOPT_LOW_SPEED_LIMIT (to define what slow is, with):
// curl_setopt($curl, CURLOPT_LOW_SPEED_TIME, 10); (10 second at low speed before aborting
$xml = curl_exec($curl);
print curl_error($curl) . "\n";
$dom = new simple_html_dom();
$dom->load($xml);
$items = $dom->find("item");
foreach ($items as $item) {
$guid = $item->find("guid");
$noticeURL = str_replace("TEXT", "DATA", $guid[0]->plaintext);
print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
echo "\n";
// $record = scrapeTEDDataPage ($noticeURL, $sector);
$record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
scraperwiki::save(array('sector', 'url'), $record);
sleep(1);
}
$dom->__destruct();
unset($items);
unset($dom);
unset($xml);
print memory_get_usage() / 1024 / 1024 . "MB\n";
}
Example 11: compile
/**
* Compile a template file by reading it, converting the DOM using
* {@see convert()}, then applying macros using {@see transform()}.
* @param string $template Template file path.
* @return string PHP template content.
* @throws InvalidTemplateException If template is inaccessible or invalid.
*/
public function compile($template)
{
$dom = new \simple_html_dom();
$this->currentTemplate = $template;
$file = file_get_contents($template);
if ($file === false) {
throw new InvalidTemplateException(tr('Could not read template: %1', $template));
}
if (!$dom->load($file, true, false)) {
throw new InvalidTemplateException(tr('Could not parse template: %1', $template));
}
$root = new InternalNode();
$main = $dom->find('[j:main]', 0);
if (isset($main)) {
$root->append($this->convert($main));
} else {
foreach ($dom->find('*, text') as $html) {
if ($html->parent->tag != 'root') {
continue;
}
$root->append($this->convert($html));
}
}
$this->transform($root);
return $root->__toString();
}
Example 12: getCardInfo
function getCardInfo($url)
{
$baseURL = 'http://gatherer.wizards.com/Pages/Card/';
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
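// Each card attribute sits in its own ..._SubContent_*Row container; read its value div and convert it to ISO-8859-1.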
$cardImage = $dom->find('img[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cardImage]', 0)->src;
$cardImage = str_replace("amp;", "", $cardImage);
$imgURL = $baseURL . $cardImage;
$name = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow] div[class=value]', 0)->plaintext;
$name = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $name);
$mana = "";
$manaImages = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_manaRow] div[class=value] img');
foreach ($manaImages as $manaItem) {
$mana .= substr($manaItem->alt, 0, 1);
}
$mana = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $mana);
$cmc = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cmcRow] div[class=value]', 0);
$cmc = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $cmc);
$type = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_typeRow] div[class=value]', 0);
$type = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $type);
$text = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_textRow] div[class=value]', 0);
$text = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $text);
$flavor = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_flavorRow] div[class=value]', 0);
$flavor = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $flavor);
$cardNumber = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_numberRow] div[class=value]', 0);
$cardNumber = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $cardNumber);
$artist = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_artistRow] div[class=value]', 0);
$artist = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $artist);
$rarity = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_rarityRow] div[class=value]', 0);
$rarity = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $rarity);
$set = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_setRow] div[class=value]', 0);
$set = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $set);
scraperwiki::save_sqlite(array("card"), array("Card" => trim($name), "Image" => $imgURL, "Mana" => trim($mana), "CMC" => trim($cmc), "Type" => trim($type), "Card Text" => trim($text), "Flavor Text" => trim($flavor), "Artist" => trim($artist), "Card Number" => trim($cardNumber), "Rarity" => trim($rarity), "Expansion" => trim($set)));
}
Example 13: do_day
function do_day($rec)
{
$html = scraperwiki::scrape($rec['url']);
$dom = new simple_html_dom();
$dom->load($html);
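// The disc listing follows the <a name="discs"> anchor; work through the text nodes of its parent element.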
$cell = $dom->find('a[name=discs]');
$lines = $cell[0]->parent->find('text');
print $lines[10] . "\n";
print count($lines) . "\n";
# loop by number, as null lines stop a foreach
$n = 0;
for ($line_no = 0; $line_no < count($lines); $line_no++) {
$line = $lines[$line_no];
if (strlen($line) == 3) {
# the DOM object crashes on this row, so ignore
continue;
}
#if (preg_match("#^" . $n . "#", $line, $matches)) {
print $line_no . " " . strlen($line) . "\n";
$n = $n + 1;
print $line . "\n";
#}
}
#scraperwiki::save(array('data'), array('data' => $data->plaintext));
}
Example 14: absolutizeHtml
public static function absolutizeHtml($sBaseUrl, $sHtml)
{
$oHtml = new simple_html_dom();
$oHtml->load($sHtml);
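// Rewrite relative URLs to absolute ones in the href/src attributes of common tags.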
$aTags = $oHtml->find('a');
foreach ($aTags as $oTag) {
$oTag->href = self::absolutizeUrl($sBaseUrl, $oTag->href);
}
$aTags = $oHtml->find('img');
foreach ($aTags as $oTag) {
$oTag->src = self::absolutizeUrl($sBaseUrl, $oTag->src);
}
$aTags = $oHtml->find('script');
foreach ($aTags as $oTag) {
$oTag->src = self::absolutizeUrl($sBaseUrl, $oTag->src);
}
$aTags = $oHtml->find('link');
foreach ($aTags as $oTag) {
$oTag->href = self::absolutizeUrl($sBaseUrl, $oTag->href);
}
// Parse url() in inline css
$aTags = $oHtml->find('style');
foreach ($aTags as $oTag) {
$oTag->innertext = preg_replace_callback('|url\\s*\\(\\s*[\'"]?([^\'"\\)]+)[\'"]?\\s*\\)|', function ($aMatches) use($sBaseUrl) {
return 'url("' . trim(self::absolutizeUrl($sBaseUrl, $aMatches[1])) . '")';
}, $oTag->innertext);
}
return $oHtml . '';
}
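A hedged usage sketch (the class name HtmlHelper is assumed, and the exact output depends on absolutizeUrl(), which is not shown here):
$sHtml = '<a href="docs/page.html">Docs</a> <img src="logo.png">';
echo HtmlHelper::absolutizeHtml('http://example.com/base/', $sHtml);
// expected to produce something like:
// <a href="http://example.com/base/docs/page.html">Docs</a> <img src="http://example.com/base/logo.png">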
Example 15: scrapPage
function scrapPage($page)
{
print "Scraping page " . $page;
$url = "http://www.geipan.fr/index.php?id=202";
$fields_string = "&no_cache=1&" . "tx_geipansearch_pi1%5Bsubmit_form%5D=1&" . "tx_geipansearch_pi1%5Btexte_resume%5D=&" . "tx_geipansearch_pi1%5Bdate_debut%5D=&" . "tx_geipansearch_pi1%5Bdate_fin%5D=&" . "no_cache=1&" . "tx_geipansearch_pi1%5Bclasse_cas%5D=tous&" . "tx_geipansearch_pi1%5Bregion%5D=&" . "page=" . $page . "&" . "order_by=&" . "sens=";
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_TIMEOUT, 20);
curl_setopt($curl, CURLOPT_POST, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
$html = curl_exec($curl);
print curl_error($curl) . "\n";
// print($html);
$dom = new simple_html_dom();
$dom->load($html);
$trs = $dom->find("tr");
foreach ($trs as $tr) {
if (isset($tr->attr['onclick'])) {
$ID = substr($tr->attr['onclick'], strpos($tr->attr['onclick'], "cas=") + 4, 13);
print $ID . "\n";
$tds = $tr->find("td");
$title = utf8_encode($tds[0]->plaintext);
$date = $tds[1]->plaintext;
$departement = utf8_encode($tds[2]->plaintext);
$classe = $tds[3]->plaintext;
$maj = $tds[4]->plaintext;
$city = substr($title, 0, strpos($title, "(") - 1);
$record = array('ID' => $ID, 'title' => $title, 'date' => $date, 'departement' => $departement, 'classe' => $classe, 'maj' => $maj, 'city' => $city);
scraperwiki::save(array('ID', 'maj'), $record);
}
}
}