本文整理汇总了PHP中simple_html_dom::find方法的典型用法代码示例。如果您正苦于以下问题:PHP simple_html_dom::find方法的具体用法?PHP simple_html_dom::find怎么用?PHP simple_html_dom::find使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类simple_html_dom的用法示例。
在下文中一共展示了simple_html_dom::find方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: getProducts
/**
 * Scrape one product-listing page, write one CSV row per product to the
 * global file handle $o, then follow the "next" link recursively.
 *
 * Each CSV row is: product name, product type (1 when the item contains a
 * `p.minimal-price` element, 0 otherwise), category, product URL.
 *
 * @param string $u   URL of the listing page to scrape.
 * @param mixed  $cat Category value copied verbatim into every CSV row.
 * @return void
 */
function getProducts($u, $cat)
{
    global $o;
    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));

    $items = $d->find('li.grid-item');
    $nextUrl = null;
    if (count($items) > 0) {
        foreach ($items as $p) {
            $prod = $p->find('p.product-name > a', 0);
            if (is_null($prod)) {
                // Grid item without a product link: there is nothing to record,
                // and dereferencing null would abort the whole crawl.
                continue;
            }
            $prodname = trim($prod->innertext);
            $prodtype = is_null($p->find('p.minimal-price', 0)) ? 0 : 1;
            fputcsv($o, array($prodname, $prodtype, $cat, $prod->href));
            echo $prodname . "\n";
        }
        // Read the pagination target before the DOM is released below.
        $next = $d->find('p.next', 0);
        if (!is_null($next)) {
            $nextUrl = $next->href;
        }
    }

    // Release the parsed tree before recursing so that memory use does not
    // grow with the number of pages followed.
    $d->clear();
    unset($d);

    // Only recurse on a usable URL; a missing href attribute is not a string.
    if (is_string($nextUrl) && $nextUrl !== '') {
        getProducts($nextUrl, $cat);
    }
}
示例2: parsing
/**
 * Parse a scraped search-result page into a list of product records.
 *
 * Each `li[class=s-result-item]` yields one record that may contain the keys
 * `image`, `title`, `price`, `numPrice` and `totalPage`. Records keep the
 * index of the result item they came from. The result is also stored in
 * $this->aResult.
 *
 * @param string $scrappedData Raw HTML of the result page.
 * @return array Product records keyed by result-item index.
 */
private function parsing($scrappedData)
{
    $html = new simple_html_dom();
    $html->load($scrappedData);

    // The total page count is a page-level value: look it up once instead of
    // once per result item. As before, the last matching element wins.
    $totalPage = null;
    foreach ($html->find('span[class=pagnDisabled]') as $maxPage) {
        $totalPage = $maxPage->innertext();
    }

    $result = [];
    foreach ($html->find('li[class=s-result-item]') as $key => $innerData) {
        // Start from an empty record for every item so that a field missing
        // from this item is not silently inherited from the previous one.
        $atmp = [];

        foreach ($innerData->find('img[class=s-access-image]') as $img) {
            $atmp['image'] = $img->getAttribute('src');
        }
        foreach ($innerData->find('h2[class=s-access-title]') as $title) {
            $atmp['title'] = $title->innertext();
        }
        foreach ($innerData->find('span[class=s-price]') as $priceNode) {
            $priceText = $priceNode->innertext();
            $atmp['price'] = $priceText;
            // Drop the leading character (presumably a currency symbol) and
            // the thousands separators.
            $atmp['numPrice'] = str_replace(",", '', substr($priceText, 1));
        }
        if ($totalPage !== null) {
            $atmp['totalPage'] = $totalPage;
        }

        // Items for which nothing at all was found are skipped.
        if ($atmp !== []) {
            $result[$key] = $atmp;
        }
    }

    return $this->aResult = $result;
}
示例3: clubURL
/**
 * Scrape one club page and save a per-season record for every table row whose
 * first cell is numeric (the season year).
 *
 * The club name is appended to the global `clubs` list and each season is
 * saved through scraperwiki::save() keyed on (club, year).
 *
 * @param string $url URL of the club page.
 * @return void
 */
function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    $headerTable = $dom->find('table', 0);
    $nameRow = is_null($headerTable) ? null : $headerTable->find('tr', 2);
    if (is_null($nameRow)) {
        // Page does not have the expected layout; nothing can be extracted.
        $dom->clear();
        unset($dom);
        return;
    }

    // NOTE(review): the search string below may originally have been a
    // non-breaking space or `&nbsp;` entity - confirm against the live page.
    $clubName = trim(str_replace(' ', '', $nameRow->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));

    // $GLOBALS is the real superglobal; the former `$_GLOBAL` was just a
    // local array that vanished when the function returned.
    $GLOBALS['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";

    $seasonTable = $dom->find('table', 2);
    $rows = is_null($seasonTable) ? array() : $seasonTable->find('tr');
    foreach ($rows as $row) {
        $cells = $row->find('td');
        // Header and spacer rows have no (or too few) data cells; column 12 is
        // the highest index read below.
        if (count($cells) < 13 || !is_numeric($cells[0]->plaintext)) {
            continue;
        }
        $year = trim($cells[0]->plaintext);
        $position = trim(str_replace(' ', '', $cells[1]->plaintext));
        if (trim($position) == 'Champion') {
            $position = 1;
        }
        $leagueLevel = trim($cells[2]->plaintext);
        $overallPosition = trim($cells[3]->plaintext);
        // Dots are stripped from the attendance figures (presumably thousands
        // separators).
        $avgAttendance = trim(str_replace('.', '', $cells[4]->plaintext));
        $totalAttendance = trim(str_replace('.', '', $cells[12]->plaintext));
        $dataset = array(
            'club' => $formatClubName,
            'year' => $year,
            'finishedPosition' => $position,
            'league' => $leagueLevel,
            'overallPosition' => $overallPosition,
            'avgAttendance' => $avgAttendance,
            'totalAttendance' => $totalAttendance,
        );
        scraperwiki::save(array('club', 'year'), $dataset);
    }

    /*
     * The next two lines stop a memory leak in Simple HTML DOM, as per
     * http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
示例4: run_ml
/**
 * Scrape one result page of musiklegal.com, store every song row, then follow
 * the "Next" link recursively. Terminates the script (exit) once there is no
 * further page.
 *
 * @param int|string $q_num Result offset appended to the search URL.
 * @return void
 */
function run_ml($q_num = 0)
{
    $html = scraperWiki::scrape("http://musiklegal.com/search/result/a/" . $q_num);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("tr") as $data) {
        $tds = $data->find("td");
        if (count($tds) < 4) {
            // Header or layout row: cells 0..3 are all read below.
            continue;
        }
        // NOTE(review): the `<strong>` fragments inside these search strings
        // look like an artefact of how the snippet was published (probably
        // `<a href=...` and `</a>` originally) - confirm before relying on them.
        $temp_data = explode('">', str_replace('</<strong>a</strong>>', '', str_replace('<<strong>a</strong> href="http://musiklegal.com/song/detail/', '', $tds[1]->plaintext)));
        $record = array(
            'No' => str_replace('.', '', $tds[0]->plaintext),
            'Code' => $temp_data[0],
            // explode() yields a single element when the separator is absent.
            'Song Title' => isset($temp_data[1]) ? $temp_data[1] : null,
            'Artist' => $tds[2]->plaintext,
            'Album' => $tds[3]->plaintext,
        );
        /*
         * Stores results
         */
        scraperwiki::save_sqlite(array("No"), $record);
    }

    // Default to "no next page" so the check below never reads an undefined
    // variable. As before, the last "Next" link on the page wins.
    $tmp_a = 0;
    foreach ($dom->find("a") as $a) {
        if ($a->plaintext == 'Next') {
            $tmp_a = str_replace('http://musiklegal.com/search/result/a/', '', $a->href);
        }
    }

    // Free the parsed tree before recursing into the next page.
    $dom->clear();
    unset($dom);

    if ((int) $tmp_a != 0) {
        run_ml($tmp_a);
    } else {
        exit;
    }
}
示例5: getCategories
/**
 * Recursively walk a category tree, recording every category found in the
 * refinements container of the page at $u.
 *
 * Each category is written either to the CSV handle $f (when the global
 * $local flag is truthy) or to the ScraperWiki SQLite store keyed on URL.
 *
 * @param string $u URL of the category page to scrape.
 * @return void
 */
function getCategories($u)
{
    // $local was read but never imported before, so the CSV branch could
    // never run; it is now declared alongside the other globals.
    global $baseurl, $f, $local;
    $path = "";
    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));
    echo "Loaded URL: " . $u . "\n";

    $container = $d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]', 0);
    if (is_null($container)) {
        // Leaf page: no further categories to descend into.
        return;
    }

    $breadcrumb = $d->find('div[id=breadcrumb]', 0);
    if (!is_null($breadcrumb)) {
        foreach ($breadcrumb->children() as $crumb) {
            $path .= trim($crumb->innertext) . "/";
        }
        // Append whatever follows the last ">" of the breadcrumb markup.
        $path .= trim(strrchr($breadcrumb->innertext, ">"), "> ");
    }

    $refinements = $container->find('div[class=S2refinementsContainer]', 0);
    if (is_null($refinements)) {
        return;
    }

    foreach ($refinements->children() as $div) {
        $link = $div->children(0);
        if (is_null($link)) {
            continue;
        }
        // The label is the text before the first "(" (presumably an item
        // count); fall back to the whole label when there is no parenthesis.
        $label = $link->innertext;
        $head = strstr($label, "(", true);
        $name = trim($head === false ? $label : $head);
        $url = $baseurl . $link->href;
        $data = array("Name" => $name, "Path" => $path, "URL" => $url);
        echo $path . "/" . $name . "\n";
        if (!empty($local)) {
            fputcsv($f, array($name, $path, $url));
        } else {
            scraperwiki::save_sqlite(array("URL"), $data);
        }
        getCategories($url);
    }
}
示例6: compile
/**
 * Turn a template file into PHP template content.
 *
 * The file is read and parsed into a DOM. If an element carrying the
 * `j:main` attribute exists, only that element is converted with
 * {@see convert()}; otherwise every node whose parent is the document root
 * is converted. Macros are then applied with {@see transform()}.
 *
 * @param string $template Template file path.
 * @return string PHP template content.
 * @throws InvalidTemplateException If the template cannot be read or parsed.
 */
public function compile($template)
{
    $this->currentTemplate = $template;

    $source = file_get_contents($template);
    if ($source === false) {
        throw new InvalidTemplateException(tr('Could not read template: %1', $template));
    }

    $document = new \simple_html_dom();
    if (!$document->load($source, true, false)) {
        throw new InvalidTemplateException(tr('Could not parse template: %1', $template));
    }

    $root = new InternalNode();
    $mainNode = $document->find('[j:main]', 0);
    if ($mainNode !== null) {
        $root->append($this->convert($mainNode));
    } else {
        // No explicit main element: convert each top-level node in order.
        foreach ($document->find('*, text') as $node) {
            if ($node->parent->tag == 'root') {
                $root->append($this->convert($node));
            }
        }
    }

    $this->transform($root);
    return $root->__toString();
}
示例7: getCardInfo
/**
 * Scrape a single Gatherer card page and save its details to the ScraperWiki
 * SQLite store.
 *
 * Every textual value is transliterated from UTF-8 to ISO-8859-1 and trimmed
 * before being saved.
 *
 * @param string $url URL of the card detail page.
 * @return void
 */
function getCardInfo($url)
{
    $baseURL = 'http://gatherer.wizards.com/Pages/Card/';
    // Common prefix of every element id looked up on the page.
    $idPrefix = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_';

    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($url));

    $toLatin1 = function ($value) {
        return iconv("UTF-8", "ISO-8859-1//TRANSLIT", $value);
    };
    // First "value" div inside the detail row with the given id suffix.
    $valueOf = function ($row) use ($dom, $idPrefix) {
        return $dom->find('div[id=' . $idPrefix . $row . '] div[class=value]', 0);
    };

    $imageSrc = $dom->find('img[id=' . $idPrefix . 'cardImage]', 0)->src;
    $imgURL = $baseURL . str_replace("amp;", "", $imageSrc);

    $name = $toLatin1($valueOf('nameRow')->plaintext);

    // The mana cost is the first letter of each mana symbol's alt text.
    $mana = "";
    foreach ($dom->find('div[id=' . $idPrefix . 'manaRow] div[class=value] img') as $symbol) {
        $mana .= substr($symbol->alt, 0, 1);
    }
    $mana = $toLatin1($mana);

    $record = array("Card" => trim($name), "Image" => $imgURL, "Mana" => trim($mana));

    // Remaining columns: the matched node itself is converted (not its
    // plaintext), exactly as for each individual lookup it replaces.
    $rows = array(
        "CMC" => 'cmcRow',
        "Type" => 'typeRow',
        "Card Text" => 'textRow',
        "Flavor Text" => 'flavorRow',
        "Artist" => 'artistRow',
        "Card Number" => 'numberRow',
        "Rarity" => 'rarityRow',
        "Expansion" => 'setRow',
    );
    foreach ($rows as $column => $row) {
        $record[$column] = trim($toLatin1($valueOf($row)));
    }

    scraperwiki::save_sqlite(array("card"), $record);
}
示例8: absolutizeHtml
/**
 * Rewrite the URLs in an HTML document so that they are absolute.
 *
 * Handles `href` on <a> and <link>, `src` on <img> and <script>, and
 * `url(...)` references inside <style> blocks.
 *
 * @param string $sBaseUrl Base URL passed to absolutizeUrl() for each link.
 * @param string $sHtml    HTML markup to process.
 * @return string The rewritten markup.
 */
public static function absolutizeHtml($sBaseUrl, $sHtml)
{
    $oHtml = new simple_html_dom();
    $oHtml->load($sHtml);

    $aUrlAttributes = array('a' => 'href', 'img' => 'src', 'script' => 'src', 'link' => 'href');
    foreach ($aUrlAttributes as $sTagName => $sAttribute) {
        foreach ($oHtml->find($sTagName) as $oTag) {
            $sValue = $oTag->{$sAttribute};
            // Leave tags that lack the attribute untouched: giving an inline
            // <script> a src, or a named anchor an href, would change what
            // the document does.
            if (!is_string($sValue) || $sValue === '') {
                continue;
            }
            $oTag->{$sAttribute} = self::absolutizeUrl($sBaseUrl, $sValue);
        }
    }

    // Parse url() in inline css
    foreach ($oHtml->find('style') as $oTag) {
        $oTag->innertext = preg_replace_callback('|url\\s*\\(\\s*[\'"]?([^\'"\\)]+)[\'"]?\\s*\\)|', function ($aMatches) use ($sBaseUrl) {
            return 'url("' . trim(self::absolutizeUrl($sBaseUrl, $aMatches[1])) . '")';
        }, $oTag->innertext);
    }

    return (string) $oHtml;
}
示例9: get_data
/**
 * Fetch the lottery result page and extract the latest draw.
 *
 * On success $this->data is set to an array with the keys `qihao` (draw
 * number), `kjsj` (the time this method ran) and `code` (winning numbers with
 * spaces removed). The script is terminated with exit() when the page cannot
 * be fetched or when the draw has not been published yet.
 *
 * @return void
 */
private function get_data()
{
    include_once 'simplehtmldom_1_5/simple_html_dom.php';
    $simple_html_dom = new \simple_html_dom();

    // Fetch through the compress.zlib:// stream wrapper so that the response
    // is decompressed while it is read.
    $data = @file_get_contents("compress.zlib://" . self::URL);
    if (!$data) {
        $this->setLog(false, '重庆时时彩-开奖数据抓取失败');
        exit('重庆时时彩-数据抓取失败,请尽快联系网站管理员' . "\r\n");
    }

    // Convert the page to UTF-8.
    $encode = mb_detect_encoding($data, array('ASCII', 'UTF-8', 'GB2312', "GBK", 'BIG5'));
    $content = iconv($encode, 'UTF-8', $data);
    $simple_html_dom->load($content);

    $aside = $simple_html_dom->find('div[class=aside]', 0);

    // Draw number.
    $qihao = $aside->find('h3', 0)->find('em', 0)->plaintext;

    // Second row of the result table; it holds both the winning numbers and
    // the draw status.
    $row = $aside->find('div[class=mod-aside mod-aside-xssckj]', 0)->find('div[class=bd]', 0)->find('div[class=kpkjcode]', 0)->find('table', 0)->find('tr', 1);

    // Winning numbers.
    $code = $row->find('td', 1)->plaintext;
    if ($code == '--') {
        exit('重庆时时彩-等待开奖...' . "\r\n");
    }

    // Draw status. The two placeholder values are alternatives, so they must
    // be combined with ||; with && the condition could never be true.
    $isKaiJiang = $row->find('td', 2)->plaintext;
    if ($isKaiJiang == '--' || $isKaiJiang == '开奖中') {
        exit('重庆时时彩-等待开奖...' . "\r\n");
    }

    $simple_html_dom->clear();

    // Remove the spaces between the individual numbers.
    $code = str_replace(" ", '', $code);
    // Draw time (taken as the time of this run).
    $kjsj = date('Y-m-d H:i:s');
    $this->data = ['qihao' => $qihao, 'kjsj' => $kjsj, 'code' => $code];
}
示例10: scraper
/**
 * Scrape one EURES search-result page and insert every job that is not yet
 * in the `job` table.
 *
 * Uses the legacy mysql_* API and therefore needs an open mysql connection.
 * Progress is echoed as HTML fragments.
 *
 * @param string $url_search URL of the search-result page.
 * @param mixed  $country_id Country identifier stored with each job.
 * @return void
 */
function scraper($url_search, $country_id)
{
    $has_next = false;
    $base_url = "http://ec.europa.eu/eures/eures-searchengine/servlet";
    $html = scraperwiki::scrape($url_search);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find('table[class=JResult]') as $result) {
        // Reset the per-job values so that nothing carries over from the
        // previous result table.
        $url_job = '';
        $url_id = '';
        $description = '';
        $source_id = '';

        foreach ($result->find('td[class=JRTitle] a') as $job_page) {
            // The job path is the first single-quoted argument of the onclick
            // handler.
            $chars = explode("'", $job_page->onclick);
            if (!isset($chars[1])) {
                continue;
            }
            $url_job = $base_url . substr($chars[1], 1);
            $url_id = strstr($url_job, 'uniqueJvId=');
            $url_id = str_replace('uniqueJvId=', "", $url_id);
            echo "JOB: " . $url_job . "<br />";
        }

        foreach ($result->find('th') as $data) {
            $text = trim($data->plaintext);
            if ($text == 'Description:') {
                $description = trim($data->next_sibling()->plaintext);
                echo "DESCRIPTION: " . $description . "<br />";
            }
            if ($text == 'Source:') {
                $source = trim($data->next_sibling()->plaintext);
                // Kept as-is: insert_name() is defined elsewhere and receives
                // the value with quotes backslash-escaped.
                $source = str_replace("'", "\\'", $source);
                if ($source != '' && $source != ' ') {
                    $source_id = insert_name('source', $source);
                    echo "SOURCE: " . $source . "<br /><br />";
                }
            }
        }

        if ($url_job === '') {
            // No job link in this result table: there is nothing to store.
            continue;
        }

        $description = str_replace("</BR>", "", $description);

        // Everything below comes from a scraped page, so escape each value
        // for the SQL string context instead of only replacing quotes.
        $sql_url_job = mysql_real_escape_string($url_job);
        $sql_url_id = mysql_real_escape_string((string) $url_id);
        $sql_description = mysql_real_escape_string($description);
        $sql_source_id = mysql_real_escape_string((string) $source_id);
        $sql_url_search = mysql_real_escape_string((string) $url_search);
        $sql_country_id = mysql_real_escape_string((string) $country_id);

        $existing = mysql_query("SELECT * FROM job WHERE url = '{$sql_url_job}'");
        $cont = $existing ? mysql_num_rows($existing) : 0;
        if ($cont == 0) {
            mysql_query(
                "INSERT INTO job SET "
                . "url = '{$sql_url_job}', "
                . "url_id = '{$sql_url_id}', "
                . "description = '{$sql_description}', "
                . "source_id = '{$sql_source_id}', "
                . "url_search = '{$sql_url_search}', "
                . "country_id = '{$sql_country_id}', "
                . "url_scraper_date = SYSDATE(), "
                . "url_scraper_hour = SYSDATE()"
            );
        } else {
            echo "Job URL already extracted: " . $url_job . "<br /><br />";
        }
    }

    foreach ($dom->find('div[class=prevNext] a') as $next_page) {
        $text = $next_page->plaintext;
        if ($text == "Next page") {
            $url_next = $base_url . substr($next_page->href, 1);
            $has_next = true;
            print "<br /><br />NEXT: " . $url_next . "<br /><br />";
        }
    }

    // Release the parsed page before any follow-up request.
    $dom->clear();
    unset($html, $dom);

    //Comment this for tests, uncomment this to get all data
    // if ($has_next == true){
    //     sleep(1);
    //     scraper($url_next, $country_id);
    // }
}
示例11: createTables
/**
 * Create the table structure described by the <table> elements of self::$XML.
 *
 * Every table is dropped if it exists and then re-created. The outcome of
 * each CREATE TABLE is reported through tprintOk() / tprintError().
 *
 * @param array $configs Configuration; the keys `table-prefix` and `charset`
 *                       are read.
 * @return void
 */
private static function createTables($configs)
{
    foreach (self::$XML->find("table") as $value) {
        $tableName = $configs["table-prefix"] . $value->name;
        self::query("DROP TABLE IF EXISTS `{$tableName}`");

        // Collect the column and key definitions and join them afterwards, so
        // the statement never ends in a dangling comma when a table has no
        // primary key.
        $definitions = array();

        $pk = $value->find("pk", 0);
        if ($pk) {
            $pkSql = "`{$pk->name}` {$pk->type} NOT NULL ";
            if ($pk->ai) {
                $pkSql .= "AUTO_INCREMENT ";
            }
            $definitions[] = $pkSql . "COMMENT '主键'";
        }

        // Add the regular fields.
        $fields = $value->find("fields", 0);
        if ($fields) {
            foreach ($fields->children() as $fd) {
                $column = "`{$fd->name}` {$fd->type} NOT NULL";
                // "0" is a valid default even though it is falsy.
                if ($fd->default || $fd->default === "0") {
                    // Keyword defaults are emitted bare, anything else quoted.
                    if (in_array($fd->default, self::$DEFAULT_VALUE_KEYWORD)) {
                        $column .= " DEFAULT {$fd->default}";
                    } else {
                        $column .= " DEFAULT '{$fd->default}'";
                    }
                }
                $definitions[] = $column . " COMMENT '{$fd->comment}'";

                // Create an index for the field when requested.
                if ($fd->getAttribute("add-index") == "true") {
                    $indexType = $fd->getAttribute("index-type");
                    if ($indexType == "normal") {
                        $definitions[] = "KEY `{$fd->name}` (`{$fd->name}`)";
                    } elseif ($indexType == "unique") {
                        $definitions[] = "UNIQUE KEY `{$fd->name}` (`{$fd->name}`)";
                    }
                }
            }
        }

        if ($pk) {
            $definitions[] = "PRIMARY KEY (`{$pk->name}`)";
        }

        $sql = "CREATE TABLE `{$tableName}`(" . implode(",", $definitions)
            . ") ENGINE={$value->engine} DEFAULT CHARSET={$configs['charset']} COMMENT='{$value->comment}' AUTO_INCREMENT=1 ;";

        if (self::query($sql) !== false) {
            tprintOk("create table '{$tableName}' successfully.");
        } else {
            tprintError("create table '{$tableName}' faild.");
            tprintError(self::$DB_CONN->error);
        }
    }
}
示例12: find
/**
 * Discover the RSS/Atom feeds available at a URL and echo them as JSON.
 *
 * If the URL is itself a feed, a single entry titled "RSS" or "Atom" is
 * returned; otherwise every <link> element whose type looks like an RSS or
 * Atom application type is listed. The output is a JSON array of
 * {"href": ..., "title": ...} objects.
 *
 * @param string $url Address to inspect; a scheme is added when missing.
 * @return void
 */
public function find($url)
{
    # sanitize url
    $url = urldecode(strip_tags($url));

    # Remove a single trailing slash (safe on an empty string)
    if (substr($url, -1) === '/') {
        $url = substr($url, 0, -1);
    }

    # Add a scheme only when none is present; https:// URLs are kept as-is
    if (!preg_match('#^https?://#i', $url)) {
        $url = 'http://' . $url;
    }

    # List of feed urls found
    $results = array();

    # Try to load the content of url
    $content = @file_get_contents($url);
    if (!empty($content)) {
        $html = new simple_html_dom();
        $html->load($content, true);

        # Check if the url is itself a feed
        if (count($html->find('channel')) > 0) {
            $results[] = array('href' => $url, 'title' => 'RSS');
        } elseif (count($html->find('feed')) > 0) {
            $results[] = array('href' => $url, 'title' => 'Atom');
        } else {
            # hrefs already reported, used to drop duplicates
            $seen = array();
            foreach ($html->find('link') as $link) {
                $href = $link->href;
                if (!is_string($href) || $href === '') {
                    # <link> without a usable href
                    continue;
                }
                # Fix url to avoid path errors
                if ($href[0] == '/') {
                    $href = $url . $href;
                }
                if (isset($seen[$href])) {
                    continue;
                }
                # Save only feeds; preg_match replaces ereg(), which was
                # removed in PHP 7
                $type = (string) $link->type;
                if (preg_match('/application.*(rss|atom)/s', $type)) {
                    $seen[$href] = true;
                    $results[] = array('href' => $href, 'title' => $link->title);
                }
            }
        }
        $html->clear();
    }

    # Output the results as json
    echo json_encode($results);
}
示例13: get_images
/**
 * Build the list of page images for one chapter.
 *
 * The page numbers come from the options of the `pagejump` select, and the
 * image directory from the first image inside the `#page` element.
 *
 * @param string $chapter_url URL of the chapter page.
 * @param string $prefix      First part of each generated file name.
 * @param string $infix       Second part of each generated file name.
 * @return array Map of "<prefix>-<infix>-<page>.jpg" to the image URL.
 */
public function get_images($chapter_url, $prefix, $infix)
{
    // NOTE(review): the padded infix is computed but never used below; the
    // file names are built from the raw $infix - confirm which was intended.
    $ifx = Text::create($infix)->pad(3)->to_s();

    $page = new Page($chapter_url);
    $dom = new simple_html_dom();
    $dom->load($page->content());

    $pageSelect = $dom->find('select[name="pagejump"]', 0);
    $firstImage = $dom->find('#page', 0)->find('img', 0);
    $imageDir = dirname($firstImage->src);

    $pages = array();
    foreach ($pageSelect->find('option') as $option) {
        $number = $option->value;
        $pages[$prefix . '-' . $infix . '-' . $number . '.jpg'] = $imageDir . '/' . $number . '.jpg';
    }

    return $pages;
}
示例14: scrappe_offre
/**
 * Extract the summary fields of a job offer page and print them as JSON.
 *
 * The record is built from the `span.texteANPEDetail` elements at positions
 * 1, 5 and 6, and one JSON line is printed per span found. Nothing is printed
 * when the page has fewer than seven such spans.
 *
 * @param string $html      HTML of the offer page.
 * @param mixed  $reference Offer reference (currently unused).
 * @return void
 */
function scrappe_offre($html, $reference)
{
    $dom = new simple_html_dom();
    $dom->load($html);

    $span = $dom->find("div.tx-sqliwebServiceanpe-pi5 span.texteANPEDetail");
    if (count($span) < 7) {
        // Indexes 1, 5 and 6 are read below; bail out on an unexpected layout.
        $dom->clear();
        return;
    }

    $actualisation = $span[1]->plaintext;
    // The record does not depend on the span being visited, so build it once.
    $record = array(
        // Two characters at offset 29 (presumably the day of the update date).
        'actualiseJJ' => intval(substr($actualisation, 29, 2)),
        'actualise le' => $actualisation,
        'type_contrat' => $span[5]->plaintext,
        'analyse_type_contrat' => "",
        'experiance' => $span[6]->plaintext,
    );

    // One line per span, as before.
    print str_repeat(json_encode($record) . "\n", count($span));
    #scraperwiki::save(array('contenu_offre'), $record);

    $dom->clear();
}
示例15: scrapeHTML
/**
 * Scrape the flight table for one listing type and save every flight row.
 *
 * @param string $param Value sent as the `type` query parameter.
 * @param mixed  $type  Flight type stored with every record.
 * @return void
 */
function scrapeHTML($param, $type)
{
    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape(BASE_URL . "?type={$param}"));

    // Column name for each of the first six cells of a flight row.
    $columns = array("airline", "flight_num", "destination", "time", "gate", "remarks");

    // Flight rows are the table rows with HEIGHT='25'.
    foreach ($dom->find("TR[@HEIGHT='25']") as $row) {
        $cells = $row->find("td");
        $fields = array();
        foreach ($columns as $index => $column) {
            $fields[$column] = removeSpaces($cells[$index]->plaintext);
        }

        // The header row carries the column title instead of an airline.
        if ($fields["airline"] == "Airline") {
            continue;
        }

        $flight_data = array(
            "date" => date("m.d.y"),
            "airline" => $fields["airline"],
            "flight_type" => $type,
            "flight_num" => $fields["flight_num"],
            "destination" => $fields["destination"],
            "time" => $fields["time"],
            "gate" => $fields["gate"],
            "remarks" => $fields["remarks"],
        );
        saveData(array("date", "airline", "flight_type", "flight_num"), $flight_data);
    }

    $dom->clear();
}