本文整理汇总了PHP中simple_html_dom::__destruct方法的典型用法代码示例。如果您正苦于以下问题：PHP simple_html_dom::__destruct方法的具体用法？PHP simple_html_dom::__destruct怎么用？PHP simple_html_dom::__destruct使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类simple_html_dom的用法示例。
在下文中一共展示了simple_html_dom::__destruct方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: scrapeTEDRSS
/**
 * Download an RSS feed and save one datastore record per <item>.
 *
 * For each item the text of its first <guid> has "TEXT" replaced by "DATA" to
 * form the notice URL; that URL is saved together with the sector and the
 * current timestamp. The URL, any cURL error and the memory usage are printed
 * as progress output.
 *
 * @param string $url    Feed URL to download.
 * @param string $sector Sector label printed and stored with each notice.
 * @return void
 */
function scrapeTEDRSS($url, $sector)
{
    print $url . " " . $sector . "\n";

    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    // NOTE(review): certificate verification is switched off here; confirm this is intentional.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    // Abort the whole transfer after 20 seconds.
    // CURLOPT_CONNECTTIMEOUT or CURLOPT_LOW_SPEED_LIMIT / CURLOPT_LOW_SPEED_TIME
    // could be used for finer-grained limits.
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);

    $xml = curl_exec($curl);
    print curl_error($curl) . "\n";
    // Release the handle once the error text has been read; the original never closed it.
    curl_close($curl);

    if ($xml === false) {
        // The transfer failed (the error was printed above); there is nothing to parse.
        print memory_get_usage() / 1024 / 1024 . "MB\n";
        return;
    }

    $dom = new simple_html_dom();
    $dom->load($xml);
    $items = $dom->find("item");
    foreach ($items as $item) {
        $guid = $item->find("guid", 0);
        if (!$guid) {
            // An item without a <guid> has no notice URL to record.
            continue;
        }
        $noticeURL = str_replace("TEXT", "DATA", $guid->plaintext);
        print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
        echo "\n";
        $record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
        scraperwiki::save(array('sector', 'url'), $record);
        // Pause between saves.
        sleep(1);
    }

    // Explicitly tear down the parser and drop the large buffers before reporting memory.
    $dom->__destruct();
    unset($items);
    unset($dom);
    unset($xml);
    print memory_get_usage() / 1024 / 1024 . "MB\n";
}
示例2: scrape_job_page
/**
 * Scrape one page of the job search results and save every job row.
 *
 * Only table rows with exactly five <td> cells are treated as job rows.
 * The link to each job's detail page is printed as progress output.
 *
 * @param int|string $page Page number appended to the search URL.
 * @return void
 */
function scrape_job_page($page)
{
    $markup = scraperWiki::scrape("https://jobsearch.direct.gov.uk/JobSearch/PowerSearch.aspx?tm=0&pg=" . $page);
    $dom = new simple_html_dom();
    $dom->load($markup);

    foreach ($dom->find("table tr") as $row) {
        $cells = $row->find("td");
        if (count($cells) != 5) {
            // Not a job row.
            continue;
        }

        // The first cell holds a named anchor whose name is the numeric job id.
        $anchor = $cells[0]->find('a[name]', 0);
        $job_id = intval($anchor->name);
        $details_url = $cells[2]->find('a', 0)->href;
        print $details_url;

        $job = array(
            'id' => $job_id,
            'posted_date' => date_create($cells[0]->plaintext),
            'job_title' => trim($cells[2]->plaintext),
            'company' => trim($cells[3]->plaintext),
            'location' => trim($cells[4]->plaintext),
            'url' => $details_url,
        );
        scraperwiki::save(array('id'), $job);
    }

    // Clear memory held by the parser.
    $dom->__destruct();
}
示例3: foreach
scraperwiki::save_sqlite(array("stock"), $record, "NSE_Stocks");
}
}
}
$dom->__destruct();
}
//scrapping html
require 'scraperwiki/simple_html_dom.php';
// For every letter A-Z, fetch listing pages 0..10 and store one row per stock
// (name, close, EPS, P/E) in the "NSE_Stocks" table, keyed by stock name.
foreach (range('A', 'Z') as $char) {
    $dom = new simple_html_dom();
    for ($pageNum = 0; $pageNum <= 10; $pageNum++) {
        $html = scraperWiki::scrape("http://www.kotaksecurities.com/stock-market-news/equity/1024/pe-ratio-NSE-All-" . $char . "/" . $pageNum);
        // Loose comparison on purpose: both a null and an empty response are skipped.
        if ($html != NULL) {
            $dom->load($html);
            foreach ($dom->find('table[class="TableBG1"]') as $table) {
                foreach ($table->find('tr[class="tabbody"]') as $tr) {
                    // Child cells 0..3 map to these record fields, in this order.
                    $record = array();
                    foreach (array('stock', 'close', 'eps', 'pe') as $position => $field) {
                        $record[$field] = $tr->children($position)->plaintext;
                    }
                    scraperwiki::save_sqlite(array("stock"), $record, "NSE_Stocks");
                }
            }
        }
    }
    // Clear the parser's memory before moving on to the next letter.
    $dom->__destruct();
}
示例4: extractCFP
/**
 * Scrape a single call-for-papers page and save it to the datastore.
 *
 * Nothing is fetched when alreadyKnown() reports the entry as seen. Otherwise
 * every <span> that carries a `property` attribute is inspected and its
 * `content` attribute is collected: scalar properties go straight into the
 * record, while summary / start-date / end-date values are gathered into
 * parallel lists and matched up afterwards.
 *
 * @param string $cat  Category stored with the record.
 * @param string $name Event name; the summary equal to it supplies the event's own dates.
 * @param string $link Site-relative link of the page (spaces are sent as %20).
 * @return bool false when the entry was already known, true after the record was saved.
 */
function extractCFP($cat, $name, $link)
{
    print " " . $name . " -- " . $link . "\n";
    if (alreadyKnown($cat, $name, $link)) {
        return false;
    }

    $html = scraperWiki::scrape("http://www.wikicfp.com/" . str_replace(" ", "%20", $link));
    $dom = new simple_html_dom();
    $dom->load($html);

    // RDFa property => record field. The field name doubles as the label in the progress output.
    $scalar_properties = array(
        "v:eventType" => 'type',
        "dc:title" => 'title',
        "dc:source" => 'link',
        "dc:identifier" => 'id',
        "dc:description" => 'description',
        "v:locality" => 'locality',
    );
    // As in the original code, the stored link defaults to "" and only comes from dc:source.
    $fields = array('type' => "", 'title' => "", 'link' => "", 'id' => "", 'description' => "", 'locality' => "");
    $summaries = array();
    $startdates = array();
    $enddates = array();

    foreach ($dom->find("span") as $span) {
        if (!isset($span->attr['property'])) {
            continue;
        }
        $property = $span->attr['property'];
        // A span may carry `property` without `content`; treat that as an empty value.
        $content = isset($span->attr['content']) ? $span->attr['content'] : "";

        if (isset($scalar_properties[$property])) {
            $field = $scalar_properties[$property];
            $fields[$field] = $content;
            print " " . $field . " = " . $content . "\n";
        } elseif ($property === "v:summary") {
            $summaries[] = $content;
        } elseif ($property === "v:startDate") {
            $startdates[] = $content;
        } elseif ($property === "v:endDate") {
            $enddates[] = $content;
        }
    }
    // Release the parser once, after the loop. The original called this inside the
    // loop, tearing the DOM down on every iteration while its nodes were still in use.
    $dom->__destruct();

    $sdate = "";
    $edate = "";
    $deadline = "";
    $notification = "";
    $finalversion = "";
    // NOTE(review): summaries and dates are paired purely by position; this assumes the
    // page lists them in matching order — confirm against real pages.
    foreach ($summaries as $ind => $summary) {
        $start = isset($startdates[$ind]) ? $startdates[$ind] : "";
        if (strcmp($summary, $name) === 0) {
            $sdate = $start;
            $edate = isset($enddates[$ind]) ? $enddates[$ind] : "";
            print " between " . $sdate . " and " . $edate . "\n";
        }
        if ($summary === "Submission Deadline") {
            $deadline = $start;
            print " deadline = " . $deadline . "\n";
        }
        if ($summary === "Notification Due") {
            $notification = $start;
            print " notification = " . $notification . "\n";
        }
        if ($summary === "Final Version Due") {
            $finalversion = $start;
            print " finalversion = " . $finalversion . "\n";
        }
    }

    $record = array(
        'id' => $fields['id'],
        'category' => $cat,
        'type' => $fields['type'],
        'title' => $fields['title'],
        'link' => $fields['link'],
        'location' => $fields['locality'],
        'description' => $fields['description'],
        'startdate' => $sdate,
        'enddate' => $edate,
        'deadline' => $deadline,
        'notification' => $notification,
        'finalversion' => $finalversion,
    );
    // NOTE(review): the unique key is spelled 'ID' while the record key is 'id';
    // left unchanged because the existing table may depend on it — confirm.
    scraperwiki::save(array('ID', 'category'), $record);
    // Pause between page fetches.
    sleep(5);
    return true;
}
示例5: get_city_list
/**
 * Collect the city links found on an index page.
 *
 * Looks inside the second <table> of div#ctl00_cphmain_pnlIndex and returns
 * one entry per anchor that has an href.
 *
 * @param string $url Index page to scrape.
 * @return array List of arrays with 'source' (absolute URL) and 'name' (link text);
 *               empty when the expected container or links are not present.
 */
function get_city_list($url)
{
    global $run_environment;
    global $max_records;

    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    // Always return an array, even when the page yields no links.
    $cities = array();

    // table/tr/td/div/table/tr/td[2]/table/tr/td/table/tr[5]
    $panel = $dom->find("div[id=ctl00_cphmain_pnlIndex]", 0);
    // Guard each step: a changed page layout would otherwise be a fatal call on null.
    $content = $panel ? $panel->find("table", 1) : null;

    if ($content) {
        foreach ($content->find("a") as $link) {
            if ($link->href) {
                $cities[] = array(
                    'source' => 'http://www.floridaleagueofcities.com' . $link->href,
                    'name' => $link->plaintext,
                );
            }
        }
    }

    // Clear memory
    $dom->__destruct();
    if ($content) {
        $content->__destruct();
    }
    return $cities;
}
示例6: get_city_data
/**
 * Scrape the detail page of one city.
 *
 * In the 'dev' run environment the city array (including representative
 * details) is returned; otherwise the city is saved to the 'jurisdiction'
 * table and true is returned. If the page contains no <table> at all, the
 * URL is echoed and the script exits.
 *
 * @param string $url Detail page to scrape.
 * @return array|bool City data in 'dev', true after saving otherwise.
 */
function get_city_data($url)
{
    global $run_environment;

    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($url));

    // for debugging
    if (!$dom->find("table", 0)) {
        echo $url;
        exit;
    }

    // /html/body/table/tbody/tr/td/div/section/div/table
    $content = $dom->find("table", 0)->find("tr", 0)->find("td", 0)->find("div", 0)->find("section", 0)->find("div", 0)->find("table", 0);

    // The heading reads "<type> of <name>"; split it around " of ".
    $full_name = $content->find("h2", 0)->plaintext;
    $of_position = strpos($full_name, ' of ');

    $city = array(
        'source' => $url,
        'name_full' => $full_name,
        'name' => substr($full_name, $of_position + 4),
        'type' => strtolower(substr($full_name, 0, $of_position)),
    );

    // Row 5 holds the website link, which may be absent.
    $website = $content->find("tr", 5)->find("td", 1)->find("a", 0);
    $city['url'] = $website ? $website->href : null;

    // Remaining fields: the value is the second cell of a fixed table row.
    $row_for_field = array(
        'region' => 6,
        'county' => 7,
        'address1' => 8,
        'address2' => 9,
        'phone' => 10,
        'fax' => 11,
        'council_meeting_time' => 12,
        'year_incorporated' => 13,
        'fiscal_year_start' => 14,
        'population' => 15,
        'government_type' => 16,
        'civil_service' => 17,
    );
    foreach ($row_for_field as $field => $row_index) {
        $city[$field] = trim($content->find("tr", $row_index)->find("td", 1)->plaintext);
    }

    // Must run before the DOM is cleared below.
    $rep_details = get_rep_details($content, $url, $city['name']);

    // Clear memory
    $dom->__destruct();
    $content->__destruct();

    if ($run_environment != 'dev') {
        scraperwiki::save_sqlite(array('name_full', 'source'), $city, 'jurisdiction');
        return true;
    }

    $city['reps'] = $rep_details;
    return $city;
}
示例7: foreach
// Part categories. NOTE(review): this list is built but not used by the code below,
// which only scrapes the monitor listing — confirm whether a loop over $url was intended.
$url[] = "cpu";
$url[] = "motherboard";
$url[] = "memory";
$url[] = "internal-hard-drive";
$url[] = "video-card";
$url[] = "power-supply";
$url[] = "case";
$url[] = "monitor";

$html = scraperWiki::scrape("http://pcpartpicker.com/parts/monitor/");
$dom = new simple_html_dom();
$dom->load($html);
unset($html);

// NOTE(review): this selector (and the "table class=..." one below) is not standard
// attribute-selector syntax such as [id=list_table]; left untouched because changing
// it would change which rows are visited — confirm what it actually matches.
foreach ($dom->find("id=\"list_table\" tr") as $data) {
    $tds = $data->find("td");
    $tdsa = $data->find("td a");
    // A product row needs at least one cell and one link to a detail page.
    if (empty($tds[0]) || empty($tdsa[0])) {
        continue;
    }

    $html_a = scraperWiki::scrape("http://pcpartpicker.com" . $tdsa[0]->href);
    $dom_a = new simple_html_dom();
    $dom_a->load($html_a);
    $table_a = $dom_a->find("table class=\"box-table-a\"");

    // Start each product with a fresh record; previously spec keys from earlier
    // products leaked into every later record that was saved.
    $rekod_a = array("href" => $tdsa[0]->href);
    if (!empty($table_a[0])) {
        foreach ($table_a[0]->find("tr") as $data_a) {
            $tds_a = $data_a->find("td");
            // Only label/value rows (two cells) contribute to the record.
            if (isset($tds_a[0], $tds_a[1])) {
                $rekod_a[$tds_a[0]->plaintext] = $tds_a[1]->plaintext;
            }
        }
    }
    scraperwiki::save(array('href'), $rekod_a);

    // Clear the detail page's parser before fetching the next product.
    $dom_a->__destruct();
}
$dom->__destruct();
示例8: get_cb_data
//.........这里部分代码省略.........
$heading = strtolower(str_replace(' ', '_', $heading));
// Clean up stray html tags
if (stripos($val[1], '<span>')) {
$val[1] = get_between($val[1], '<span>', '</span>');
}
$val[1] = trim($val[1], '</p>');
$val[1] = trim($val[1], ',');
$val[1] = trim($val[1], ',');
$cb[$heading] = $val[1];
}
}
// check if we have data in the email field that needs to be parsed like the website url
if (!empty($cb['address'])) {
$cb['address'] = trim($cb['address']);
$cb['address'] = trim($cb['address'], ',');
$cb['address'] = str_replace(",,", ",", $cb['address']);
$cb['address'] = trim($cb['address']);
$lines = explode(',', $cb['address']);
$line_num = count($lines) - 1;
if ($line_num >= 4) {
$cb['address_title'] = $lines[$line_num - 4];
} else {
$cb['address_title'] = $cb['borough'] . ' ' . $cb['community_board'];
}
if ($cb['address_title'] == $lines[$line_num - 3]) {
$cb['address_1'] = $lines[$line_num - 2];
$cb['address_2'] = null;
} else {
$cb['address_1'] = $lines[$line_num - 3];
$cb['address_2'] = $lines[$line_num - 2];
}
$zip = trim($lines[$line_num], ', NY ');
$cb['address_zip'] = $zip;
$cb['address_city'] = $lines[$line_num - 1];
$cb['address_state'] = 'NY';
}
// check if we have data in the email field that needs to be parsed like the website url
if (!empty($cb['email'])) {
$snippet = new simple_html_dom();
$snippet->load($cb['email']);
if ($snippet->find('a', 0)) {
// Isolate the email address from the other html
if (stripos($cb['email'], '<a') > 0) {
$cb['email'] = trim(substr($cb['email'], 0, stripos($cb['email'], '<a')));
if (count($emails = explode(',', $cb['email'])) > 1) {
$cb['all_email'] = $cb['email'];
$cb['email'] = trim($emails[0]);
$cb['email'] = trim($cb['email'], ' ');
}
} else {
$cb['email'] = null;
$cb['website'] = null;
}
$cb['website'] = $snippet->find('a', 0)->href;
// External URLs have a proxy URL on nyc.gov, let's parse that off
if (stripos($cb['website'], 'exit.pl')) {
$cb['website'] = substr($cb['website'], stripos($cb['website'], 'exit.pl?') + 12);
}
} else {
$cb['website'] = null;
}
} else {
$cb['email'] = null;
}
// Make this field universal, even if we don't have any data
if (empty($cb['all_email'])) {
$cb['all_email'] = null;
}
// verify we didn't mix up website and email
if (!empty($cb['website']) && stripos($cb['website'], 'mailto') !== FALSE) {
$cb['email'] = substr($cb['website'], stripos($cb['website'], 'mailto:') + 7);
$cb['website'] = null;
}
// Be sure to clear any stray commas
if (!empty($cb['email'])) {
$cb['email'] = trim($cb['email'], ',');
}
// normalize field names
if (!empty($cb['chairperson'])) {
$cb['chair'] = $cb['chairperson'];
unset($cb['chairperson']);
}
if ($run_environment == 'dev') {
$cbs[] = $cb;
} else {
scraperwiki::save_sqlite(array('source', 'borough', 'community_board_number'), $cb, $table_name = 'community_board');
}
$count++;
//if ($run_environment == 'dev' && $count > $max_records) break;
// Clear memory
$board->__destruct();
}
// Clear memory
$dom->__destruct();
if ($run_environment == 'dev') {
return $cbs;
} else {
return true;
}
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:101,代码来源:city_representatives_-_nyc_community_boards_2.php
示例9: substr
$email_rest = substr($datos_rest[1]->innertext, $posicion_corte);
} else {
$email_rest = "";
}
$iframe = $dom_rest->find("div.TabbedPanelsContent iframe");
$posicion_corte = strpos($iframe[0]->src, "ll=") + 3;
$coords_rest = substr($iframe[0]->src, $posicion_corte);
$posicion_corte = strpos($coords_rest, "&");
$coords_rest = substr($coords_rest, 0, $posicion_corte);
$coords = explode(",", $coords_rest);
$lat_rest = $coords[0];
$lon_rest = $coords[1];
$web_rest = $dom_rest->find("span.url a.external");
if ($web_rest) {
$web_rest = $web_rest[0]->href;
} else {
$web_rest = "";
}
$type_rest = $dom_rest->find("div.col_02 p");
$type_rest = $type_rest[0]->innertext;
$desc_rest = $dom_rest->find("div.col_02");
$posicion_corte = strpos($desc_rest[1]->innertext, "Descripci") + 54;
$desc_rest = strip_tags(substr($desc_rest[1]->innertext, $posicion_corte));
$desc_rest = $type_rest . " - " . $desc_rest;
$restaurante = array("nombre" => utf8_encode($nombre_rest), "direccion" => utf8_encode($direccion_rest), "telefono" => $telefono_rest, "descripcion" => utf8_encode($desc_rest), "lattitude" => $lat_rest, "longitude" => $lon_rest);
$restaurantes[] = $restaurante;
$dom_rest->__destruct();
}
$dom->__destruct();
}
scraperwiki::save(array('nombre'), $restaurantes);
示例10: convert
/**
 * Convert embedded CSS (the document's <style> block) to inline style attributes.
 *
 * The first <style> block is parsed into selector => declarations pairs; every
 * element matching a selector gets the declarations appended to its style
 * attribute. The <head> section is dropped from the returned HTML.
 *
 * @param string $document    HTML document to process.
 * @param bool   $strip_class When true, class and id attributes are removed from the result.
 * @return string|false The unchanged document when it has no <style> block, the
 *                      rewritten HTML when rules were applied, or false when the
 *                      <style> block contains no usable rules.
 */
function convert($document, $strip_class = false)
{
    // Debug mode will output selectors and styles that are detected in the embedded CSS
    $debug = false;

    // Extract the CSS. `[^>]*` rather than `[^>]+` so a bare <style> tag
    // (no attributes) is recognised as well.
    preg_match('/<style[^>]*>(?<css>[^<]+)<\\/style>/s', $document, $matches);
    if (empty($matches)) {
        return $document;
    }

    // Strip out extra newlines and tabs from CSS
    $css = preg_replace("/[\n\r\t]+/s", "", $matches['css']);

    // Extract each CSS rule: index 1 is the selector list, index 2 the declarations.
    preg_match_all('/([-a-zA-Z0-9_ ,#\\.]+){([^}]+)}/s', $css, $rules, PREG_SET_ORDER);

    $styles = array();
    foreach ($rules as $rule) {
        $declarations = trim($rule[2]);
        // A comma-separated selector list is split so each selector gets its own
        // entry; a single selector simply yields a one-element list.
        foreach (explode(',', $rule[1]) as $selector) {
            $selector = trim($selector);
            if ($selector === '') {
                continue;
            }
            if (!isset($styles[$selector])) {
                $styles[$selector] = '';
            } elseif ($styles[$selector] !== '' && substr($styles[$selector], -1) !== ';') {
                // Keep declarations from separate rules apart when the earlier
                // rule did not end with a semicolon.
                $styles[$selector] .= ';';
            }
            $styles[$selector] .= $declarations;
            if ($debug) {
                echo $selector . ' { ' . $declarations . ' }<br/>';
            }
        }
    }

    // DEBUG: Show selector and declaration
    if ($debug) {
        echo '<pre>';
        foreach ($styles as $selector => $styling) {
            echo $selector . ':<br>';
            echo $styling . '<br/><br/>';
        }
        echo '</pre><hr/>';
    }

    if (empty($styles)) {
        return false;
    }

    // Load Simple HTML DOM helper
    require_once 'simple_html_dom.php';
    $html_dom = new simple_html_dom();
    // Load in the HTML without the head and style definitions
    $html_dom->load(preg_replace('/\\<head\\>(.+?)\\<\\/head>/s', '', $document));

    foreach ($styles as $selector => $styling) {
        foreach ($html_dom->find($selector) as $element) {
            // Skip elements whose style already contains these declarations.
            if (!stristr($element->style, $styling)) {
                if (strlen($element->style) > 0 && substr(rtrim($element->style), -1) !== ';') {
                    $element->style .= ';';
                }
                // If there is any existing style, this will append to it
                $element->style .= $styling;
            }
        }
    }
    $inline_css_message = $html_dom->save();

    if ($strip_class === true) {
        // Each pass removes at most one class/id attribute per tag (the greedy prefix
        // lands on the last one), so repeat until nothing is left to strip.
        // The character class was `[a-z0-0]`, a typo for `[a-z0-9]`.
        do {
            $inline_css_message = preg_replace('~(<[a-z0-9][^>]*)(\\s(?:class|id)\\s*=\\s*(([\'"]).*?\\4|[^\\s]*))~usi', '\\1', $inline_css_message, -1, $stripped);
        } while ($stripped > 0);
    }

    // Clear memory held by the parser.
    $html_dom->__destruct();
    return $inline_css_message;
}
示例11: _scrapeIndexPage
/**
 * Scrape an index page and return the first link of every paragraph in div#paddingLR12.
 *
 * @param string $url Index page to scrape.
 * @return array List of arrays with 'title' (link text) and 'url'
 *               (URL_BASE followed by the href minus its first character).
 */
function _scrapeIndexPage($url)
{
    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($url));

    $entries = array();
    foreach ($dom->find("div[@id='paddingLR12'] p") as $paragraph) {
        $anchors = $paragraph->find("a");
        $entries[] = array(
            'title' => $anchors[0]->plaintext,
            'url' => URL_BASE . substr($anchors[0]->href, 1),
        );
    }

    // Clear memory held by the parser.
    $dom->__destruct();
    return $entries;
}
示例12: scrapeTEDDataPage
/**
 * Download one notice data page and save its table as a single record.
 *
 * Each row of table.data contributes one field: the first <th> text is the
 * key and the second <td> text is the value. The site's root URL is ignored.
 *
 * @param string $url    Data page URL; also stored as the record's 'ID'.
 * @param string $sector Sector label stored with the record.
 * @return array The saved record, or an empty array for the root URL.
 */
function scrapeTEDDataPage($url, $sector)
{
    if (strcmp($url, "http://ted.europa.eu/") === 0) {
        return array();
    }

    $started_at = microtime(true);

    $curl = curl_init($url);
    curl_setopt_array($curl, array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_SSL_VERIFYPEER => false,
        // Abort the whole transfer after 10 seconds.
        CURLOPT_TIMEOUT => 10,
    ));
    $html = curl_exec($curl);
    curl_close($curl);

    $dom = new simple_html_dom();
    $dom->load($html);
    print "......done in " . (microtime(true) - $started_at) . "s " . memory_get_usage() / 1000000 . "MB\n";

    $record = array('ID' => $url, 'sector' => $sector);
    foreach ($dom->find("table[class=data] tr") as $row) {
        $headers = $row->find("th");
        $cells = $row->find("td");
        $record[$headers[0]->plaintext] = $cells[1]->plaintext;
    }

    // Clear the parser and drop the page buffer before saving.
    $dom->__destruct();
    unset($dom, $html);

    scraperwiki::save(array('ID', 'sector'), $record);
    // Pause between page fetches.
    sleep(2);
    return $record;
}
示例13: trim
$doc_name = trim($tr->children(1)->plaintext);
$father_name = trim($tr->children(2)->plaintext);
$quals_name = trim($tr->children(3)->plaintext);
$univ_name = trim($tr->children(4)->plaintext);
$link = $tr->find('a', 0);
if ($link != null) {
$link_text = $link->href;
$inner_html = scraperWiki::scrape("http://www.apmedicalcouncil.com/" . $link_text);
$inner_dom->load($inner_html);
$birth_date = trim($inner_dom->find('div[id="birth"]', 0)->plaintext);
$sex = trim($inner_dom->find('div[id="sex"]', 0)->plaintext);
$quals = trim($inner_dom->find('div[id="qualification"]', 0)->plaintext);
$quals2 = trim($inner_dom->find('div[id="addqualification"]', 0)->plaintext);
$add = trim($inner_dom->find('div[id="address"]', 0)->plaintext);
$add_more = trim($inner_dom->find('div[id="address2"]', 0)->plaintext);
$record = array('regd_num' => $regd_num, 'doc_name' => $doc_name, 'father_name' => $father_name, 'quals_name' => $quals_name, 'univ_name' => $univ_name, 'birth_date' => $birth_date, 'sex' => $sex, 'qualification' => $quals, 'advanced_quals' => $quals2, 'address' => $add, 'add_more' => $add_more);
} else {
$record = array('regd_num' => $regd_num, 'doc_name' => $doc_name, 'father_name' => $father_name, 'quals_name' => $quals_name, 'univ_name' => $univ_name);
}
print_r($record);
$counter++;
//print_r($link);
//scraperwiki::save_sqlite(array("doc_name"),$record,"AP_Docs");
if ($counter == 2) {
break;
}
}
}
}
$inner_dom->__destruct();
$dom->__destruct();
示例14: get_council_list
/**
 * Scrape the council members listed in table#members_table.
 *
 * The first table row is skipped as a header. Rows with fewer than four
 * cells are ignored.
 *
 * @param string $url Page that contains the members table.
 * @return array List of arrays with 'name', 'source', 'district', 'borough' and 'party';
 *               empty when the table is missing or has no member rows.
 */
function get_council_list($url)
{
    global $run_environment;
    global $max_records;

    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    // Always return an array, even when nothing is found.
    $council = array();

    $content = $dom->find("table[id=members_table]", 0);
    if ($content) {
        $is_header = true;
        foreach ($content->find("tr") as $row) {
            if ($is_header) {
                // Skip first line
                $is_header = false;
                continue;
            }
            $cells = $row->find("td");
            if (count($cells) < 4) {
                // Not a member row; reading its cells would fail.
                continue;
            }
            $profile_link = $cells[0]->find("a", 0);
            $council[] = array(
                'name' => $cells[0]->plaintext,
                'source' => $profile_link ? 'http://council.nyc.gov' . $profile_link->href : null,
                'district' => $cells[1]->plaintext,
                'borough' => $cells[2]->plaintext,
                'party' => $cells[3]->plaintext,
            );
        }
    }

    // Clear memory
    $dom->__destruct();
    if ($content) {
        $content->__destruct();
    }
    return $council;
}
示例15: get_link_list
/**
 * Scrape endpoint links from the first table.wikitable on a page.
 *
 * The first row is skipped as a header. For every other row: cell 0 is the
 * jurisdiction, the first link in cell 6 is the services URL, and the link in
 * cell 4 whose text is "xml" (case-insensitive, last one wins) is the
 * discovery URL. Missing cells or links yield null for that field.
 *
 * @param string $url Page that contains the table.
 * @return array List of arrays with 'jurisdiction', 'services', 'base_url' and 'discovery';
 *               empty when the table is not present.
 */
function get_link_list($url)
{
    global $run_environment;
    global $max_records;

    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    $endpoints = array();
    $content = $dom->find("table[class=wikitable]", 0);

    if ($content) {
        $is_header = true;
        foreach ($content->find("tr") as $row) {
            if ($is_header) {
                // Skip first line
                $is_header = false;
                continue;
            }

            // Guard every cell lookup: short rows would otherwise be a fatal call on null.
            $jurisdiction_cell = $row->find("td", 0);
            $services_cell = $row->find("td", 6);
            $services_link = $services_cell ? $services_cell->find("a", 0) : null;
            $discovery_cell = $row->find("td", 4);

            $endpoint = array();
            $endpoint['jurisdiction'] = $jurisdiction_cell ? trim($jurisdiction_cell->plaintext) : null;
            $endpoint['services'] = $services_link ? $services_link->href : null;

            // The base URL is everything before "services.xml"; when that marker is
            // absent there is no base URL (strpos() false must not be used as a length).
            $endpoint['base_url'] = null;
            if ($endpoint['services']) {
                $marker = strpos($endpoint['services'], 'services.xml');
                if ($marker !== false) {
                    $endpoint['base_url'] = substr($endpoint['services'], 0, $marker);
                }
            }

            // Make sure we get the correct URL for discovery
            $discovery = null;
            if ($discovery_cell) {
                foreach ($discovery_cell->find("a") as $link) {
                    if (strtolower($link->plaintext) == 'xml') {
                        $discovery = $link->href;
                    }
                }
            }
            $endpoint['discovery'] = empty($discovery) ? null : $discovery;

            $endpoints[] = $endpoint;
        }
    }

    // Clear memory
    $dom->__destruct();
    if ($content) {
        $content->__destruct();
    }
    return $endpoints;
}