本文整理汇总了PHP中scraperWiki类的典型用法代码示例。如果您正苦于以下问题:PHP scraperWiki类的具体用法?PHP scraperWiki怎么用?PHP scraperWiki使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了scraperWiki类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: scrape_page
/**
 * Scrapes the apartment price listing page by page and stores each data row.
 *
 * All inputs come from $GLOBALS: 'c', 's', 'r', 'amin', 'amax' and 'z' are
 * appended to the query string, 'time' is used in the table name, and
 * 'rowTotal' is incremented for every stored row. A page that yields exactly
 * 50 data rows is taken to mean another page follows, in which case 'z' is
 * incremented and the next page is fetched.
 */
function scrape_page()
{
    while (true) {
        // Number of data rows found on the current page only.
        $pageRows = 0;

        $url = "http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c']
            . "&s=" . $GLOBALS['s']
            . "&r=" . $GLOBALS['r']
            . "&amin=" . $GLOBALS['amin']
            . "&amax=" . $GLOBALS['amax']
            . "&z=" . $GLOBALS['z'];

        $dom = new simple_html_dom();
        $dom->load(scraperWiki::scrape($url));

        foreach ($dom->find("tr") as $tableRow) {
            $tds = $tableRow->find("td");
            // Only rows with more than 8 cells are treated as listing data.
            if (count($tds) <= 8) {
                continue;
            }

            $pageRows++;
            $GLOBALS['rowTotal']++;

            $apt = array(
                "Uniikkiavain" => $GLOBALS['rowTotal'],
                "Kaupunginosa" => $tds[0]->plaintext,
                "Myyntihinta" => $tds[3]->plaintext,
                "Neliohinta" => $tds[4]->plaintext,
                "Tyyppi" => $tds[1]->plaintext,
                "Koko" => $tds[2]->plaintext,
            );
            $table_name = $GLOBALS['c'] . " " . $GLOBALS['time'];
            scraperwiki::save_sqlite(null, $apt, $table_name);

            print $GLOBALS['rowTotal'] . "\n";
            print $pageRows . ". Sijainti: " . $tds[0]->plaintext . " Hinta: " . $tds[3]->plaintext . " Tyyppi: " . $tds[1]->plaintext . " Koko: " . $tds[2]->plaintext . " Neliöhinta: " . $tds[4]->plaintext . "€" . "\n";
        }

        if ($pageRows != 50) {
            break;
        }

        print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
        $GLOBALS['z']++;
    }

    print "Skrääpiminen suoritettu." . "\n";
    print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
    print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
}
示例2: scrap_yp
/**
 * Fetches one "browse by letter" page of the yellow pages directory and
 * echoes every `ul.directory-list` element found on it.
 *
 * @param string     $last_alphabet Letter to browse. When null or '', the saved
 *                                  variable 'last_alphabet_loaded' is used,
 *                                  falling back to 'a'.
 * @param int|string $last_page     Page number. When null or '', the saved
 *                                  variable 'last_page_loaded' is used,
 *                                  falling back to 1.
 */
function scrap_yp($last_alphabet = '', $last_page = '')
{
    if (is_null($last_alphabet) || $last_alphabet == '') {
        $saved_alphabet = scraperwiki::get_var('last_alphabet_loaded');
        $last_alphabet = is_null($saved_alphabet) ? 'a' : $saved_alphabet;
    }
    if (is_null($last_page) || $last_page == '') {
        $saved_page = scraperwiki::get_var('last_page_loaded');
        $last_page = is_null($saved_page) ? 1 : $saved_page;
    }

    // Encode both parts so a stored or caller-supplied value containing
    // reserved characters cannot produce a malformed URL. Plain letters and
    // digits are unchanged by the encoding.
    $yp_base_url = 'http://www.yellowpages.co.id/browse/letter/'
        . rawurlencode((string) $last_alphabet)
        . '?page=' . urlencode((string) $last_page);

    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($yp_base_url));

    foreach ($dom->find("ul.directory-list") as $data) {
        echo $data;
    }
}
示例3: run_ml
/**
 * Walks the musiklegal.com search result pages starting at $q_num, stores one
 * record per table row, and follows the "Next" link until none remains.
 *
 * Terminates the script with `exit` once no further page is found, as the
 * original implementation did.
 *
 * @param int|string $q_num Value appended to the search result URL for the
 *                          first page to fetch.
 */
function run_ml($q_num = 0)
{
    $result_base = 'http://musiklegal.com/search/result/a/';
    // The literals below had been corrupted into '<<strong>a</strong> ...' by
    // HTML highlighting of the letter "a"; restored to the anchor markup.
    $detail_prefix = '<a href="http://musiklegal.com/song/detail/';
    $anchor_close = '</a>';

    // Iterate instead of recursing so a long result set cannot exhaust the stack.
    while (true) {
        $html = scraperWiki::scrape($result_base . $q_num);
        $dom = new simple_html_dom();
        $dom->load($html);

        foreach ($dom->find("tr") as $data) {
            $tds = $data->find("td");
            // Rows without the four expected cells would otherwise be
            // dereferenced as null.
            if (count($tds) < 4) {
                continue;
            }

            // NOTE(review): plaintext normally carries no tags, so this split
            // may only ever yield the title; confirm whether innertext was intended.
            $temp_data = explode('">', str_replace($anchor_close, '', str_replace($detail_prefix, '', $tds[1]->plaintext)));
            $record = array(
                'No' => str_replace('.', '', $tds[0]->plaintext),
                'Code' => $temp_data[0],
                'Song Title' => isset($temp_data[1]) ? $temp_data[1] : null,
                'Artist' => $tds[2]->plaintext,
                'Album' => $tds[3]->plaintext,
            );
            /*
             * Stores results
             */
            scraperwiki::save_sqlite(array("No"), $record);
        }

        // Initialised up front: a page without a "Next" link previously left
        // this variable undefined.
        $tmp_a = '';
        foreach ($dom->find("a") as $a) {
            if ($a->plaintext == 'Next') {
                $tmp_a = str_replace($result_base, '', $a->href);
                if ((int) $tmp_a > 0) {
                    // Stop at the first usable link (the original `continue` was a no-op).
                    break;
                }
            }
        }

        // Release the parsed document before fetching the next page.
        $dom->clear();

        if ((int) $tmp_a == 0) {
            exit;
        }
        $q_num = $tmp_a;
    }
}
示例4: get_dom
/**
 * Downloads a page and returns it parsed as a simple_html_dom document.
 *
 * @param string $url Address passed to scraperWiki::scrape().
 * @return simple_html_dom Document loaded with the scraped markup.
 */
function get_dom($url)
{
    $markup = scraperWiki::scrape($url);

    $document = new simple_html_dom();
    $document->load($markup);

    return $document;
}
示例5: scrapeHTML
/**
 * Scrapes a fixed norwegian.no low-fare page and saves one record per
 * matching table row via saveData().
 *
 * @param mixed $param Not used by this function; the URL is hard-coded.
 * @param mixed $type  Stored as the "flight_type" of every record.
 */
function scrapeHTML($param, $type)
{
    $page = new simple_html_dom();
    $page->load(scraperWiki::scrape("http://www.norwegian.no/fly/lavpris/?D_City=CPH&A_City=DUB&TripType=2&D_Day=1&D_Month=201104&R_Day=1&R_Month=201104&AdultCount=1&ChildCount=0&InfantCount=0"));

    // Each row with HEIGHT='25' is treated as one flight entry.
    foreach ($page->find("TR[@HEIGHT='25']") as $flightRow) {
        $cells = $flightRow->find("div");

        $airline = removeSpaces($cells[0]->plaintext);
        $flightNumber = removeSpaces($cells[1]->plaintext);
        $destination = removeSpaces($cells[2]->plaintext);
        $time = removeSpaces($cells[3]->plaintext);
        $gate = removeSpaces($cells[4]->plaintext);
        $remarks = removeSpaces($cells[5]->plaintext);

        // The header row carries the literal column title; only real rows are saved.
        if ($airline != "Airline") {
            $record = array(
                "date" => date("m.d.y"),
                "airline" => $airline,
                "flight_type" => $type,
                "flight_num" => $flightNumber,
                "destination" => $destination,
                "time" => $time,
                "gate" => $gate,
                "remarks" => $remarks,
            );
            saveData(array("date", "airline", "flight_type", "flight_num"), $record);
        }
    }

    $page->clear();
}
示例6: scrapeMarketGroup
/**
 * Scrapes one goonmetrics market-group page: recurses into every linked
 * sub-group not yet listed in the global $visitedIds, then stores each item
 * row of this page in the 'eve_goonmetrics' table.
 *
 * @param string $url Market-group page to scrape.
 */
function scrapeMarketGroup($url)
{
    global $visitedIds;

    // Newlines are removed before the regexes are applied.
    $html = str_replace("\n", "", scraperWiki::scrape($url));

    preg_match_all("|<a href=\"/importing/61000746/marketgroup/(\\d+?)/\">(.+?)</a>|s", $html, $groupLinks, PREG_SET_ORDER);
    foreach ($groupLinks as $groupLink) {
        $groupId = $groupLink[1];
        $groupName = html_entity_decode($groupLink[2]);
        if (in_array($groupId, $visitedIds)) {
            continue;
        }
        $visitedIds[] = $groupId;
        scrapeMarketGroup("http://goonmetrics.com/importing/61000746/marketgroup/" . $groupId . "/");
    }

    preg_match_all("|<tr(.*?)>(.*?)<td(.*?)><a href=\"http://games.chruker.dk/eve_online/item.php\\?type_id=(.+?)\" target=\"_blank\">(.*?)<span class=\"dot\" onclick=\"CCPEVE.showMarketDetails\\((.*?)\\)\">(.+?)</span>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)</tr>|s", $html, $itemRows, PREG_SET_ORDER);

    // Capture-group index of each text column that may need re-encoding.
    $textColumns = array("name" => 7, "weekVol" => 11, "k6Stock" => 17);

    foreach ($itemRows as $itemRow) {
        $item = array("itemId" => trim($itemRow[4]));
        foreach ($textColumns as $column => $groupIndex) {
            $value = $itemRow[$groupIndex];
            if (!mb_check_encoding($value, 'UTF-8')) {
                $value = utf8_encode($value);
            }
            $item[$column] = trim($value);
        }
        // Strip thousands separators from the two numeric columns.
        $item['weekVol'] = str_replace(",", "", $item['weekVol']);
        $item['k6Stock'] = str_replace(",", "", $item['k6Stock']);

        // Retry the save up to 600 times, pausing 10 seconds after each failure.
        for ($attempt = 0; $attempt < 600; $attempt++) {
            try {
                @scraperwiki::save_sqlite(array('itemId'), $item, 'eve_goonmetrics');
                break;
            } catch (Exception $e) {
                sleep(10);
            }
        }
    }
}
示例7: grep_munich
/**
 * Replaces the contents of $table_name with the flight rows scraped from the
 * element with id 'flight_info_area' on the given page.
 *
 * The table is dropped before the page is parsed, so a page without the
 * expected element leaves the table absent.
 *
 * @param string $url        Page to scrape.
 * @param string $table_name Table that is dropped and refilled.
 */
function grep_munich($url, $table_name)
{
    $html = scraperWiki::scrape($url);
    $count = 0;

    // Use the PHP Simple HTML DOM Parser to extract <td> tags
    $dom = new simple_html_dom();
    $dom->load($html);

    // Drop all old information by dropping the table
    scraperwiki::sqliteexecute("drop table if exists " . $table_name);
    scraperwiki::sqlitecommit();

    $table = $dom->getElementById('flight_info_area');
    if (is_null($table)) {
        // The element is missing (layout change or failed download); calling
        // find() on null would be a fatal error.
        return;
    }

    foreach ($table->find('tr') as $data) {
        // Flight details. Read tds or ths
        $tds = $data->find("td");
        // Rows with fewer than 7 columns are not flight rows
        if (sizeof($tds) < 7) {
            continue;
        }

        // Build array of flight information; "count" is the row's running index
        $flight_data = array(
            "date" => date("Y-m-d"),
            "count" => $count,
            "flightnr" => $tds[1]->plaintext,
            "from" => $tds[2]->plaintext,
            "time" => $tds[3]->plaintext,
            "expected_time" => $tds[4]->plaintext,
        );

        // Save the information of one flight
        scraperwiki::save_sqlite(array("date", "count"), $flight_data, $table_name);
        $count = $count + 1;
    }
}
示例8: getCardInfo
/**
 * Scrapes a Gatherer card detail page and stores one record for the card.
 *
 * Text values are converted from UTF-8 to ISO-8859-1 (with transliteration)
 * and trimmed before saving. Rows missing from the page are stored as empty
 * strings instead of being dereferenced as null.
 *
 * @param string $url Card detail page to scrape.
 */
function getCardInfo($url)
{
    $baseURL = 'http://gatherer.wizards.com/Pages/Card/';
    // Common prefix of every element id read below.
    $idPrefix = 'ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_';

    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($url));

    // find(..., 0) yields null when nothing matches, so every lookup is guarded.
    $imageNode = $dom->find('img[id=' . $idPrefix . 'cardImage]', 0);
    $cardImage = is_null($imageNode) ? '' : str_replace("amp;", "", $imageNode->src);
    $imgURL = $baseURL . $cardImage;

    $nameNode = $dom->find('div[id=' . $idPrefix . 'nameRow] div[class=value]', 0);
    $name = iconv("UTF-8", "ISO-8859-1//TRANSLIT", is_null($nameNode) ? '' : $nameNode->plaintext);

    // Mana cost is the first character of each mana symbol's alt text.
    $mana = "";
    foreach ($dom->find('div[id=' . $idPrefix . 'manaRow] div[class=value] img') as $manaItem) {
        $mana .= substr($manaItem->alt, 0, 1);
    }
    $mana = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $mana);

    $record = array(
        "Card" => trim($name),
        "Image" => $imgURL,
        "Mana" => trim($mana),
    );

    // Column name => id suffix of the row whose value element is stored.
    $valueRows = array(
        "CMC" => 'cmcRow',
        "Type" => 'typeRow',
        "Card Text" => 'textRow',
        "Flavor Text" => 'flavorRow',
        "Artist" => 'artistRow',
        "Card Number" => 'numberRow',
        "Rarity" => 'rarityRow',
        "Expansion" => 'setRow',
    );
    foreach ($valueRows as $column => $rowId) {
        $node = $dom->find('div[id=' . $idPrefix . $rowId . '] div[class=value]', 0);
        // NOTE(review): as in the original, the element itself is converted to a
        // string (not ->plaintext), so the stored value may include the wrapping
        // markup; confirm whether plaintext was intended.
        $record[$column] = trim(iconv("UTF-8", "ISO-8859-1//TRANSLIT", (string) $node));
    }

    // The unique key must name a key that exists in the record: "Card", not "card".
    scraperwiki::save_sqlite(array("Card"), $record);
}
示例9: read_listing
/**
 * Reads a paginated listing and returns the result count together with the
 * median price.
 *
 * The median position is derived from the total reported by the paginator;
 * only the page(s) containing the median row(s) are fetched. For an even
 * total the two middle prices are averaged and rounded.
 *
 * @param array  $params Query parameters handed to build_query(); 'ak' is set
 *                       to the row offset when a page other than the first is needed.
 * @param string $url    Listing endpoint.
 * @return array 'n' => total as reported by the page, 'val' => median price
 *               (0 when no price could be read).
 */
function read_listing($params, $url = 'http://www.auto24.ee/kasutatud/nimekiri.php')
{
    $firstPage = new simple_html_dom();
    $firstPage->load(scraperWiki::scrape(build_query($url, $params)));

    $totalResultsEl = $firstPage->find('.paginator .current-range strong');
    if (empty($totalResultsEl)) {
        // No paginator on the page: nothing to compute a median from.
        return array('n' => 0, 'val' => 0);
    }
    $totalResults = $totalResultsEl[0]->plaintext;
    $resultCount = (int) $totalResults;
    if ($resultCount < 1) {
        return array('n' => $totalResults, 'val' => 0);
    }

    // Zero-based positions, within the whole result set, of the middle row(s).
    $medianItem = ($resultCount + 1) / 2;
    $lPoint = (int) floor($medianItem) - 1;
    $hPoint = (int) ceil($medianItem) - 1;

    // Pages already loaded, keyed by the row offset they start at.
    $pages = array(0 => $firstPage);
    $prices = array();
    foreach (array_unique(array($lPoint, $hPoint)) as $index) {
        // Derive the page from the zero-based index. The earlier arithmetic on
        // the one-based position produced index -1 when the median fell on a
        // page boundary, and could not read two rows on different pages.
        $offset = (int) (floor($index / RESULTS_PER_PAGE) * RESULTS_PER_PAGE);
        if (!isset($pages[$offset])) {
            $pageParams = $params;
            $pageParams['ak'] = $offset;
            $page = new simple_html_dom();
            $page->load(scraperWiki::scrape(build_query($url, $pageParams)));
            $pages[$offset] = $page;
        }

        $rows = $pages[$offset]->find("[@id=usedVehiclesSearchResult] .result-row");
        $position = $index - $offset;
        if (isset($rows[$position])) {
            $rowData = get_row_data($rows[$position]);
            $prices[] = $rowData['price'];
        }
    }

    $a24ksi = 0;
    if (count($prices) == 1) {
        $a24ksi = $prices[0];
    } elseif (count($prices) == 2) {
        $a24ksi = round(($prices[0] + $prices[1]) / 2);
    }

    return array('n' => $totalResults, 'val' => $a24ksi);
}
示例10: scrapeHTML
/**
 * Scrapes the flight table at BASE_URL for the given type parameter and saves
 * one record per flight row via saveData().
 *
 * @param mixed $param Value sent as the "type" query parameter.
 * @param mixed $type  Stored as the "flight_type" of every record.
 */
function scrapeHTML($param, $type)
{
    $markup = scraperWiki::scrape(BASE_URL . "?type={$param}");
    $document = new simple_html_dom();
    $document->load($markup);

    // Key columns handed to saveData() for every record.
    $keyColumns = array("date", "airline", "flight_type", "flight_num");

    foreach ($document->find("TR[@HEIGHT='25']") as $tableRow) {
        $cells = $tableRow->find("td");

        // Cells 0..5 in page order: airline, flight number, destination, time, gate, remarks.
        $airline = removeSpaces($cells[0]->plaintext);
        $number = removeSpaces($cells[1]->plaintext);
        $destination = removeSpaces($cells[2]->plaintext);
        $time = removeSpaces($cells[3]->plaintext);
        $gate = removeSpaces($cells[4]->plaintext);
        $remarks = removeSpaces($cells[5]->plaintext);

        // The header row repeats the column title; skip it.
        if ($airline == "Airline") {
            continue;
        }

        $today = date("m.d.y");
        saveData($keyColumns, array(
            "date" => $today,
            "airline" => $airline,
            "flight_type" => $type,
            "flight_num" => $number,
            "destination" => $destination,
            "time" => $time,
            "gate" => $gate,
            "remarks" => $remarks,
        ));
    }

    $document->clear();
}
示例11: scrape_NG_news_article
/**
 * Scrapes a news article, builds a teaser from its paragraphs and saves the
 * teaser keyed by the article URL.
 *
 * The paragraphs are concatenated with a " #<n># " marker after each one
 * before being handed to word_teaser(). Title, subtitle and the shorter
 * teaser are computed but are not part of the saved record.
 *
 * @param string $art_url Article address.
 * @return array The saved record: "TeaserM" and "URL".
 */
function scrape_NG_news_article($art_url)
{
    $markup = scraperWiki::scrape($art_url);
    require_once 'scraperwiki/simple_html_dom.php';
    $document = new simple_html_dom();
    $document->load($markup);

    // The last matching heading wins; neither value is stored at the moment.
    foreach ($document->find("div#page_head h1") as $heading) {
        $title = $heading->innertext;
    }
    foreach ($document->find("div#page_head h2") as $heading) {
        $subtitle = $heading->innertext;
    }

    $paragraphNo = 0;
    $fullText = "";
    foreach ($document->find("div#content div.article_text p") as $paragraph) {
        $paragraphNo++;
        // Each paragraph is re-parsed on its own to obtain its plain text.
        $plain = str_get_html($paragraph)->plaintext;
        $fullText .= $plain . " #" . $paragraphNo . "# ";
    }

    $shortTeaser = word_teaser($fullText, 60);
    $mediumTeaser = word_teaser($fullText, 120);

    $record = array("TeaserM" => $mediumTeaser, "URL" => $art_url);
    scraperwiki::save(array('URL'), $record);

    return $record;
}
示例12: populateDOM
/**
 * Loads a page into the given DOM object, using a cache kept in the
 * `sources` table.
 *
 * When a cached dump exists and no update is forced, the dump is loaded;
 * otherwise the live page is scraped, loaded, and written to the cache.
 * Progress messages are echoed along the way.
 *
 * @param object $htmlDOM  DOM object providing load() and save().
 * @param string $src_link Page address, also the cache key.
 * @param bool   $upd_flag When true, the cache is bypassed and refreshed.
 * @return object The same $htmlDOM, populated.
 */
function populateDOM($htmlDOM, $src_link, $upd_flag = false)
{
    scraperwiki::sqliteexecute("CREATE TABLE IF NOT EXISTS sources (src_link TEXT PRIMARY KEY, timestamp DATETIME, src_dump TEXT)");
    echo "Checking local cache...<br>\n";

    $cached = scraperwiki::sqliteexecute("SELECT src_link, timestamp, src_dump FROM sources WHERE src_link = :slnk", array("slnk" => $src_link));

    // Columns of the first row: [0] link, [1] timestamp, [2] dump.
    $cacheUsable = !empty($cached->data[0][2]) && !($upd_flag == true);

    if ($cacheUsable) {
        echo "Using local cache, as cached data exists from '" . date(DATE_RFC822, $cached->data[0][1]) . ".'<br>\n";
        echo "Loading...<br>\n";
        $htmlDOM->load($cached->data[0][2]);
        echo "Populate DOM Complete.";
        return $htmlDOM;
    }

    echo "No Cache for this site (or force-update flag given), scraping live site for local cache...<br>\n";
    // Keep a local copy so the remote site is not hit again on every run.
    $liveMarkup = scraperWiki::scrape($src_link);
    $htmlDOM->load($liveMarkup);
    $dump = $htmlDOM->save();
    echo "Scrape complete, storing into cache...<br>\n";

    $row = array("slnk" => $src_link, "stime" => time(), "sdmp" => $dump);
    scraperwiki::sqliteexecute("INSERT OR REPLACE INTO sources VALUES (:slnk, :stime, :sdmp)", $row);
    scraperwiki::sqlitecommit();
    echo "Cache saved.<br>\n";
    echo "Populate DOM Complete.";

    return $htmlDOM;
}
示例13: scrapepage
/**
 * Scrapes a product list page and echoes the detail link of every product.
 *
 * @param string $url Product list page to scrape.
 */
function scrapepage($url)
{
    $html = scraperWiki::scrape($url);

    // Parse the downloaded markup. Previously the scraped HTML was overwritten
    // by the parser object and the URL string itself was loaded.
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("table[@class='products-list'] tr td h2 a") as $menu_link) {
        // Read the href from the matched anchor; the loop used to read the
        // undefined $product_link and overwrite $menu_link instead.
        $product_link = $menu_link->href;
        echo "Link to Details: " . $product_link . "<br>";
    }
}
示例14: getDetails
/**
 * Scrapes a team page and records the team's next opponent in the global
 * $teams array.
 *
 * For every `tr.active` row, the text of its first `td.title a` link is
 * written to $teams[$team]['nextOpponent'], so the last such row wins.
 *
 * @param string $url  Team page to scrape.
 * @param mixed  $team Key of the team inside $teams.
 */
function getDetails($url, $team)
{
    global $teams;

    $page = new simple_html_dom();
    $page->load(scraperWiki::scrape($url));

    foreach ($page->find("tr.active") as $activeRow) {
        $titleLinks = $activeRow->find("td.title a");
        $teams[$team]['nextOpponent'] = $titleLinks[0]->plaintext;
    }
}
示例15: scrapeTeams
/**
 * Scrapes a page of teams and saves one record per club name.
 *
 * Every link inside a `td.cw` cell is treated as a club; its text is stored
 * in the 'club' column, which is also the unique key.
 *
 * @param string $url Page listing the teams.
 */
function scrapeTeams($url)
{
    $page = new simple_html_dom();
    $page->load(scraperWiki::scrape($url));

    foreach ($page->find('td.cw a') as $clubLink) {
        $record = array('club' => $clubLink->plaintext);
        scraperWiki::save_sqlite(array('club'), $record);
    }
}