本文整理汇总了PHP中scraperwiki类的典型用法代码示例。如果您正苦于以下问题:PHP scraperwiki类的具体用法?PHP scraperwiki怎么用?PHP scraperwiki使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了scraperwiki类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: parseModelsPage
/**
 * Scrape one listing page of phone models for a brand from gsmarena.com
 * and store each model in the "cell_model" SQLite table, then follow the
 * "Next page" link recursively.
 *
 * NOTE(review): references $this->html / $this->models, so this is meant
 * to run as a method of a scraper class — confirm the enclosing class.
 *
 * @param int    $brandId   Numeric brand identifier stored with each model.
 * @param string $brandName Brand name prefixed to each model name.
 * @param string $page      URL of the model listing page to scrape.
 */
function parseModelsPage($brandId, $brandName, $page)
{
    $this->html = str_get_html(scraperwiki::scrape($page));

    foreach ($this->html->find("div.makers a") as $anchor) {
        $thumb = $anchor->find('img', 0);
        $hrefParts = explode('-', $anchor->href);

        $model = array(
            'name'     => $brandName . ' ' . $anchor->find('strong', 0)->innertext,
            'img'      => $thumb->src,
            'link'     => 'http://www.gsmarena.com/' . $anchor->href,
            'desc'     => $thumb->title,
            // The numeric id is embedded in the href after the dash,
            // followed by ".php" (4 chars), which is stripped here.
            'id'       => (int) substr($hrefParts[1], 0, -4),
            'brand_id' => $brandId,
        );

        scraperwiki::save_sqlite(array("id" => $model['id']), $model, "cell_model");
        $this->models++;
    }

    // Recurse into the next page when the pagination block ends with a
    // "Next page" link.
    $pager = $this->html->find("div.nav-pages", 0);
    if ($pager) {
        $next = $pager->lastChild();
        if ($next && $next->title == "Next page") {
            $this->parseModelsPage($brandId, $brandName, 'http://www.gsmarena.com/' . $next->href);
        }
    }

    // Destroy the DOM explicitly to avoid simple_html_dom memory leaks.
    $this->html->__destruct();
}
示例2: scrapeTEDRSS
/**
 * Fetch a TED (Tenders Electronic Daily) RSS feed and record one row per
 * <item>, keyed on (sector, url), in the ScraperWiki datastore.
 *
 * The feed is fetched with cURL directly (rather than scraperwiki::scrape)
 * so that redirects, SSL verification and timeouts can be controlled.
 *
 * Fix: the cURL handle was never released; curl_close() is now called
 * after the transfer.
 *
 * @param string $url    RSS feed URL.
 * @param string $sector Sector label stored alongside each notice URL.
 */
function scrapeTEDRSS($url, $sector)
{
    print $url . " " . $sector . "\n";
    // $xml = scraperWiki::scrape($url);
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // 20 seconds before aborting.
    // try CURLOPT_CONNECTTIMEOUT (in seconds)
    // try CURLOPT_LOW_SPEED_LIMIT (to define what slow is, with):
    // curl_setopt($curl, CURLOPT_LOW_SPEED_TIME, 10); (10 seconds at low speed before aborting)
    $xml = curl_exec($curl);
    print curl_error($curl) . "\n";
    // Fix: release the handle (previously leaked).
    curl_close($curl);

    $dom = new simple_html_dom();
    $dom->load($xml);
    $items = $dom->find("item");
    foreach ($items as $item) {
        $guid = $item->find("guid");
        // The data-page URL is the guid with "TEXT" swapped for "DATA".
        $noticeURL = str_replace("TEXT", "DATA", $guid[0]->plaintext);
        print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
        echo "\n";
        // $record = scrapeTEDDataPage ($noticeURL, $sector);
        $record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
        scraperwiki::save(array('sector', 'url'), $record);
        sleep(1);
    }
    // Destroy the DOM explicitly to avoid simple_html_dom memory leaks.
    $dom->__destruct();
    unset($items);
    unset($dom);
    unset($xml);
    print memory_get_usage() / 1024 / 1024 . "MB\n";
}
示例3: saveIt
/**
 * Persist one joke record, assigning it the next sequential JOKE_ID.
 *
 * Increments the global $joke_count counter as a side effect.
 *
 * @param string $txt Joke text to store.
 */
function saveIt($txt)
{
    global $joke_count;

    ++$joke_count;
    scraperwiki::save(
        array('JOKE_ID'),
        array('JOKE_ID' => $joke_count, 'JOKE_TEXT' => $txt)
    );
}
示例4: scrapeMarketGroup
/**
 * Recursively scrape a goonmetrics.com market-group page: first follow
 * every sub-group link (tracking visited group ids in the global
 * $visitedIds to avoid revisiting), then extract the item rows on the
 * page itself and save them to the "eve_goonmetrics" SQLite table,
 * retrying each save on failure.
 *
 * @param string $url Market-group page URL to scrape.
 */
function scrapeMarketGroup($url)
{
global $visitedIds;
$html = scraperWiki::scrape($url);
// Strip newlines so the row regex below can match across line breaks.
$html = str_replace("\n", "", $html);
// Sub-group links: capture 1 = group id, capture 2 = group name.
preg_match_all("|<a href=\"/importing/61000746/marketgroup/(\\d+?)/\">(.+?)</a>|s", $html, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
$groupId = $match[1];
$groupName = html_entity_decode($match[2]);
//echo $groupName."\n";
// Recurse into each sub-group only once.
if (!in_array($groupId, $visitedIds)) {
$visitedIds[] = $groupId;
scrapeMarketGroup("http://goonmetrics.com/importing/61000746/marketgroup/" . $groupId . "/");
}
}
// Item rows: capture 4 = item type id, 7 = item name, 11 = weekly
// volume, 17 = stock level (per the table's column layout).
preg_match_all("|<tr(.*?)>(.*?)<td(.*?)><a href=\"http://games.chruker.dk/eve_online/item.php\\?type_id=(.+?)\" target=\"_blank\">(.*?)<span class=\"dot\" onclick=\"CCPEVE.showMarketDetails\\((.*?)\\)\">(.+?)</span>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)</tr>|s", $html, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
// Captured text may not be valid UTF-8; re-encode when necessary.
$item = array("itemId" => trim($match[4]), "name" => trim(mb_check_encoding($match[7], 'UTF-8') ? $match[7] : utf8_encode($match[7])), "weekVol" => trim(mb_check_encoding($match[11], 'UTF-8') ? $match[11] : utf8_encode($match[11])), "k6Stock" => trim(mb_check_encoding($match[17], 'UTF-8') ? $match[17] : utf8_encode($match[17])));
// Strip thousands separators so the values are plain numeric strings.
$item['weekVol'] = str_replace(",", "", $item['weekVol']);
$item['k6Stock'] = str_replace(",", "", $item['k6Stock']);
$saved = false;
$delay = 0;
// Retry the save for up to 600 attempts (10 s apart) to ride out
// transient datastore errors; @ silences warnings from the attempt.
while (!$saved && $delay < 600) {
try {
@scraperwiki::save_sqlite(array('itemId'), $item, 'eve_goonmetrics');
$saved = true;
} catch (Exception $e) {
sleep(10);
$delay++;
}
}
}
}
示例5: do_day
/**
 * Scrape a day page and dump the text nodes surrounding the
 * <a name="discs"> anchor to stdout. Diagnostic output only — nothing is
 * saved to the datastore.
 *
 * @param array $rec Record with a 'url' key pointing at the day page.
 */
function do_day($rec)
{
    $dom = new simple_html_dom();
    $dom->load(scraperwiki::scrape($rec['url']));

    $anchors = $dom->find('a[name=discs]');
    $textNodes = $anchors[0]->parent->find('text');

    print $textNodes[10] . "\n";
    print count($textNodes) . "\n";

    # loop by number, as null lines stop a foreach
    $matched = 0;
    $idx = 0;
    while ($idx < count($textNodes)) {
        $node = $textNodes[$idx];
        // The DOM object crashes on the 3-character row, so skip it.
        if (strlen($node) != 3) {
            print $idx . " " . strlen($node) . "\n";
            $matched = $matched + 1;
            print $node . "\n";
        }
        $idx++;
    }
}
示例6: scrapPage
/**
 * Scrape one result page of the GEIPAN UFO case search (geipan.fr) by
 * POSTing the search form, then save each case row keyed on (ID, maj).
 *
 * Fixes: CURLOPT_POST is a boolean flag and was set to the magic number
 * 11; the cURL handle was never closed; the DOM was never destroyed
 * (simple_html_dom leaks memory otherwise).
 *
 * @param int|string $page Result page number to request.
 */
function scrapPage($page)
{
    print "Scraping page " . $page;
    $url = "http://www.geipan.fr/index.php?id=202";
    // URL-encoded search form fields; only the page number varies.
    $fields_string = "&no_cache=1&" . "tx_geipansearch_pi1%5Bsubmit_form%5D=1&" . "tx_geipansearch_pi1%5Btexte_resume%5D=&" . "tx_geipansearch_pi1%5Bdate_debut%5D=&" . "tx_geipansearch_pi1%5Bdate_fin%5D=&" . "no_cache=1&" . "tx_geipansearch_pi1%5Bclasse_cas%5D=tous&" . "tx_geipansearch_pi1%5Bregion%5D=&" . "page=" . $page . "&" . "order_by=&" . "sens=";
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // Fix: CURLOPT_POST takes a boolean (was 11).
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
    $html = curl_exec($curl);
    print curl_error($curl) . "\n";
    // Fix: release the handle (previously leaked).
    curl_close($curl);
    // print($html);
    $dom = new simple_html_dom();
    $dom->load($html);
    $trs = $dom->find("tr");
    foreach ($trs as $tr) {
        // Case rows carry an onclick handler containing "cas=<ID>".
        if (isset($tr->attr['onclick'])) {
            $ID = substr($tr->attr['onclick'], strpos($tr->attr['onclick'], "cas=") + 4, 13);
            print $ID . "\n";
            $tds = $tr->find("td");
            $title = utf8_encode($tds[0]->plaintext);
            $date = $tds[1]->plaintext;
            $departement = utf8_encode($tds[2]->plaintext);
            $classe = $tds[3]->plaintext;
            $maj = $tds[4]->plaintext;
            // The city is the title text up to the opening parenthesis.
            $city = substr($title, 0, strpos($title, "(") - 1);
            $record = array('ID' => $ID, 'title' => $title, 'date' => $date, 'departement' => $departement, 'classe' => $classe, 'maj' => $maj, 'city' => $city);
            scraperwiki::save(array('ID', 'maj'), $record);
        }
    }
    // Fix: destroy the DOM to avoid simple_html_dom memory leaks.
    $dom->__destruct();
}
示例7: scrape_page
/**
 * Scrape one result page of asuntojen.hintatiedot.fi (Finnish apartment
 * sale prices), save every data row, and recurse to the next page while
 * full pages (50 rows) keep arriving. All query parameters and counters
 * ('c', 's', 'r', 'amin', 'amax', 'z', 'time', 'rowTotal') live in
 * $GLOBALS.
 */
function scrape_page()
{
    $rowsOnPage = 0;
    $html = scraperWiki::scrape("http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c'] . "&s=" . $GLOBALS['s'] . "&r=" . $GLOBALS['r'] . "&amin=" . $GLOBALS['amin'] . "&amax=" . $GLOBALS['amax'] . "&z=" . $GLOBALS['z']);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("tr") as $rowNode) {
        $cells = $rowNode->find("td");
        // Data rows have more than 8 columns; skip everything else.
        if (count($cells) <= 8) {
            continue;
        }
        $rowsOnPage++;
        $GLOBALS['rowTotal']++;
        $apartment = array(
            "Uniikkiavain" => $GLOBALS['rowTotal'],
            "Kaupunginosa" => $cells[0]->plaintext,
            "Myyntihinta"  => $cells[3]->plaintext,
            "Neliohinta"   => $cells[4]->plaintext,
            "Tyyppi"       => $cells[1]->plaintext,
            "Koko"         => $cells[2]->plaintext,
        );
        // Table name combines the municipality code and the run timestamp.
        scraperwiki::save_sqlite(null, $apartment, $GLOBALS['c'] . " " . $GLOBALS['time']);
        print $GLOBALS['rowTotal'] . "\n";
        print $rowsOnPage . ". Sijainti: " . $cells[0]->plaintext . " Hinta: " . $cells[3]->plaintext . " Tyyppi: " . $cells[1]->plaintext . " Koko: " . $cells[2]->plaintext . " Neliöhinta: " . $cells[4]->plaintext . "€" . "\n";
    }

    // A full page of 50 rows means more may follow; otherwise report totals.
    if ($rowsOnPage == 50) {
        print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
        $GLOBALS['z']++;
        scrape_page();
    } else {
        print "Skrääpiminen suoritettu." . "\n";
        print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
        print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
    }
}
示例8: clubURL
/**
 * Scrape a football club's season-history page: extract the club name,
 * append it to the global clubs list, and save one attendance record per
 * season row, keyed on (club, year).
 *
 * Fix: the club name was appended to $_GLOBAL (a typo that silently
 * created an ordinary local array); it now uses the $GLOBALS superglobal
 * as intended.
 *
 * @param string $url Club page URL.
 */
function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Club name sits in the third row of the first table; collapse runs
    // of whitespace into single spaces.
    $clubName = trim(str_replace(' ', '', $dom->find('table', 0)->find('tr', 2)->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));
    // Fix: was $_GLOBAL['clubs'][] (typo).
    $GLOBALS['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";
    // Season rows live in the third table; data rows start with a numeric year.
    foreach ($dom->find('table', 2)->find('tr') as $row) {
        if (is_numeric($row->find('td', 0)->plaintext)) {
            $year = trim($row->find('td', 0)->plaintext);
            $position = trim(str_replace(' ', '', $row->find('td', 1)->plaintext));
            if (trim($position) == 'Champion') {
                $position = 1;
            }
            $leagueLevel = trim($row->find('td', 2)->plaintext);
            $overallPosition = trim($row->find('td', 3)->plaintext);
            // Attendance figures use '.' as a thousands separator; strip it.
            $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
            $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));
            $dataset = array('club' => $formatClubName, 'year' => $year, 'finishedPosition' => $position, 'league' => $leagueLevel, 'overallPosition' => $overallPosition, 'avgAttendance' => $avgAttendance, 'totalAttendance' => $totalAttendance);
            scraperwiki::save(array('club', 'year'), $dataset);
        }
    }
    /*
     * The next two lines stop a memory leak in Simple XML as per
     * http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
示例9: grep_munich
/**
 * Scrape the flight information board at $url (Munich airport layout,
 * table id "flight_info_area") and rebuild $table_name from scratch with
 * one row per flight, keyed on (date, count).
 *
 * @param string $url        Flight information page URL.
 * @param string $table_name SQLite table to drop and refill.
 */
function grep_munich($url, $table_name)
{
    $html = scraperWiki::scrape($url);

    # Use the PHP Simple HTML DOM Parser to extract <td> tags
    $dom = new simple_html_dom();
    $dom->load($html);

    // Drop any old information by dropping the whole table first.
    scraperwiki::sqliteexecute("drop table if exists " . $table_name);
    scraperwiki::sqlitecommit();

    $rowIndex = 0;
    $table = $dom->getElementById('flight_info_area');
    foreach ($table->find('tr') as $row) {
        $cells = $row->find("td");
        // Rows with fewer than 7 columns are headers/separators; skip them.
        if (sizeof($cells) < 7) {
            continue;
        }
        //print $row->plaintext . "\n";
        // One record per flight, stamped with today's date and the row
        // position so (date, count) is unique per scrape day.
        $flight = array(
            "date"          => date("Y-m-d"),
            "count"         => $rowIndex,
            "flightnr"      => $cells[1]->plaintext,
            "from"          => $cells[2]->plaintext,
            "time"          => $cells[3]->plaintext,
            "expected_time" => $cells[4]->plaintext,
        );
        scraperwiki::save_sqlite(array("date", "count"), $flight, $table_name);
        $rowIndex = $rowIndex + 1;
    }
}
示例10: scrap_yp
/**
 * Scrape yellowpages.co.id business listings browsed by first letter.
 * When no explicit starting letter/page is given, resumes from the
 * checkpoints stored in the ScraperWiki variables 'last_alphabet_loaded'
 * and 'last_page_loaded', then echoes each directory list found on the
 * page.
 *
 * @param string $last_alphabet Letter to start from ('' = resume from checkpoint).
 * @param string $last_page     Page number to start from ('' = resume from checkpoint).
 */
function scrap_yp($last_alphabet = '', $last_page = '')
{
    $alphabet = range('a', 'z');

    // Resume from the saved letter checkpoint, defaulting to 'a'.
    if (is_null($last_alphabet) || $last_alphabet == '') {
        $savedLetter = scraperwiki::get_var('last_alphabet_loaded');
        $last_alphabet = is_null($savedLetter) ? 'a' : $savedLetter;
    }

    // Resume from the saved page checkpoint, defaulting to 1.
    if (is_null($last_page) || $last_page == '') {
        $savedPage = scraperwiki::get_var('last_page_loaded');
        $last_page = is_null($savedPage) ? 1 : $savedPage;
    }

    $yp_base_url = 'http://www.yellowpages.co.id/browse/letter/' . $last_alphabet . '?page=' . $last_page;
    $dom = new simple_html_dom();
    $dom->load(scraperWiki::scrape($yp_base_url));

    foreach ($dom->find("ul.directory-list") as $listNode) {
        echo $listNode;
    }
}
示例11: getIngredients
/**
 * Given search-result HTML from foodnetwork.com, follow the second
 * "callout" recipe link and save each listed ingredient (with the last
 * href seen, if any) to the datastore.
 *
 * Fixes: $href could be read before assignment when no node exposed an
 * href (now initialised to null); removed the unused $i counter and the
 * $domFoods DOM instance that was created but never loaded.
 *
 * @param string $html Search results page HTML.
 */
function getIngredients($html)
{
    $dom = new simple_html_dom();
    $dom->load($html);
    // Second result link; strip the "reviews/" path segment to reach the
    // recipe page itself.
    $res = $dom->find('a[class=callout]', 1)->href;
    $res = str_replace("reviews/", "", $res);
    echo "http://www.foodnetwork.com" . $res;
    $html1 = scraperwiki::scrape("http://www.foodnetwork.com" . $res);
    $h = str_get_html($html1);
    echo "\n\n";
    // Fix: initialise so the save below never reads an undefined variable.
    $href = null;
    foreach ($h->find('li[class=ingredient]') as $data) {
        $ingredient = $data->plaintext;
        // NOTE(review): $h is the whole document, so $h->href is unlikely
        // to ever be set — presumably $data->href was intended; behavior
        // kept as-is pending confirmation.
        if (isset($h->href)) {
            $href = $h->href;
        }
        scraperwiki::save(array('ing'), array('ing' => $ingredient, 'href' => $href));
    }
}
示例12: run_ml
/**
 * Scrape one results page of musiklegal.com's song search, save each row
 * keyed on 'No', then follow the "Next" pagination link recursively.
 * Terminates the process with exit when no next page is found.
 *
 * Fix: $tmp_a was undefined when the page had no "Next" link, producing
 * a notice and an unreliable final check; it is now initialised to 0 so
 * the no-next-page case reliably takes the exit branch.
 *
 * @param int|string $q_num Page offset to request (0 = first page).
 */
function run_ml($q_num = 0)
{
    $html = scraperWiki::scrape("http://musiklegal.com/search/result/a/" . $q_num);
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("tr") as $data) {
        $tds = $data->find("td");
        // The title cell embeds a song-detail link; strip the markup and
        // split the remainder into code and title.
        $temp_data = explode('">', str_replace('</<strong>a</strong>>', '', str_replace('<<strong>a</strong> href="http://musiklegal.com/song/detail/', '', $tds[1]->plaintext)));
        $record = array('No' => str_replace('.', '', $tds[0]->plaintext), 'Code' => $temp_data[0], 'Song Title' => $temp_data[1], 'Artist' => $tds[2]->plaintext, 'Album' => $tds[3]->plaintext);
        /*
         * Stores results
         */
        scraperwiki::save_sqlite(array("No"), $record);
        unset($temp_data);
    }
    // Find the "Next" pagination link and extract its page offset.
    $tmp_a = 0;
    foreach ($dom->find("a") as $a) {
        if ($a->plaintext == 'Next') {
            $tmp_a = str_replace('http://musiklegal.com/search/result/a/', '', $a->href);
            if ($tmp_a > 0) {
                continue;
            }
        }
    }
    if ((int) $tmp_a != 0) {
        run_ml($tmp_a);
    } else {
        exit;
    }
}
示例13: getCategories
/**
 * Recursively crawl a category-facet tree: scrape $u, build the
 * breadcrumb path, and for every category refinement on the page either
 * write a CSV row to the global handle $f (when $local is truthy) or
 * save to SQLite keyed on URL — then recurse into that category's URL.
 *
 * Fix: $local was read without being imported into function scope, so it
 * was always undefined and the CSV branch was unreachable; it is now
 * declared alongside the other globals (behavior is unchanged when the
 * caller never sets $local).
 *
 * @param string $u Category page URL to crawl.
 */
function getCategories($u)
{
    global $baseurl, $f, $local;
    $path = "";
    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));
    echo "Loaded URL: " . $u . "\n";
    if ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]')) {
        // Build "A/B/.../Leaf" from the breadcrumb: joined child crumbs
        // plus the text after the last ">" in the breadcrumb itself.
        $breadcrumb = $d->find('div[id=breadcrumb]', 0);
        if (!is_null($breadcrumb)) {
            foreach ($breadcrumb->children() as $crumb) {
                $path .= trim($crumb->innertext) . "/";
            }
            $path .= trim(strrchr($breadcrumb->innertext, ">"), "> ");
        }
        foreach ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]', 0)->find('div[class=S2refinementsContainer]', 0)->children() as $div) {
            // Link text looks like "Name (count)"; keep only the name.
            $name = trim(strstr($div->children(0)->innertext, "(", true));
            $url = $baseurl . $div->children(0)->href;
            $data = array("Name" => $name, "Path" => $path, "URL" => $url);
            echo $path . "/" . $name . "\n";
            if ($local) {
                fputcsv($f, array($name, $path, $url));
            } else {
                scraperwiki::save_sqlite(array("URL"), $data);
            }
            getCategories($url);
        }
    }
}
示例14: scrapeDetails
/**
 * Enrich an NGO record by scraping its detail page: pull the website and
 * email links plus a fixed set of labelled facts out of the page's
 * paragraphs, print the result, and return it.
 *
 * @param array $ngo NGO record containing at least a 'url' key.
 * @return array The same record with any discovered fields added.
 */
function scrapeDetails($ngo)
{
    $dom = new simple_html_dom();
    $dom->load(scraperwiki::scrape($ngo["url"]));

    // Labelled facts still to be located on the page.
    $infosWeWant = array('Telefon', 'Rechtsform', 'Steuerstatus', 'Weltanschauliche Ausrichtung', 'Anzahl Mitarbeiter', 'Gesamteinnahmen:', 'Davon Sammlungseinnahmen', 'Bezugsjahr:');

    // Walk every paragraph once, harvesting whatever it contains.
    foreach ($dom->find('p') as $paragraph) {
        $text = $paragraph->plaintext;
        if (strstr($text, "Website")) {
            $ngo["website"] = $paragraph->find('a', 0)->href;
        }
        if (strstr($text, "Email")) {
            $ngo["email"] = $paragraph->find('a', 0)->plaintext;
        }
        foreach ($infosWeWant as $key => $label) {
            $value = extractInfo($paragraph, $label);
            if ($value) {
                $ngo[$label] = $value;
                // Do not search for this info again.
                unset($infosWeWant[$key]);
            }
        }
    }

    print_r($ngo);
    return $ngo;
}
示例15: getProducts
/**
 * Crawl a paginated product grid: for every li.grid-item, write one CSV
 * row (name, minimal-price flag, category, URL) to the global file
 * handle $o and echo the product name, then follow the "next" page link
 * until the grid is empty.
 *
 * @param string $u   Listing page URL.
 * @param string $cat Category label written with each product row.
 */
function getProducts($u, $cat)
{
    global $o;

    $dom = new simple_html_dom();
    $dom->load(scraperwiki::scrape($u));
    //echo "Loaded URL: " . $u . "\n";

    $items = $dom->find('li.grid-item');
    if (count($items) == 0) {
        return;
    }

    foreach ($items as $item) {
        $link = $item->find('p.product-name > a', 0);
        $name = trim($link->innertext);
        // A "minimal price" paragraph marks the product type as 1.
        $typeFlag = is_null($item->find('p.minimal-price', 0)) ? 0 : 1;
        fputcsv($o, array($name, $typeFlag, $cat, $link->href));
        echo $name . "\n";
    }

    // Follow pagination when a "next" link is present.
    $next = $dom->find('p.next', 0);
    if (!is_null($next)) {
        getProducts($next->href, $cat);
    }
}