本文整理匯總了PHP中scraperwiki::save方法的典型用法代碼示例。如果您正苦於以下問題:PHP scraperwiki::save方法的具體用法?PHP scraperwiki::save怎麽用?PHP scraperwiki::save使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類scraperwiki
的用法示例。
在下文中一共展示了scraperwiki::save方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的PHP代碼示例。
示例1: scrapPage
/**
 * Scrapes one results page of the GEIPAN UFO case search and saves each case row.
 *
 * @param int|string $page Results page number to request (injected into the POST body).
 * @return void Rows are persisted via scraperwiki::save keyed on (ID, maj).
 */
function scrapPage($page)
{
    print "Scraping page " . $page;
    $url = "http://www.geipan.fr/index.php?id=202";
    // POST body mimics the site's search form; only "page" varies between calls.
    $fields_string = "&no_cache=1&" . "tx_geipansearch_pi1%5Bsubmit_form%5D=1&" . "tx_geipansearch_pi1%5Btexte_resume%5D=&" . "tx_geipansearch_pi1%5Bdate_debut%5D=&" . "tx_geipansearch_pi1%5Bdate_fin%5D=&" . "no_cache=1&" . "tx_geipansearch_pi1%5Bclasse_cas%5D=tous&" . "tx_geipansearch_pi1%5Bregion%5D=&" . "page=" . $page . "&" . "order_by=&" . "sens=";
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // Fix: CURLOPT_POST expects a boolean; the original passed 11, which only
    // worked because any truthy value enables POST.
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
    $html = curl_exec($curl);
    print curl_error($curl) . "\n";
    // Fix: release the cURL handle (was leaked).
    curl_close($curl);
    $dom = new simple_html_dom();
    $dom->load($html);
    $trs = $dom->find("tr");
    foreach ($trs as $tr) {
        // Only result rows carry an onclick handler containing "cas=<ID>".
        if (isset($tr->attr['onclick'])) {
            $ID = substr($tr->attr['onclick'], strpos($tr->attr['onclick'], "cas=") + 4, 13);
            print $ID . "\n";
            $tds = $tr->find("td");
            $title = utf8_encode($tds[0]->plaintext);
            $date = $tds[1]->plaintext;
            $departement = utf8_encode($tds[2]->plaintext);
            $classe = $tds[3]->plaintext;
            $maj = $tds[4]->plaintext;
            // City is the title text up to (but not including) the "(".
            $city = substr($title, 0, strpos($title, "(") - 1);
            $record = array('ID' => $ID, 'title' => $title, 'date' => $date, 'departement' => $departement, 'classe' => $classe, 'maj' => $maj, 'city' => $city);
            scraperwiki::save(array('ID', 'maj'), $record);
        }
    }
    // Fix: explicitly tear down simple_html_dom to avoid its known memory leak.
    $dom->clear();
    unset($dom);
}
示例2: clubURL
/**
 * Scrapes a club's attendance-history page and saves one record per season.
 * Also appends the club's normalised name to the global 'clubs' list.
 *
 * @param string $url Club page URL.
 * @return void Records are persisted via scraperwiki::save keyed on (club, year).
 */
function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Club name: third row of the first table, stripped and whitespace-collapsed.
    $clubName = trim(str_replace(' ', '', $dom->find('table', 0)->find('tr', 2)->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));
    // Fix: $_GLOBAL is not a PHP superglobal (the append silently went into an
    // ordinary local array); $GLOBALS is the correct superglobal.
    $GLOBALS['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";
    foreach ($dom->find('table', 2)->find('tr') as $row) {
        // Season rows start with a numeric year cell.
        if (is_numeric($row->find('td', 0)->plaintext)) {
            $year = trim($row->find('td', 0)->plaintext);
            $position = trim(str_replace(' ', '', $row->find('td', 1)->plaintext));
            if (trim($position) == 'Champion') {
                $position = 1;
            }
            $leagueLevel = trim($row->find('td', 2)->plaintext);
            $overallPosition = trim($row->find('td', 3)->plaintext);
            // Attendance figures use '.' as a thousands separator; strip it.
            $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
            $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));
            $dataset = array('club' => $formatClubName, 'year' => $year, 'finishedPosition' => $position, 'league' => $leagueLevel, 'overallPosition' => $overallPosition, 'avgAttendance' => $avgAttendance, 'totalAttendance' => $totalAttendance);
            scraperwiki::save(array('club', 'year'), $dataset);
        }
    }
    /*
     * The next two lines stop a memory leak in Simple HTML DOM as per
     * http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
示例3: scrapeTEDRSS
/**
 * Scrapes a TED (Tenders Electronic Daily) RSS feed and saves one record per
 * item, pairing each notice's DATA-view URL with the given sector label.
 *
 * @param string $url    RSS feed URL.
 * @param string $sector Sector label stored with each record.
 * @return void Records are persisted via scraperwiki::save keyed on (sector, url).
 */
function scrapeTEDRSS($url, $sector)
{
    print $url . " " . $sector . "\n";
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    // Abort slow requests after 20 seconds.
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    $xml = curl_exec($curl);
    print curl_error($curl) . "\n";
    // Fix: release the cURL handle (was leaked).
    curl_close($curl);
    $dom = new simple_html_dom();
    $dom->load($xml);
    $items = $dom->find("item");
    foreach ($items as $item) {
        $guid = $item->find("guid");
        // The guid links to the TEXT view; the DATA view URL is what we store.
        $noticeURL = str_replace("TEXT", "DATA", $guid[0]->plaintext);
        print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
        echo "\n";
        $record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
        scraperwiki::save(array('sector', 'url'), $record);
        // Be polite to the server between items.
        sleep(1);
    }
    // Explicit teardown to avoid simple_html_dom's memory leak across calls.
    $dom->__destruct();
    unset($items);
    unset($dom);
    unset($xml);
    print memory_get_usage() / 1024 / 1024 . "MB\n";
}
示例4: saveIt
/**
 * Persists a single joke, assigning it the next sequential joke ID.
 *
 * @param string $txt Joke text to store.
 * @return void Uses the global $joke_count as a running ID counter.
 */
function saveIt($txt)
{
    global $joke_count;
    ++$joke_count;
    scraperwiki::save(array('JOKE_ID'), array('JOKE_ID' => $joke_count, 'JOKE_TEXT' => $txt));
}
示例5: getIngredients
/**
 * Follows the first recipe link found in the given search-results HTML and
 * saves each ingredient of that recipe.
 *
 * @param string $html Search-results page HTML from foodnetwork.com.
 * @return void Records are persisted via scraperwiki::save keyed on 'ing'.
 */
function getIngredients($html)
{
    $dom = new simple_html_dom();
    $dom->load($html);
    // The second "callout" anchor points at the recipe's reviews page; drop
    // the "reviews/" path segment to reach the recipe page itself.
    $res = $dom->find('a[class=callout]', 1)->href;
    $res = str_replace("reviews/", "", $res);
    echo "http://www.foodnetwork.com" . $res;
    $html1 = scraperwiki::scrape("http://www.foodnetwork.com" . $res);
    $h = str_get_html($html1);
    echo "\n\n";
    // Fix: $href was read before ever being assigned when the page node has no
    // href (undefined-variable notice); initialise it explicitly.
    $href = null;
    foreach ($h->find('li[class=ingredient]') as $data) {
        $ingredient = $data->plaintext;
        // NOTE(review): $h is the whole document, so $h->href is rarely set —
        // presumably this was meant to read $data->href; kept as-is to
        // preserve behaviour.
        if (isset($h->href)) {
            $href = $h->href;
        }
        scraperwiki::save(array('ing'), array('ing' => $ingredient, 'href' => $href));
    }
}
示例6: gazelangs
/**
 * Scrapes a Babelfish translated-sentences page and saves each input/output
 * sentence pair together with scrape metadata.
 *
 * @param string $url  Page URL to scrape.
 * @param string $lang Language label stored with each record.
 * @return void Records are persisted via scraperwiki::save keyed on all five columns.
 */
function gazelangs($url, $lang)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // (Removed the dead "$michi" string concatenation — it was never used.)
    foreach ($dom->find("ul[@class='trans_sent']") as $data) {
        // Each sentence list holds two items: the user input and the output.
        $tds = $data->find("li");
        $record = array('user_input' => $tds[0]->plaintext, 'babelfish_output' => $tds[1]->plaintext, 'timestamp_scrape' => date("Y-m-d H:i:s"), 'page' => $url, 'language' => $lang);
        scraperwiki::save(array('user_input', 'babelfish_output', 'timestamp_scrape', 'page', 'language'), $record);
    }
}
示例7: scrapeIdeeLab
/**
 * Scrapes published news posts from the IdeeLab Wordpress "uudis" category
 * and stores each post's title and body text.
 *
 * @return void Records are persisted via scraperwiki::save keyed on (title, newsbody).
 */
function scrapeIdeeLab()
{
    $pageHtml = scraperWiki::scrape("http://ideelab.wordpress.com/category/uudis/");
    $document = new simple_html_dom();
    $document->load($pageHtml);
    foreach ($document->find('div.status-publish') as $post) {
        $titleNodes = $post->find('div.posttitle h2.pagetitle');
        $bodyNodes = $post->find('div.entry');
        $row = array(
            'title' => $titleNodes[0]->plaintext,
            'newsbody' => $bodyNodes[0]->plaintext,
        );
        scraperwiki::save(array('title', 'newsbody'), $row);
    }
}
示例8: grab
/**
 * Scrapes a proxy-list page and stores every decoded proxy IP address.
 *
 * Rows in #tbl_proxy_list with exactly 7 cells are treated as data rows; the
 * first cell holds the obfuscated IP, decoded by the project's decode_ip().
 *
 * @param string $url Proxy list page URL.
 * @return void Records are persisted via scraperwiki::save keyed on 'ip'.
 */
function grab($url)
{
    $pageHtml = scraperWiki::scrape($url);
    $document = new simple_html_dom();
    $document->load($pageHtml);
    foreach ($document->find("#tbl_proxy_list tr") as $row) {
        $cells = $row->find("td");
        if (count($cells) != 7) {
            continue; // header / filler rows
        }
        $ip = decode_ip((string) $cells[0]);
        scraperwiki::save(array('ip'), array('ip' => $ip));
    }
}
示例9: extract_data
/**
 * Extracts a link, title, description and parsed date from an HTML fragment
 * and saves the combined record.
 *
 * NOTE(review): link, title and description are all read from the SAME anchor
 * node ('li.first.last a'), exactly as the original did — verify that this
 * is intended upstream.
 *
 * @param string $value HTML fragment to parse.
 * @return void Records are persisted via scraperwiki::save keyed on 'title'.
 */
function extract_data($value)
{
    $fragment = str_get_html($value);
    $anchor = $fragment->find('li[class="first last"] a', 0);
    $dateNode = $fragment->find('span[class="date-display-single"]', 0);
    // The display text ends with a d/m/Y date; keep just those 10 characters.
    $rawDate = substr($dateNode->plaintext, -10);
    $when = date_create_from_format('d/m/Y', $rawDate);
    print_date($when);
    $data = array(
        'link' => $anchor->href,
        'title' => $anchor->plaintext,
        'description' => $anchor->plaintext,
        'date' => $when,
    );
    scraperwiki::save(array('title'), $data);
}
示例10: getExcuse
/**
 * Fetches one excuse page, saves the excuse with a sequential ID, then chains
 * on to the next excuse page via goToNextURL().
 *
 * @param string $extension Path portion of the excuse URL, appended to the site root.
 * @return void Uses globals: $html (parsed page, shared with goToNextURL) and
 *              $count (running excuse ID).
 */
function getExcuse($extension)
{
    global $html;
    global $count;
    $root = "http://www.goodexcuses.co.uk";
    $html = file_get_html($root . $extension);
    // The excuse text lives in the first <h2> on the page.
    $excuse = $html->find('h2', 0)->innertext;
    echo $excuse . "\n";
    ++$count;
    scraperwiki::save(
        array('EXCUSE_ID'),
        array('EXCUSE_ID' => $count, 'EXCUSE_TEXT' => $excuse, 'EXCUSE_URL' => $extension)
    );
    // Follow the "next" link so the crawl continues.
    goToNextURL();
}
示例11: loadPageGallery
/**
 * Scrapes a gallery detail page and stores one record per gallery block found.
 *
 * @param string $url Gallery page URL.
 * @return void Records are persisted via scraperwiki::save keyed on every saved column.
 */
function loadPageGallery($url)
{
    $pageHtml = scraperWiki::scrape($url);
    $document = new simple_html_dom();
    $document->load($pageHtml);
    foreach ($document->find("div#contentDetail1") as $detail) {
        $titles = $detail->find("h3");
        $addressLines = $detail->find('.adres');
        $links = $detail->find('.adres a');
        $artistNodes = $detail->find('.artists');
        // First line of the address block is the contact name.
        list($galleryContact) = explode("\n", $addressLines[0]->plaintext);
        // The fifth address block holds up to two phone numbers, one per line.
        list($phoneOne, $phoneTwo) = explode("\n", $addressLines[4]->plaintext);
        $record = array(
            'name' => $titles[0]->plaintext,
            'contact' => $galleryContact,
            'url' => $links[0]->plaintext,
            'email' => $links[1]->plaintext,
            'address' => $addressLines[1]->plaintext,
            'tel1' => $phoneOne,
            'tel2' => $phoneTwo,
            'artists' => $artistNodes[0]->plaintext,
        );
        scraperwiki::save(array('name', 'contact', 'url', 'email', 'address', 'tel1', 'tel2', 'artists'), $record);
    }
}
示例12: scrape_job_page
/**
 * Scrapes one page of the DirectGov job-search results and saves each job row.
 *
 * Data rows have exactly 5 cells: posted date (whose <a name> carries the job
 * id), reference, title (linking to the details page), company, location.
 *
 * @param int|string $page Results page number appended to the search URL.
 * @return void Records are persisted via scraperwiki::save keyed on 'id'.
 */
function scrape_job_page($page)
{
    $pageHtml = scraperWiki::scrape("https://jobsearch.direct.gov.uk/JobSearch/PowerSearch.aspx?tm=0&pg=" . $page);
    $document = new simple_html_dom();
    $document->load($pageHtml);
    foreach ($document->find("table tr") as $row) {
        $cells = $row->find("td");
        if (count($cells) != 5) {
            continue; // not a job row
        }
        $idAnchor = $cells[0]->find('a[name]', 0);
        $jobId = intval($idAnchor->name);
        $detailsUrl = $cells[2]->find('a', 0)->href;
        print $detailsUrl;
        $record = array(
            'id' => $jobId,
            'posted_date' => date_create($cells[0]->plaintext),
            'job_title' => trim($cells[2]->plaintext),
            'company' => trim($cells[3]->plaintext),
            'location' => trim($cells[4]->plaintext),
            'url' => $detailsUrl,
        );
        scraperwiki::save(array('id'), $record);
    }
    // Work around simple_html_dom's circular-reference memory leak.
    $document->__destruct();
}
示例13: getCitieListByATO
/**
 * Scrapes the Puglia waste-collection site for the municipalities belonging
 * to a given ATO (optimal territorial area) and saves each one.
 *
 * @param string $p_atoCODE ATO code to query (empty string by default).
 * @return void Records are persisted via scraperwiki::save keyed on 'id'.
 */
function getCitieListByATO($p_atoCODE = "")
{
    $pageHtml = scraperWiki::scrape("http://www.rifiutiebonifica.puglia.it/dettaglio_differenziata.php?ato=" . $p_atoCODE . "&data=12");
    $document = new simple_html_dom();
    $document->load($pageHtml);
    foreach ($document->find("table tr") as $row) {
        $cells = $row->find("td");
        $anchors = $row->find("a");
        if (!isset($anchors[0])) {
            continue; // rows without a detail link are not municipalities
        }
        // The municipality id sits between the query-string prefix and the
        // last "&" in the detail link.
        $href = str_replace("dettaglio_trasmissione.php?IdComune=", "", $anchors[0]->href);
        $id = substr($href, 0, strrpos($href, "&"));
        scraperwiki::save(array('id'), array('comune' => $cells[0]->plaintext, 'id' => $id));
    }
}
示例14: topSites
/**
 * Scrapes 20 pages of the Alexa top-sites listing for Italy, saving each site
 * name and printing the total number collected.
 *
 * @return void Records are persisted via scraperwiki::save keyed on 'site'.
 */
function topSites()
{
    $country = 'IT';
    $pages = 20;
    $saved = 0;
    for ($page = 0; $page < $pages; $page++) {
        $pageHtml = scraperWiki::scrape("http://www.alexa.com/topsites/countries;" . $page . "/" . $country);
        $document = new simple_html_dom();
        $document->load($pageHtml);
        foreach ($document->find("span[class=topsites-label]") as $label) {
            scraperwiki::save(array('site'), array('site' => $label->plaintext));
            $saved++;
        }
    }
    print $saved;
}
示例15: data_from_overview_page
/**
 * Scrapes a Minnesota DNR overview page and saves one record per listed item.
 *
 * @param string $url  Overview page URL.
 * @param string $type Category tag stored with each record and used in its id.
 * @return void Records are persisted via scraperwiki::save keyed on 'id'
 *              ("type--slugified-name").
 */
function data_from_overview_page($url, $type)
{
    $pageHtml = scraperWiki::scrape($url);
    $document = new simple_html_dom();
    $document->load($pageHtml);
    $count = 0;
    $base_url = 'http://www.dnr.state.mn.us';
    foreach ($document->find(".paddingbig table tr table tr") as $row) {
        $count++;
        $cells = $row->find("td");
        $imageAnchors = $cells[0]->find("a");
        $images = $cells[0]->find("img");
        $textAnchors = $cells[1]->find("a");
        $name = $textAnchors[0]->plaintext;
        if (empty($cells[0]->plaintext)) {
            continue; // spacer rows carry no data
        }
        $record = array(
            'id' => $type . '--' . strtolower(str_replace(' ', '-', $name)),
            'type' => $type,
            'name' => $name,
            'link' => !empty($imageAnchors[0]->href) ? $base_url . $imageAnchors[0]->href : '',
            'thumb_url' => !empty($images[0]->src) ? $images[0]->src : '',
            'timestamp' => time(),
        );
        scraperwiki::save(array('id'), $record);
    }
}