

PHP scraperwiki::save Method Code Examples

This article collects typical usage examples of the PHP scraperwiki::save method. If you are wondering how scraperwiki::save is used in practice, or are looking for concrete examples of calling it, the hand-picked code samples below should help. You can also explore further usage examples from the scraperwiki class that this method belongs to.


The following section presents 15 code examples of the scraperwiki::save method, sorted by popularity by default.
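Before the examples, here is a minimal sketch of the call pattern they all share. The include line and the column names ('id', 'title') are illustrative assumptions, not taken from any of the projects below. scraperwiki::save() takes an array of unique-key column names as its first argument and the row to store as an associative array in its second argument; a row whose unique-key values already exist in the datastore is updated, otherwise a new row is inserted.

<?php
// Minimal usage sketch (the column names 'id' and 'title' are hypothetical).
require 'scraperwiki.php'; // assumed include; the ScraperWiki platform normally provides this library

$record = array(
    'id'    => 1,                 // unique-key column
    'title' => 'Example title',   // ordinary data column
);

// Insert the row, or update the existing row whose 'id' is 1.
scraperwiki::save(array('id'), $record);

Every example below follows this same pattern: build an associative $record from scraped values, then pass the identifying columns and the record to scraperwiki::save().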

Example 1: scrapPage

function scrapPage($page)
{
    print "Scraping page " . $page;
    $url = "http://www.geipan.fr/index.php?id=202";
    $fields_string = "&no_cache=1&" . "tx_geipansearch_pi1%5Bsubmit_form%5D=1&" . "tx_geipansearch_pi1%5Btexte_resume%5D=&" . "tx_geipansearch_pi1%5Bdate_debut%5D=&" . "tx_geipansearch_pi1%5Bdate_fin%5D=&" . "no_cache=1&" . "tx_geipansearch_pi1%5Bclasse_cas%5D=tous&" . "tx_geipansearch_pi1%5Bregion%5D=&" . "page=" . $page . "&" . "order_by=&" . "sens=";
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
    $html = curl_exec($curl);
    print curl_error($curl) . "\n";
    //      print($html);
    $dom = new simple_html_dom();
    $dom->load($html);
    $trs = $dom->find("tr");
    foreach ($trs as $tr) {
        if (isset($tr->attr['onclick'])) {
            $ID = substr($tr->attr['onclick'], strpos($tr->attr['onclick'], "cas=") + 4, 13);
            print $ID . "\n";
            $tds = $tr->find("td");
            $title = utf8_encode($tds[0]->plaintext);
            $date = $tds[1]->plaintext;
            $departement = utf8_encode($tds[2]->plaintext);
            $classe = $tds[3]->plaintext;
            $maj = $tds[4]->plaintext;
            $city = substr($title, 0, strpos($title, "(") - 1);
            $record = array('ID' => $ID, 'title' => $title, 'date' => $date, 'departement' => $departement, 'classe' => $classe, 'maj' => $maj, 'city' => $city);
            scraperwiki::save(array('ID', 'maj'), $record);
        }
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 35, Source: geipan.php

Example 2: clubURL

function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $clubName = trim(str_replace(' ', '', $dom->find('table', 0)->find('tr', 2)->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));
    $GLOBALS['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";
    foreach ($dom->find('table', 2)->find('tr') as $row) {
        if (is_numeric($row->find('td', 0)->plaintext)) {
            $year = trim($row->find('td', 0)->plaintext);
            $position = trim(str_replace(' ', '', $row->find('td', 1)->plaintext));
            if (trim($position) == 'Champion') {
                $position = 1;
            }
            $leagueLevel = trim($row->find('td', 2)->plaintext);
            $overallPosition = trim($row->find('td', 3)->plaintext);
            $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
            $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));
            $dataset = array('club' => $formatClubName, 'year' => $year, 'finishedPosition' => $position, 'league' => $leagueLevel, 'overallPosition' => $overallPosition, 'avgAttendance' => $avgAttendance, 'totalAttendance' => $totalAttendance);
            scraperwiki::save(array('club', 'year'), $dataset);
        }
    }
    /*
     * The next two lines stop a memory leak in Simple HTML DOM as per http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 30, Source: pauls-hmhse-scraper.php

Example 3: scrapeTEDRSS

function scrapeTEDRSS($url, $sector)
{
    print $url . " " . $sector . "\n";
    // $xml = scraperWiki::scrape($url);
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // 20 seconds before aborting
    // try CURLOPT_CONNECTTIMEOUT (in seconds)
    // try CURLOPT_LOW_SPEED_LIMIT (to define what "slow" is), together with:
    // curl_setopt($curl, CURLOPT_LOW_SPEED_TIME, 10); // 10 seconds at low speed before aborting
    $xml = curl_exec($curl);
    print curl_error($curl) . "\n";
    $dom = new simple_html_dom();
    $dom->load($xml);
    $items = $dom->find("item");
    foreach ($items as $item) {
        $guid = $item->find("guid");
        $noticeURL = str_replace("TEXT", "DATA", $guid[0]->plaintext);
        print $noticeURL . " " . $sector . " " . memory_get_usage() / 1000000 . "MB";
        echo "\n";
        // $record = scrapeTEDDataPage ($noticeURL, $sector);
        $record = array('time' => microtime(true), 'sector' => $sector, 'url' => $noticeURL);
        scraperwiki::save(array('sector', 'url'), $record);
        sleep(1);
    }
    $dom->__destruct();
    unset($items);
    unset($dom);
    unset($xml);
    print memory_get_usage() / 1024 / 1024 . "MB\n";
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 35, Source: dk-ted.php

Example 4: saveIt

function saveIt($txt)
{
    global $joke_count;
    $record = array('JOKE_ID' => ++$joke_count, 'JOKE_TEXT' => $txt);
    scraperwiki::save(array('JOKE_ID'), $record);
    //var_dump($record);
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 7, Source: chuck_norris.php

Example 5: getIngredients

function getIngredients($html)
{
    $i = 0;
    $dom = new simple_html_dom();
    $dom->load($html);
    //foreach($dom->find('result-item',1)->href as $data)
    //{
    // if ($data != null)
    //$res = trim($data->plaintext);
    $res = $dom->find('a[class=callout]', 1)->href;
    $res = str_replace("reviews/", "", $res);
    echo "http://www.foodnetwork.com" . $res;
    $html1 = scraperwiki::scrape("http://www.foodnetwork.com" . $res);
    $domFoods = new simple_html_dom();
    //$domFoods->load($html1);
    $h = str_get_html($html1);
    //echo $domFoods;
    echo "\n\n";
    $href = ''; // initialise so the record has a value even when no link is found
    foreach ($h->find('li[class=ingredient]') as $data) {
        $ingredient = $data->plaintext;
        if (isset($h->href)) {
            $href = $h->href;
        }
        //foreach($domFoods->find('ul[class=kv-ingred-list1]',1)->children() as $data){
        //echo $data->plaintext;
        scraperwiki::save(array('ing'), array('ing' => $ingredient, 'href' => $href));
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 28, Source: foodnetwork.php

Example 6: gazelangs

function gazelangs($url, $lang)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $michi = "strong";
    $michi = $michi . " hope";
    foreach ($dom->find("ul[@class='trans_sent']") as $data) {
        $tds = $data->find("li");
        $record = array('user_input' => $tds[0]->plaintext, 'babelfish_output' => $tds[1]->plaintext, 'timestamp_scrape' => date("Y-m-d H:i:s"), 'page' => $url, 'language' => $lang);
        // print json_encode($record) . "\n";
        scraperwiki::save(array('user_input', 'babelfish_output', 'timestamp_scrape', 'page', 'language'), $record);
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 14, Source: test_77.php

Example 7: scrapeIdeeLab

function scrapeIdeeLab()
{
    $html = scraperWiki::scrape("http://ideelab.wordpress.com/category/uudis/");
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find('div.status-publish') as $data) {
        $newsTitle = $data->find('div.posttitle h2.pagetitle');
        //    print($newsTitle[0]->plaintext."\n");
        $newsBody = $data->find('div.entry');
        //    print($newsBody[0]->plaintext."\n");
        $record = array('title' => $newsTitle[0]->plaintext, 'newsbody' => $newsBody[0]->plaintext);
        scraperwiki::save(array('title', 'newsbody'), $record);
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 14, Source: ideelabnewsscraper.php

Example 8: grab

function grab($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("#tbl_proxy_list tr") as $data) {
        $tds = $data->find("td");
        if (count($tds) == 7) {
            $input = decode_ip((string) $tds[0]);
            $record = array('ip' => $input);
            scraperwiki::save(array('ip'), $record);
        }
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 14, Source: proxyparser2.php

Example 9: extract_data

function extract_data($value)
{
    $htmlvalue = str_get_html($value);
    //print $htmlvalue;
    $link = $htmlvalue->find('li[class="first last"] a', 0);
    $title = $htmlvalue->find('li[class="first last"] a', 0);
    $description = $htmlvalue->find('li[class="first last"] a', 0);
    $date = $htmlvalue->find('span[class="date-display-single"]', 0);
    $processdate = substr($date->plaintext, -10);
    //print $link->href. "\n";
    //print $title->plaintext. "\n";
    //print $description->plaintext. "\n";
    $when = date_create_from_format('d/m/Y', $processdate);
    print_date($when);
    $data = array('link' => $link->href, 'title' => $title->plaintext, 'description' => $description->plaintext, 'date' => $when);
    scraperwiki::save(array('title'), $data);
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 17, Source: aagbi_totw_element_browse.php

Example 10: getExcuse

function getExcuse($extension)
{
    global $html;
    global $count;
    $root = "http://www.goodexcuses.co.uk";
    //$extension = "/Excuses/My-fish-is-sick-and-I-need-to-take-it-to-the-vet/" ;
    $html = file_get_html($root . $extension);
    //The excuse
    $excuse = $html->find('h2', 0)->innertext;
    echo $excuse . "\n";
    //save to DB
    $record = array('EXCUSE_ID' => ++$count, 'EXCUSE_TEXT' => $excuse, 'EXCUSE_URL' => $extension);
    scraperwiki::save(array('EXCUSE_ID'), $record);
    //Get next url
    //echo "\n".goToNextURL()."\n";
    goToNextURL();
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 17, Source: goodexcuses.php

Example 11: loadPageGallery

function loadPageGallery($url)
{
    $htmlGallery = scraperWiki::scrape($url);
    $domGallery = new simple_html_dom();
    $domGallery->load($htmlGallery);
    foreach ($domGallery->find("div#contentDetail1") as $data) {
        $title = $data->find("h3");
        $adressclass = $data->find('.adres');
        $urlandemail = $data->find('.adres a');
        $artists = $data->find('.artists');
        $contactName = explode("\n", $adressclass[0]->plaintext);
        list($contactNameGallery) = $contactName;
        $tels = explode("\n", $adressclass[4]->plaintext);
        list($tel1, $tel2) = $tels;
        $record = array('name' => $title[0]->plaintext, 'contact' => $contactNameGallery, 'url' => $urlandemail[0]->plaintext, 'email' => $urlandemail[1]->plaintext, 'address' => $adressclass[1]->plaintext, 'tel1' => $tel1, 'tel2' => $tel2, 'artists' => $artists[0]->plaintext);
        scraperwiki::save(array('name', 'contact', 'url', 'email', 'address', 'tel1', 'tel2', 'artists'), $record);
        //print_r($record);
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 19, Source: galeries.php

Example 12: scrape_job_page

function scrape_job_page($page)
{
    $page_html = scraperWiki::scrape("https://jobsearch.direct.gov.uk/JobSearch/PowerSearch.aspx?tm=0&pg=" . $page);
    $dom = new simple_html_dom();
    $dom->load($page_html);
    foreach ($dom->find("table tr") as $data) {
        $tds = $data->find("td");
        if (count($tds) == 5) {
            $id_hyperlink = $tds[0]->find('a[name]', 0);
            $id = intval($id_hyperlink->name);
            $more_info_hyperlink = $tds[2]->find('a', 0)->href;
            print $more_info_hyperlink;
            $record = array('id' => $id, 'posted_date' => date_create($tds[0]->plaintext), 'job_title' => trim($tds[2]->plaintext), 'company' => trim($tds[3]->plaintext), 'location' => trim($tds[4]->plaintext), 'url' => $more_info_hyperlink);
            //print json_encode($record) . "\n";
            scraperwiki::save(array('id'), $record);
        }
    }
    $dom->__destruct();
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 19, Source: universaljobmatch.php

Example 13: getCitieListByATO

function getCitieListByATO($p_atoCODE = "")
{
    $html = scraperWiki::scrape("http://www.rifiutiebonifica.puglia.it/dettaglio_differenziata.php?ato=" . $p_atoCODE . "&data=12");
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("table tr") as $data) {
        $tds = $data->find("td");
        $a = $data->find("a");
        if (isset($a[0])) {
            $link = $a[0]->href;
            $link = str_replace("dettaglio_trasmissione.php?IdComune=", "", $link);
            $position = strrpos($link, "&");
            $id = substr($link, 0, $position);
            $ato = $p_atoCODE;
            $comuni = array('comune' => $tds[0]->plaintext, 'id' => $id);
            scraperwiki::save(array('id'), $comuni);
        }
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 19, Source: test_rifiuti_puglia.php

Example 14: topSites

function topSites()
{
    $page = 0;
    $country = 'IT';
    $limit = 20;
    $count = 0;
    while ($limit > $page) {
        $html = scraperWiki::scrape("http://www.alexa.com/topsites/countries;" . $page . "/" . $country);
        $dom = new simple_html_dom();
        $dom->load($html);
        foreach ($dom->find("span[class=topsites-label]") as $data) {
            $record = array('site' => $data->plaintext);
            scraperwiki::save(array('site'), $record);
            $count++;
        }
        ++$page;
    }
    print $count;
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 19, Source: top_site_it.php

Example 15: data_from_overview_page

function data_from_overview_page($url, $type)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $count = 0;
    $base_url = 'http://www.dnr.state.mn.us';
    foreach ($dom->find(".paddingbig table tr table tr") as $rows) {
        $count++;
        $data = $rows->find("td");
        $link_image = $data[0]->find("a");
        $image = $data[0]->find("img");
        $link_text = $data[1]->find("a");
        $name = $link_text[0]->plaintext;
        if (!empty($data[0]->plaintext)) {
            $record = array('id' => $type . '--' . strtolower(str_replace(' ', '-', $name)), 'type' => $type, 'name' => $name, 'link' => !empty($link_image[0]->href) ? $base_url . $link_image[0]->href : '', 'thumb_url' => !empty($image[0]->src) ? $image[0]->src : '', 'timestamp' => time());
            scraperwiki::save(array('id'), $record);
        }
    }
}
Developer ID: flyeven, Project: scraperwiki-scraper-vault, Lines: 20, Source: mn_dnr_invasive_species.php


Note: The scraperwiki::save examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. For distribution and use, please refer to the License of the corresponding project; do not republish without permission.