当前位置: 首页>>代码示例>>PHP>>正文


PHP scraperwiki::save_metadata方法代码示例

本文整理汇总了PHP中scraperwiki::save_metadata方法的典型用法代码示例。如果您正苦于以下问题:PHP scraperwiki::save_metadata方法的具体用法?PHP scraperwiki::save_metadata怎么用?PHP scraperwiki::save_metadata使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scraperwiki的用法示例。


在下文中一共展示了scraperwiki::save_metadata方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: time

# NOTE(review): the eight statements below are repeated verbatim at the end of
# this script. At this point $latlng has not been assigned yet, so the array
# built here contains an undefined value.
$date = time();
$arr = array("name", "breed", $date, $latlng);
# Metadata functions.
# Reads the 'keyname' entry, passing 'No message yet' as the second argument
# (presumably the fallback value; confirm against the scraperwiki API).
# The inline `$default = ...` also assigns a script-level $default variable.
$latest_message = scraperwiki::get_metadata('keyname', $default = 'No message yet');
print $latest_message;
# The value is stored under 'latest_message', which is not the key read above.
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
# NOTE(review): this $arr is overwritten further down before anything reads it.
$arr = array("breed", "name");
# Test scraper for PHP language.
# Should contain all our documented PHP functions.
# A fail in this scraper indicates a code failure somewhere.
require 'scraperwiki/simple_html_dom.php';
# Scrape function.
# TODO: Clarify, can we send POST parameters? Does not fail.
# The second argument is an associative array ("foo" => "bar") handed to scrape().
$arr = array("foo" => "bar");
$html = scraperwiki::scrape("http://scraperwiki.com/hello_world.html", $arr);
print $html;
# Geo function.
# Only element 0 of the returned value is printed.
$latlng = scraperwiki::gb_postcode_to_latlng("E1 5AW");
print $latlng[0];
# Save function including date and latlng.
# 'name' is passed as the first argument of save() (presumably the unique-key
# list; confirm). The record itself holds only 'name' and 'breed'.
$arr = array('name' => 'Fluffles', 'breed' => 'Alsatian');
scraperwiki::save(array('name'), $arr);
# This array is built after the save() call above and is not saved by this script.
$date = time();
$arr = array("name", "breed", $date, $latlng);
# Metadata functions.
# Same read-then-write sequence as at the top of the script: read 'keyname',
# print it, then store a fixed string under 'latest_message'.
$latest_message = scraperwiki::get_metadata('keyname', $default = 'No message yet');
print $latest_message;
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
# Final assignment; nothing after this line reads $arr.
$arr = array("breed", "name");
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:testing_for_php.php

示例2: processShow

//Lost
//$series_string[""] = ""; // Dummy
/* Specify a show to process. If none is specified, will process all shows. */
//$show = "Cali";
/* actual code begins here */
/*
 * Driver: scrape the episode list of a single show when $show has been set
 * above, otherwise of every entry in $series_string. The value stored for a
 * show is the path segment placed between "/title/" and "/episodes" in the
 * IMDb URL.
 */
if (isset($show)) {
    $scrape_url = "http://www.imdb.com/title/" . $series_string[$show] . "/episodes";
    processShow($scrape_url);
} else {
    foreach ($series_string as $show => $temp) {
        // $temp is the value stored for $show, i.e. $series_string[$show].
        $scrape_url = "http://www.imdb.com/title/" . $temp . "/episodes";
        processShow($scrape_url);
    }
}
/*
 * Column list stored under the 'data_columns' metadata key. Each column is
 * named exactly once ('airdate' used to appear twice), matching the keys of
 * the records that processShow() builds.
 */
$keys = array('series_title', 'nr', 'season', 'episode', 'airdate', 'link', 'episode_title', 'description');
scraperwiki::save_metadata('data_columns', $keys);
/* function declarations */
/* this function will do all of the scraping, string-matching and saving */
function processShow($url)
{
    $html = scraperwiki::scrape($url);
    $regexp_show = '|<h1><small>Episode list for<br></small><a [^>]*>&#x22;([^&]*)&#x22;</a>|';
    preg_match($regexp_show, $html, $arr);
    $series_title = $arr[1];
    $regexp = "|<div class=\"filter-all filter-year-([0-9]{4,4})\"><hr /><table cellspacing=\"0\" cellpadding=\"0\"><tr> <td valign=\"top\"><div class=\"episode_slate_container\"><div class=[^>]*></div></div></td> <td valign=\"top\"><h3>Season ([[:digit:]]*), Episode ([[:digit:]]*): <a href=\"(/title/[[:alnum:]]*/)\">([^<]*)</a></h3><span class=\"less-emphasis\">Original Air Date&mdash;<strong>([^<]*)</strong></span><br>([^<]*)[^\n]*</td></tr></table></div>|";
    preg_match_all($regexp, $html, $arr, PREG_SET_ORDER);
    $keys = array('series_title', 'nr', 'season', 'episode', 'airdate', 'link', 'airdate', 'episode_title', 'description');
    $i = 0;
    foreach ($arr as $val) {
        $i++;
        $data = array('series_title' => clean($series_title), 'nr' => $i, 'season' => clean($val[2]), 'episode' => clean($val[3]), 'airdate' => clean(date('d.m.Y', strtotime($val[6]))), 'link' => clean('http://www.imdb.com' . $val[4]), 'episode_title' => clean($val[5]), 'description' => clean($val[7]));
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:imdb_episode_data_2.php

示例3: begin

/**
 * Moves the stored scraping window forward by one day and scrapes that day.
 *
 * Reads the "datefrom" and "dateto" metadata entries (each falling back to
 * "1980-01-01"), advances "datefrom" by one calendar day, passes the new date
 * (formatted Y-m-d) to mainDate(), and then writes both entries back.
 * "dateto" is written back unchanged apart from being normalised to Y-m-d.
 *
 * @return void
 */
function begin()
{
    $dateFrom = scraperwiki::get_metadata("datefrom", "1980-01-01");
    $dateTo = scraperwiki::get_metadata("dateto", "1980-01-01");
    $dayFrom = $dateFrom;
    $dayTo = $dateTo;
    // The loop body runs exactly once per call; the bound is the number of
    // days handled in a single run.
    for ($i = 1; $i <= 1; $i++) {
        $currentTs = strtotime($dayFrom);
        // Use the four-digit year ("Y"). The two-digit form ("y") makes
        // mktime() guess the century (0-69 => 20xx, 70-100 => 19xx), which
        // silently yields the wrong date outside 1970-2069.
        // A day value one past the end of the month is normalised by mktime()
        // into the first day of the following month.
        $nextDay = mktime(
            0,
            0,
            0,
            (int) date("m", $currentTs),
            (int) date("d", $currentTs) + 1,
            (int) date("Y", $currentTs)
        );
        $dayFrom = date('Y-m-d', $nextDay);
        mainDate($dayFrom);
        // Persist progress after the day has been handed to mainDate().
        scraperwiki::save_metadata("datefrom", date('Y-m-d', strtotime($dayFrom)));
        scraperwiki::save_metadata("dateto", date('Y-m-d', strtotime($dayTo)));
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:14,代码来源:leeds-planning-php.php

示例4: unset

    }
    unset($tabletrs);
    $dom->__destruct();
    unset($dom);
    unset($html);
    scraperwiki::save(array('ID', 'sector'), $record);
    sleep(2);
    return $record;
}
// Driver: pull the (time, sector, url) rows produced by the 'tedscrapper'
// scraper and run scrapeTEDDataPage() on each url, remembering how far we got.
$sourcescraper = 'tedscrapper';
// Checkpoint written by the loop below; -1 is passed as the second argument
// (presumably the value returned when nothing is stored yet; confirm).
$lasttime = scraperwiki::get_metadata('lasttime', -1);
// Attach the source scraper so its swdata table can be queried as tedscrapper.swdata.
scraperwiki::attach($sourcescraper);
print "Querying for data\n";
// The comparison is '>=', so a row whose time equals the stored checkpoint is
// selected again on the next run.
// NOTE(review): $lasttime is concatenated straight into the SQL text.
$data = scraperwiki::sqliteexecute("select distinct time, sector, url from tedscrapper.swdata where time >= '" . $lasttime . "' order by time");
print count($data->data) . " items to process\n";
$count = 0;
foreach ($data->data as $ind => $item) {
    // Columns arrive positionally, in the order named in the select list.
    $time = $item[0];
    $sector = $item[1];
    $url = $item[2];
    $record = scrapeTEDDataPage($url, $sector);
    // Progress line: running index (post-increment, so the first row prints 0),
    // the record's 'TI' field, and current memory use converted to MB.
    print $count++ . " " . $record['TI'] . " " . memory_get_usage() / 1024 / 1024 . "MB\n";
    // Checkpoint after every row; rows are ordered by time, so this only moves forward.
    scraperwiki::save_metadata('lasttime', $time);
    // Drop every per-row variable and the processed row itself
    // (presumably to keep memory usage flat on long runs).
    unset($item);
    unset($record);
    unset($time);
    unset($sector);
    unset($url);
    unset($data->data[$ind]);
    unset($ind);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:tedscrapper_page.php

示例5: trim

            $legal = trim($legal[1][0]);
        } else {
            $legal = '';
        }
        preg_match_all('|<div class="assoc">Primary contact address</div>(.*?)<div|', $code, $primary);
        if (isset($primary[1][0])) {
            $primary = trim($primary[1][0]);
        } else {
            $primary = '';
        }
        $primary = parseAddress($primary);
        $legal = parseAddress($legal);
        if (trim($name) != '') {
            scraperwiki::save(array('num'), array('num' => "" . clean($num), 'name' => clean($name), 'trading' => clean($trading), 'legal_address' => clean($legal['address']), 'legal_phone' => clean($legal['phone']), 'legal_fax' => clean($legal['fax']), 'legal_email' => clean($legal['email']), 'legal_web' => clean($legal['web']), 'primary_address' => clean($primary['address']), 'primary_phone' => clean($primary['phone']), 'primary_fax' => clean($primary['fax']), 'primary_email' => clean($primary['email']), 'primary_web' => clean($primary['web']), 'primary_courses' => clean($primary['courses'])));
        }
        scraperwiki::save_metadata('counter', $counter);
    }
}
function parseAddress($val)
{
    preg_match_all('|<strong>Telephone: </strong>(.*?)<br />|', $val, $phone);
    if (isset($phone[1][0])) {
        $dat['phone'] = trim($phone[1][0]);
    } else {
        $dat['phone'] = '';
    }
    preg_match_all('|<strong>E-mail: </strong><a href="mailto:(.*?)">.*?</a><br />|', $val, $email);
    if (isset($email[1][0])) {
        $dat['email'] = trim($email[1][0]);
    } else {
        $dat['email'] = '';
开发者ID:scottbw,项目名称:ukrlp-1,代码行数:31,代码来源:scraper.php

示例6: trim

    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));
    $_GLOBAL['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";
    foreach ($dom->find('table', 2)->find('tr') as $row) {
        if (is_numeric($row->find('td', 0)->plaintext)) {
            $year = trim($row->find('td', 0)->plaintext);
            $position = trim(str_replace('&nbsp;', '', $row->find('td', 1)->plaintext));
            if (trim($position) == 'Champion') {
                $position = 1;
            }
            $leagueLevel = trim($row->find('td', 2)->plaintext);
            $overallPosition = trim($row->find('td', 3)->plaintext);
            $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
            $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));
            $dataset = array('club' => $formatClubName, 'year' => $year, 'finishedPosition' => $position, 'league' => $leagueLevel, 'overallPosition' => $overallPosition, 'avgAttendance' => $avgAttendance, 'totalAttendance' => $totalAttendance);
            scraperwiki::save(array('club', 'year'), $dataset);
        }
    }
    /*
     * The next two lines stop a memory leak in Simple HTML DOM as per http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
// Driver: follow every link on the front page whose href contains 'attnclub'
// and hand the absolute URL to clubURL().
foreach ($frontDom->find('a') as $link) {
    if (strpos($link->href, 'attnclub') !== FALSE) {
        clubURL('http://www.european-football-statistics.co.uk/' . $link->href);
    }
}
// Store the collected club names as one comma-separated metadata value.
// Fall back to an empty list when nothing was collected at this scope, so that
// implode() always receives an array (a non-array argument is an error on
// current PHP versions).
// NOTE(review): $_GLOBAL is an ordinary variable, not the $GLOBALS superglobal.
// Confirm that the function filling it shares this scope (e.g. via `global`).
$clubNames = (isset($_GLOBAL['clubs']) && is_array($_GLOBAL['clubs'])) ? $_GLOBAL['clubs'] : array();
scraperwiki::save_metadata('Clubs', implode(',', $clubNames));
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:30,代码来源:pauls-hmhse-scraper.php


注:本文中的scraperwiki::save_metadata方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。