当前位置: 首页>>代码示例>>PHP>>正文


PHP scraperwiki::attach方法代码示例

本文整理汇总了PHP中scraperwiki::attach方法的典型用法代码示例。如果您正苦于以下问题:PHP scraperwiki::attach方法的具体用法?PHP scraperwiki::attach怎么用?PHP scraperwiki::attach使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scraperwiki的用法示例。


在下文中一共展示了scraperwiki::attach方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: array

            $company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->plaintext;
        }
        $website = $profile->find("span.orange-text a", 0) ? $profile->find("span.orange-text a", 0)->href : '';
        if ($profile->find("div.blue3-empty-box div.content div.word-wrap", 0)) {
            $info = $profile->find("div.blue3-empty-box div.content div.word-wrap", 0)->plaintext;
        } else {
            $info = '';
        }
        $record = array('name' => $profile->find("//div/a/span", 1)->plaintext, 'company' => $company, 'phone' => $profile->find("strong.big-blue3-text span", 0)->plaintext, 'website' => $website);
        scraperwiki::save(array('company'), $record);
        //print json_encode($record) . "\n";
        scraperwiki::save_var('last', $profile_no);
    }
}
//scraperwiki::save_var('last', 0);
scraperwiki::attach("find_4n_profiles");
$links = scraperwiki::select("profile from find_4n_profiles.swdata");
require 'scraperwiki/simple_html_dom.php';
$profile = new simple_html_dom();
foreach ($links as $link) {
    set_time_limit(0);
    $profile_no = intval(str_replace('http://www.4networking.biz/Members/Details/', '', $link['profile']));
    if ($profile_no > scraperwiki::get_var('last')) {
        $html = scraperWiki::scrape($link['profile']);
        $profile->load($html);
        if (!($company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->title)) {
            $company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->plaintext;
        }
        $website = $profile->find("span.orange-text a", 0) ? $profile->find("span.orange-text a", 0)->href : '';
        if ($profile->find("div.blue3-empty-box div.content div.word-wrap", 0)) {
            $info = $profile->find("div.blue3-empty-box div.content div.word-wrap", 0)->plaintext;
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:4n_members.php

示例2: array

// Aggregate the per-stop rows into one newline-separated coordinate string per
// route, then persist one record per route.
$routemap = array();
foreach ($routes as $route) {
    $key = $route['route'];
    $routemap[$key]['route'] = $key;
    // Initialise the accumulator explicitly on first sight of a route instead
    // of using "@" to suppress the undefined-index notice while concatenating.
    if (!isset($routemap[$key]['coords'])) {
        $routemap[$key]['coords'] = '';
    }
    // "2357" is an opaque trailing field — presumably an altitude/marker value
    // expected by the consumer of this data; TODO confirm.
    $routemap[$key]['coords'] .= $route['latitude'] . ',' . $route['longitude'] . ',2357' . "\n";
}
$theroutes = array();
$count = 0;
foreach ($routemap as $a_route) {
    $count++;
    $theroutes[] = array('id' => $count, 'route' => $a_route['route'], 'coords' => $a_route['coords']);
}
scraperwiki::save_sqlite(array("id"), $theroutes);
//Whoops, seems that doing 600 queries in under 80 seconds isn't a smart idea. This scraper attempts to aggregate coordinates into something usable.
scraperwiki::attach("tfl_bus_routes_scraper", "src");
$routes = scraperwiki::select("route, stop_name, latitude, longitude from src.tfl_buses where run = 1 order by sequence asc");
// Second pass: same aggregation over the freshly selected rows.
$routemap = array();
foreach ($routes as $route) {
    $key = $route['route'];
    $routemap[$key]['route'] = $key;
    if (!isset($routemap[$key]['coords'])) {
        $routemap[$key]['coords'] = '';
    }
    $routemap[$key]['coords'] .= $route['latitude'] . ',' . $route['longitude'] . ',2357' . "\n";
}
$theroutes = array();
$count = 0;
foreach ($routemap as $a_route) {
    $count++;
    $theroutes[] = array('id' => $count, 'route' => $a_route['route'], 'coords' => $a_route['coords']);
}
scraperwiki::save_sqlite(array("id"), $theroutes);
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:tfl_bus_route_aggregator.php

示例3: header

$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
#   $licenses = scraperwiki::select("* from lic.swdata");
//    $licenses = scraperwiki::getData('irish-epa-licenses');
# NOTE(review): "$limit = 250" is an ordinary inline assignment, not a named
# argument — attach() simply receives 250 as its second parameter. Also verify
# that attach() actually returns the dataset being json-encoded below; in most
# ScraperWiki APIs attach() only mounts the datastore.
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
# (The three copies below are byte-identical repeats of the snippet above.)
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
#   $licenses = scraperwiki::select("* from lic.swdata");
//    $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
#   $licenses = scraperwiki::select("* from lic.swdata");
//    $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
#   $licenses = scraperwiki::select("* from lic.swdata");
//    $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:exhibit_json_1.php

示例4: str_replace

    $txt = str_replace("</p>", "", $txt);
    $txt = preg_replace('/\\s+/', ' ', $txt);
    return $txt;
}
// Return the key of the first entry whose COTTAGE_URL equals $id (strict
// comparison), or null when no entry matches.
function searchForId($id, $array)
{
    $found = null;
    foreach ($array as $key => $entry) {
        if ($entry['COTTAGE_URL'] === $id) {
            $found = $key;
            break;
        }
    }
    return $found;
}
$blacklist = array();
$url = "http://www.coastandcountry.co.uk/cottage-details/";
scraperwiki::attach("coastandcountrycouk");
# get an array of the cottage data to scrape
$cottData = scraperwiki::select("COTTAGE_URL, PRICE_HIGH, PRICE_LOW from 'coastandcountrycouk'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottID");
if ($placeholder != "") {
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    scraperwiki::save_var("cottID", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
    foreach ($blacklist as $blItem) {
        if ($value['COTTAGE_URL'] == $blItem) {
            continue 2;
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:coast_and_country.php

示例5: header

//print "{ \"items\": ".json_encode($alltrips) ."}";
// Emit $alltrips either as JSONP (when a callback is requested) or plain JSON.
$callback = isset($_GET['callback']) ? $_GET['callback'] : '';
// SECURITY: the callback comes from the query string; echoing it verbatim into
// a text/javascript response allows script injection. Only accept a plain JS
// identifier (dots allowed for namespaced callbacks); anything else falls back
// to the JSON branch.
if ($callback !== '' && preg_match('/^[A-Za-z_$][A-Za-z0-9_$.]*$/', $callback)) {
    header("Content-Type: text/javascript; charset=utf8");
    echo $callback . "(" . json_encode($alltrips) . ");";
} else {
    header("Content-type: application/json");
    echo json_encode($alltrips);
}
// {label}    {id}    {type}    {day}    {date}            {year}    {time}    {startdate}    {latlng}        {arasnotaras}    {details}        {place}    {act}    {issue}    {constitutional}    {destf}    {address}    {days}        {destination}
?>
      <?php 
//$sourcescraper = 'irish_president_engagementstest';
//$s = scraperwiki::scrape($sourcescraper, $limit=250);
// = scraperwiki::attach($sourcescraper, $limit=250);
// Pull the last week of presidential engagements from the attached scraper and
// normalise the "info"/"label" fields: drop "(" and turn ")" into ",".
scraperwiki::attach('irish_president_engagementsjson');
$trips = scraperwiki::select("* from irish_president_engagementsjson.swdata where date > date('now','-7 day');");
$alltrips = array();
foreach ($trips as $trip) {
    $trip["info"] = str_replace(array('(', ')'), array('', ','), $trip["info"]);
    $trip["label"] = str_replace(array('(', ')'), array('', ','), $trip["label"]);
    $alltrips[] = $trip;
}
//header('Content-type: application/json');
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:exhibit_jsoncopy_1.php

示例6: foreach

</itunes:owner>

<?php 
// Build the RSS <item> elements from the track rows.
foreach ($data as $item) {
    // Escape text interpolated into XML: a raw "&", "<" or ">" in an artist,
    // title or URL would otherwise produce an invalid feed.
    echo "        <item>\n";
    echo "            <title>" . htmlspecialchars($item['artist'] . " - " . $item['title'], ENT_QUOTES) . "</title>\n";
    echo "            <enclosure url=\"" . htmlspecialchars($item['url'], ENT_QUOTES) . "\" type=\"audio/mpeg\" />\n";
    // NOTE(review): using loved_count as the <guid> means the guid changes as
    // the count changes and can collide between tracks — confirm intended.
    echo "            <guid>" . $item['loved_count'] . "</guid>\n";
    echo "        </item>\n";
}
?>
</channel>
</rss><?php 
scraperwiki::httpresponseheader('Content-Type', 'application/atom+xml');
scraperwiki::attach("exfm");
$data = scraperwiki::select("* from exfm.swdata");
?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">

<channel>
<title>TuMusika Evolution Podcast</title>
<link>http://www.tumusika.net/</link>
<language>es-es</language>
<itunes:owner>
<itunes:name>TuMusika Evolution</itunes:name>
<itunes:email>darkgiank@darkgiank.com</itunes:email>
</itunes:owner>

<?php 
// .. CREACION DEL ARRAY
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:exfmrss.php

示例7: simple_html_dom

<?php

// Load each stored hotel-listing page from the attached scraper's datastore
// and save every hotel name + link into the hotel_list table.
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
scraperwiki::attach("test_1_2");
$result = scraperwiki::sqliteexecute("select html from hotel_list_pages");
foreach ($result->data as $page) {
    $dom->load($page[0]);
    foreach ($dom->find("table.hotellist tr") as $row) {
        $anchors = $row->find("td h3 a");
        scraperwiki::save_sqlite(array('hotel'), array('hotel' => $anchors[0]->plaintext, 'url' => $anchors[0]->href), 'hotel_list');
    }
}
// Byte-for-byte duplicate run in the original source; behaviour preserved.
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
scraperwiki::attach("test_1_2");
$result = scraperwiki::sqliteexecute("select html from hotel_list_pages");
foreach ($result->data as $page) {
    $dom->load($page[0]);
    foreach ($dom->find("table.hotellist tr") as $row) {
        $anchors = $row->find("td h3 a");
        scraperwiki::save_sqlite(array('hotel'), array('hotel' => $anchors[0]->plaintext, 'url' => $anchors[0]->href), 'hotel_list');
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:30,代码来源:test_2_2.php

示例8: foreach

print '</ul>';
// NOTE(review): $w is interpolated into the SQL unescaped. The author's own
// comment elsewhere in this scraper says the connection is read-only, but this
// is still unsanitised user input reaching a query — confirm it cannot break
// or redirect the query.
$data = scraperwiki::select("count(*) AS c, subject AS s FROM contact INNER JOIN contact_subject ON contact_subject.contact_id=contact.contact_id WHERE behalf='{$w}' GROUP BY subject ORDER BY subject DESC");
print "<h2>Subjects covered</h2><ul>";
foreach ($data as $row) {
    // extract() imports the row's SQL aliases as locals: $c (count), $s (subject).
    extract($row);
    ?>
<li><?php 
    echo $s;
    ?>
 (<?php 
    echo $c;
    ?>
)</li><?php 
}
print "</ul>";
scraperwiki::attach("communication_log");
$who = $_SERVER['URLQUERY'];
$w = urldecode($who);
# Note that we don't bother to SQL escape our arg - we have a read-only connection already, so meh.
$data = scraperwiki::select("contact.contact_id AS id, person, title, organization, uri, date_contact_h FROM contact INNER JOIN victim ON victim.contact_id=contact.contact_id WHERE behalf='{$w}' ORDER BY date_contact_c DESC");
$orgs = array();
print "<h2>Contacts on behalf of {$w}</h2><ul>";
foreach ($data as $row) {
    extract($row);
    $sub = scraperwiki::select("subject FROM contact_subject WHERE contact_id='{$id}' ORDER BY subject");
    $s = array();
    foreach ($sub as $sRow) {
        $s[] = $sRow['subject'];
    }
    $s = join(', ', $s);
    ?>
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:communication_log_lobbier.php

示例9: fourOhFour

            return $key;
        }
    }
    return null;
}
# Check for pages with no usable data
# Returns true when the "Home not available" marker appears at/after byte 1200.
function fourOhFour($html)
{
    # Pages shorter than the search offset cannot contain the marker there;
    # bailing out early also prevents strpos() from raising a ValueError
    # (PHP 8) / warning when the 1200-byte offset exceeds the string length.
    if (strlen($html) < 1200) {
        return false;
    }
    return strpos($html, 'Home not available', 1200) !== false;
}
$blacklist = array();
# get an array of the cottage data to scrape
scraperwiki::attach("bluechip_summary");
$cottData = scraperwiki::select(" COTTAGE_ID, COTTAGE_URL, SLEEPS, BEDROOMS, FEATURES,COTTAGE_NAME, PRICE_LOW from 'bluechip_summary'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottURL");
if ($placeholder != "") {
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    $highPrice = "";
    $lowPrice = "";
    $found = 0;
    $count = 0;
    scraperwiki::save_var("cottURL", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:bluechip_full.php

示例10: substr

<?php

$sourcescraper = 'nh_gencourt_votes';
scraperwiki::attach('nh_gencourt_votes');
// Sort newest vote first. The substr() concatenation rebuilds the stored date
// into a lexicographically sortable YYYYMMDD key — this assumes date_of_vote
// is stored as MM/DD/YYYY (chars 7+ = year, 1-2 = month, 4-5 = day); TODO
// confirm against the source table. vote_num is cast because it is stored as
// text.
$data = scraperwiki::select("* from nh_gencourt_votes.bill_votes \n         order by \n             substr(date_of_vote, 7) || substr(date_of_vote, 1, 2) || substr(date_of_vote, 4, 2) desc, \n             cast(vote_num as int) desc\n        ");
?>
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- http://twitter.github.com/bootstrap/base-css.html -->
    <link href="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.0/css/bootstrap-combined.min.css" rel="stylesheet">
    <script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
    <script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.0/js/bootstrap.min.js"></script>
    <style>
        th {
            white-space: nowrap;
        }
    </style>
</head>
<body>
    <div class="container">
    <div class="page-header"><h1>NH House Bills</h1></div>
    <div class="row">
        <div class="span12">
            <table class="table table-striped table-bordered table-hover table-condensed">
                <tr>
                    <th>Date of Vote</th>
                    <th>Vote #</th>
                    <th>Bill #</th>
                    <th>Bill Title</th>
                    <th>Question/Motion</th>
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:nh_house_votes.php

示例11: fourOhFour

            return $key;
        }
    }
    return null;
}
# Check for pages with no usable data
# Returns true when the "Home not available" marker appears at/after byte 1200.
function fourOhFour($html)
{
    # Pages shorter than the search offset cannot contain the marker there;
    # bailing out early also prevents strpos() from raising a ValueError
    # (PHP 8) / warning when the 1200-byte offset exceeds the string length.
    if (strlen($html) < 1200) {
        return false;
    }
    return strpos($html, 'Home not available', 1200) !== false;
}
$blacklist = array();
# get an array of the cottage data to scrape
scraperwiki::attach("hoseasons_summary");
$cottData = scraperwiki::select(" COTTAGE_ID, COTTAGE_URL, SLEEPS, BEDROOMS, PETS,COTTAGE_NAME, PRICE_LOW, PRICE_HIGH from 'hoseasons_summary'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottURL");
if ($placeholder != "") {
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    $highPrice = "";
    $lowPrice = "";
    $found = 0;
    $count = 0;
    scraperwiki::save_var("cottURL", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:hoseasons_full.php

示例12: array

        //no form for given combinations
        return array();
    } else {
        //get dom
        $dom = new simple_html_dom();
        $dom->load($html);
        $orgs_obj = $dom->find('select[name=icoNam]', 0)->find('option');
        foreach ((array) $orgs_obj as $org) {
            $data[] = array('value' => $org->value, 'label' => trim($org->innertext));
        }
        return $data;
    }
}
require 'scraperwiki/simple_html_dom.php';
//read the saved tables
scraperwiki::attach("cz_public_organizations_ufis_basics", "src");
$dris = scraperwiki::select("* from src.dri order by value");
$periods = scraperwiki::select("* from src.period order by value");
$forms = scraperwiki::select("* from src.form order by value");
$chapters = scraperwiki::select("* from src.chapter order by value");
$regions = scraperwiki::select("* from src.region order by value");
$periods = array('0' => array('value' => '12/2012'));
//temp!!
//$forms = array('0' => array('value' => 50)); //temp
//scraperwiki::save_var('last_c',4); //temp
$d = scraperwiki::get_var('last_d', 0);
$p = scraperwiki::get_var('last_p', 0);
$f = scraperwiki::get_var('last_f', 0);
$c = scraperwiki::get_var('last_c', 0);
$r = scraperwiki::get_var('last_r', 0);
foreach ((array) $dris as $dkey => $dri) {
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:cz_public_organizations_ufis_ids.php

示例13: foreach

    //foreach row save info
    foreach ((array) $rows as $row) {
        //inner org_id is in <a href= ... it is used for getting details from the system
        $as = $row->find("a");
        $tmp_text = $as[0]->href;
        $inner_org_id = substr($tmp_text, $len);
        //<td>
        $tds = $row->find("td");
        //save the data
        $out = array('org_id' => trim($tds[0]->plaintext), 'short_name' => trim($tds[1]->plaintext), 'inner_org_id' => $inner_org_id, 'chapter' => $data_row['chapter']);
        scraperwiki::save_sqlite(array('org_id'), $out);
    }
}
require 'scraperwiki/simple_html_dom.php';
//read the data saved from downloader
scraperwiki::attach("cz_public_organizations_2_downloader", "src");
$data = scraperwiki::select("* from src.swdata");
//helper
$len = strlen("/cgi-bin/ufisreg/detail.pl?org=");
foreach ((array) $data as $data_row) {
    //get dom from data
    $dom = new simple_html_dom();
    $dom->load($data_row['html']);
    //extract information
    $rows = $dom->find("tr");
    //first row is the header, removing it
    array_shift($rows);
    //foreach row save info
    foreach ((array) $rows as $row) {
        //inner org_id is in <a href= ... it is used for getting details from the system
        $as = $row->find("a");
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:cz_public_organizations_2_retrieval.php

示例14: get_first_string

    return $result;
}
/**
 * finds 1st substring between opening and closing markers
 * @param string $text          haystack to search
 * @param string $openingMarker marker preceding the wanted substring
 * @param string $closingMarker marker following the wanted substring
 * @return string|null 1st substring, or null when no match exists
 */
function get_first_string($text, $openingMarker, $closingMarker)
{
    $out_ar = returnSubstrings($text, $openingMarker, $closingMarker);
    // Guard the empty-result case: indexing [0] on an empty array would raise
    // an undefined-offset notice and silently yield null anyway.
    return isset($out_ar[0]) ? $out_ar[0] : null;
}
//retrieves data about voting members of assembly from https://scraperwiki.com/scrapers/cz_praha_voting_records_retrieval/
//2010-2014
require 'scraperwiki/simple_html_dom.php';
scraperwiki::attach("cz_praha_voting_records_retrieval", "src");
$rows = scraperwiki::select("distinct(mp_id) from src.mp_vote");
// Initialise the collector up front so save_sqlite() below always receives an
// array, even when the select returns no rows (previously $data was undefined
// in that case).
$data = array();
foreach ($rows as $row) {
    $url = "http://www.praha.eu/jnp/cz/home/volene_organy/zastupitelstvo_hmp/slozeni_zastupitelstva/index.html?memberId=" . $row['mp_id'];
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Crude slicing: the member's details sit between </h2> and the next <div>.
    $part = get_first_string($html, '</h2>', '<br');
    $part = get_first_string($html, '</h2>', '<div>');
    $name = trim($dom->find('h2', 0)->plaintext);
    // NOTE(review): $email is extracted but never saved below — dead store,
    // kept for behavioural parity; confirm whether it should be persisted.
    $email = get_first_string($part, 'mailto:', '"');
    $party = trim(get_first_string($part, 'Strana:</span>', '<br'));
    $club = trim(get_first_string(get_first_string($part, 'Klub:</span>', '</a') . '::', '">', '::'));
    $data[] = array('id' => $row['mp_id'], 'name' => $name, 'party' => $party, 'club' => $club);
}
scraperwiki::save_sqlite(array('id'), $data, 'info');
/**
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:cz_praha_members_of_assembly.php

示例15: fetchRegionHydroData

// Attach one region's scraper datastore and fold its hydro planning
// applications into the combined dataset.
//   $dbname    - name of the source scraper's datastore to attach
//   $aliasname - alias under which the attached datastore is queried
function fetchRegionHydroData($dbname, $aliasname)
{
    scraperwiki::attach($dbname, $aliasname);
    // debug_tables()/updateHydroApplications() are defined elsewhere in this
    // scraper; presumably the first logs the attached tables and the second
    // merges applications ('' = no filter) — confirm against their definitions.
    debug_tables($aliasname);
    updateHydroApplications($aliasname, '');
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:6,代码来源:hydro_planning_applications_3.php


注:本文中的scraperwiki::attach方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。