This article collects typical usage examples of the scraperwiki::scrape method in PHP. If you are wondering how scraperwiki::scrape works, how to call it, or what real-world code using it looks like, the hand-picked examples below should help. You can also explore further usage examples of the scraperwiki class that the method belongs to.
Fifteen code examples of the scraperwiki::scrape method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better PHP code samples.
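Most of the examples below follow the same three-step pattern: fetch a page with scraperwiki::scrape, parse the HTML with Simple HTML DOM, and persist rows with scraperwiki::save_sqlite. Here is a minimal sketch of that pattern; the URL, selector, and column names are placeholders, not taken from any example below, and the require path assumes the classic ScraperWiki PHP environment.

// Minimal sketch of the common pattern (placeholders throughout).
// The require path is the one used on classic ScraperWiki; adjust for your environment.
require 'scraperwiki/simple_html_dom.php';

$html = scraperwiki::scrape('http://example.com/listing');   // fetch raw HTML
$dom = new simple_html_dom();
$dom->load($html);

foreach ($dom->find('h2 a') as $link) {                       // placeholder selector
    $record = array('url' => $link->href, 'title' => trim($link->plaintext));
    scraperwiki::save_sqlite(array('url'), $record);           // 'url' acts as the unique key
}

// Free the DOM to avoid the Simple HTML DOM memory leak (see Example 2).
$dom->clear();
unset($dom);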
Example 1: parseModelsPage
function parseModelsPage($brandId, $brandName, $page)
{
    $html_content = scraperwiki::scrape($page);
    $this->html = str_get_html($html_content);
    foreach ($this->html->find("div.makers a") as $el) {
        $img = $el->find('img', 0);
        $m['name'] = $brandName . ' ' . $el->find('strong', 0)->innertext;
        $m['img'] = $img->src;
        $m['link'] = 'http://www.gsmarena.com/' . $el->href;
        $m['desc'] = $img->title;
        $temp = explode('-', $el->href);
        $m['id'] = (int) substr($temp[1], 0, -4);
        $m['brand_id'] = $brandId;
        scraperwiki::save_sqlite(array("id" => $m['id']), $m, "cell_model");
        $this->models++;
    }
    $pagination = $this->html->find("div.nav-pages", 0);
    if ($pagination) {
        $nextPageLink = $pagination->lastChild();
        if ($nextPageLink && $nextPageLink->title == "Next page") {
            $this->parseModelsPage($brandId, $brandName, 'http://www.gsmarena.com/' . $nextPageLink->href);
        }
    }
    $this->html->__destruct();
}
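Note that Example 1 references $this->html and $this->models, so it is presumably a method of a scraper class rather than a free function. A hedged sketch of a wrapper class and a call into it could look like this; the class name and sample arguments are assumptions, not part of the original example.

// Hypothetical wrapper class for Example 1; only the properties the method touches are declared.
class GsmArenaScraper
{
    public $html;
    public $models = 0;

    public function parseModelsPage($brandId, $brandName, $page)
    {
        // ... body of Example 1 goes here, unchanged ...
    }
}

// Hypothetical invocation; the brand id/name and listing URL are illustrative guesses.
$scraper = new GsmArenaScraper();
$scraper->parseModelsPage(9, 'Samsung', 'http://www.gsmarena.com/samsung-phones-9.php');
echo $scraper->models . " models saved\n";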
Example 2: clubURL
function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $clubName = trim(str_replace(' ', '', $dom->find('table', 0)->find('tr', 2)->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));
    $GLOBALS['clubs'][] = $formatClubName;   // $_GLOBAL is not a PHP superglobal; $GLOBALS is the correct one
    echo 'running ' . $formatClubName . "\n";
    foreach ($dom->find('table', 2)->find('tr') as $row) {
        if (is_numeric($row->find('td', 0)->plaintext)) {
            $year = trim($row->find('td', 0)->plaintext);
            $position = trim(str_replace(' ', '', $row->find('td', 1)->plaintext));
            if (trim($position) == 'Champion') {
                $position = 1;
            }
            $leagueLevel = trim($row->find('td', 2)->plaintext);
            $overallPosition = trim($row->find('td', 3)->plaintext);
            $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
            $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));
            $dataset = array('club' => $formatClubName, 'year' => $year, 'finishedPosition' => $position, 'league' => $leagueLevel, 'overallPosition' => $overallPosition, 'avgAttendance' => $avgAttendance, 'totalAttendance' => $totalAttendance);
            scraperwiki::save(array('club', 'year'), $dataset);
        }
    }
    /*
     * The next two lines stop a memory leak in Simple HTML DOM, as per
     * http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
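Example 2 appends each club name to a global clubs array (note the corrected $GLOBALS spelling above) and expects one attendance-stats page URL per club. A hedged driver could look like this; the URLs are placeholders, not real pages.

// Hypothetical driver for Example 2; the club page URLs are placeholders.
$GLOBALS['clubs'] = array();

$clubPages = array(
    'http://example.com/club/1/attendance',
    'http://example.com/club/2/attendance',
);

foreach ($clubPages as $url) {
    clubURL($url);
}

print_r($GLOBALS['clubs']);   // names collected by clubURL()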
Example 3: getCategories
function getCategories($u)
{
    global $baseurl, $f, $local;   // $local toggles the CSV branch below; it was missing from the original global list
    $path = "";
    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));
    echo "Loaded URL: " . $u . "\n";
    if ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]')) {
        $breadcrumb = $d->find('div[id=breadcrumb]', 0);
        //foreach($breadcrumb as $b) {
        //echo "Breadcrumb = " . $b;}
        if (!is_null($breadcrumb)) {
            foreach ($breadcrumb->children() as $crumb) {
                $path .= trim($crumb->innertext) . "/";
            }
            $path .= trim(strrchr($breadcrumb->innertext, ">"), "> ");
        }
        foreach ($d->find('div[id=ctl00_cphContent_gsaCatFacetContainer]', 0)->find('div[class=S2refinementsContainer]', 0)->children() as $div) {
            $name = trim(strstr($div->children(0)->innertext, "(", true));
            $url = $baseurl . $div->children(0)->href;
            $data = array("Name" => $name, "Path" => $path, "URL" => $url);
            echo $path . "/" . $name . "\n";
            if ($local) {
                fputcsv($f, array($name, $path, $url));
            } else {
                scraperwiki::save_sqlite(array("URL"), $data);
            }
            getCategories($url);
        }
    }
}
Example 4: getIngredients
function getIngredients($html)
{
    $i = 0;
    $dom = new simple_html_dom();
    $dom->load($html);
    //foreach($dom->find('result-item',1)->href as $data)
    //{
    // if ($data != null)
    //$res = trim($data->plaintext);
    $res = $dom->find('a[class=callout]', 1)->href;
    $res = str_replace("reviews/", "", $res);
    echo "http://www.foodnetwork.com" . $res;
    $html1 = scraperwiki::scrape("http://www.foodnetwork.com" . $res);
    $domFoods = new simple_html_dom();
    //$domFoods->load($html1);
    $h = str_get_html($html1);
    //echo $domFoods;
    echo "\n\n";
    $href = '';   // initialised to avoid an undefined-variable notice when no link is present
    foreach ($h->find('li[class=ingredient]') as $data) {
        $ingredient = $data->plaintext;
        if (isset($h->href)) {
            $href = $h->href;
        }
        //foreach($domFoods->find('ul[class=kv-ingred-list1]',1)->children() as $data){
        //echo $data->plaintext;
        scraperwiki::save(array('ing'), array('ing' => $ingredient, 'href' => $href));
    }
}
Example 5: getProducts
function getProducts($u, $cat)
{
    global $o;
    $d = new simple_html_dom();
    $d->load(scraperwiki::scrape($u));
    //echo "Loaded URL: " . $u . "\n";
    $items = $d->find('li.grid-item');
    if (count($items) > 0) {
        foreach ($items as $p) {
            $prod = $p->find('p.product-name > a', 0);
            $prodname = trim($prod->innertext);
            $prodURL = $prod->href;
            if (!is_null($p->find('p.minimal-price', 0))) {
                $prodtype = 1;
            } else {
                $prodtype = 0;
            }
            fputcsv($o, array($prodname, $prodtype, $cat, $prodURL));
            echo $prodname . "\n";
        }
        if (!is_null($d->find('p.next', 0))) {
            getProducts($d->find('p.next', 0)->href, $cat);
        }
    }
}
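Example 5 writes to a CSV handle held in the global $o and recurses through paginated category listings. A hedged driver that opens the handle and seeds the recursion could look like this; the file name, header row, and category URL are placeholders.

// Hypothetical driver for Example 5: open the CSV handle the function expects via "global $o".
$o = fopen('products.csv', 'w');
fputcsv($o, array('name', 'has_minimal_price', 'category', 'url'));   // assumed header row

getProducts('http://example.com/shop/bike-carriers.html', 'Bike Carriers');

fclose($o);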
Example 6: ripById
function ripById($id)
{
    $pathToDetails = 'http://beheshtezahra.tehran.ir/Default.aspx?tabid=92&ctl=SearchDetails&mid=653&srid=' . $id;
    $output = scraperwiki::scrape($pathToDetails);
    $firstnamepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblNameBound_0"><b>(.*)<\\//smiU';
    $surnamepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblLastNameBound_0"><b>(.*)<\\//smiU';
    $fathernamepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblFatherNameBound_0"><b>(.*)<\\//smiU';
    $birthdatepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblBirthDateBound_0"><b>(.*)<\\//smiU';
    $deathdatepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblDafnDateBound_0"><b>(.*)<\\//smiU';
    $deathplacepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblDeastTownshipTitle_0"><b>(.*)<\\//smiU';
    $graveplacepattern = '/<span id="dnn_ctr653_SearchDetails_dtlDetail_lblDafnPlace_0"><b>(.*)<\\//smiU';
    preg_match($firstnamepattern, $output, $temp);
    $firstname = isset($temp[1]) ? $temp[1] : '';
    preg_match($surnamepattern, $output, $temp);
    $surname = isset($temp[1]) ? $temp[1] : '';
    preg_match($fathernamepattern, $output, $temp);
    $fathername = isset($temp[1]) ? $temp[1] : '';
    preg_match($birthdatepattern, $output, $temp);
    $birthdate = isset($temp[1]) ? $temp[1] : '';
    preg_match($deathdatepattern, $output, $temp);
    $deathdate = isset($temp[1]) ? $temp[1] : '';
    preg_match($deathplacepattern, $output, $temp);
    $deathplace = isset($temp[1]) ? $temp[1] : '';
    preg_match($graveplacepattern, $output, $temp);
    $graveplace = isset($temp[1]) ? $temp[1] : '';
    scraperwiki::save_sqlite(array('data'), array('id' => $id, 'firstname' => $firstname, 'surname' => $surname, 'fathername' => $fathername, 'birthdate' => $birthdate, 'deathdate' => $deathdate, 'deathplace' => $deathplace, 'graveplace' => $graveplace));
}
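Example 6 looks up a single burial record by numeric id passed in the query string. Since the ids are sequential, a hedged driver is simply a loop over an id range; the range here is arbitrary.

// Hypothetical driver for Example 6; the id range is arbitrary and the pause
// keeps the load on the target server reasonable.
for ($id = 1; $id <= 100; $id++) {
    ripById($id);
    sleep(1);
}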
Example 7: do_day
function do_day($rec)
{
    $html = scraperwiki::scrape($rec['url']);
    $dom = new simple_html_dom();
    $dom->load($html);
    $cell = $dom->find('a[name=discs]');
    $lines = $cell[0]->parent->find('text');
    print $lines[10] . "\n";
    print count($lines) . "\n";
    # loop by number, as null lines stop a foreach
    $n = 0;
    for ($line_no = 0; $line_no < count($lines); $line_no++) {
        $line = $lines[$line_no];
        if (strlen($line) == 3) {
            # the DOM object crashes on this row, so ignore it
            continue;
        }
        #if (preg_match("#^" . $n . "#", $line, $matches)) {
        print $line_no . " " . strlen($line) . "\n";
        $n = $n + 1;
        print $line . "\n";
        #}
    }
    #scraperwiki::save(array('data'), array('data' => $data->plaintext));
}
Example 8: handle_products
function handle_products($product_link)
{
    global $base_url_host, $base_url_scheme, $total;
    if (!empty($product_link)) {
        $link_3 = $product_link;
        $cat_raw = str_replace("http://www.thule.com/en-US/US/Products/", "", $product_link);
        $cats = dirname($cat_raw);
        $cat_terms = array("Base-Racks/Feet", "Base-Racks/LoadAccessories", "Base-Racks/LoadBars", "Bike-Carriers/Accessories", "Bike-Carriers/Hitch", "Bike-Carriers/RearDoor", "Bike-Carriers/RoofCarriers", "Bike-Carriers/SpareTire", "Bike-Carriers/TruckBed", "Cargo-Carriers/Bags", "Cargo-Carriers/Baskets", "Cargo-Carriers/Boxes", "Cargo-Carriers/HitchCargo", "Luggage/DaypacksAndMessengers", "Luggage/LaptopAndTablet", "Luggage/LuggageAndDuffels", "Snow-Chains/SnowChains", "Snowsports/Accessories", "Snowsports/HitchSki", "Snowsports/SkiBoxes", "Snowsports/SkiCarriers", "Watersports/Accessories", "Watersports/WatersportCarriers");
        $cat_cleaned = array("Base Racks/Feet", "Base Racks/Load Accessories", "Base Racks/Load Bars", "Bike Carriers/Accessories", "Bike Carriers/Hitch", "Bike Carriers/Rear Door", "Bike Carriers/Roof Carriers", "Bike Carriers/Spare Tire", "Bike Carriers/Truck Bed", "Cargo Carriers/Bags", "Cargo Carriers/Baskets", "Cargo Carriers/Boxes", "Cargo Carriers/Hitch Cargo", "Luggage/Daypacks And Messengers", "Luggage/Laptop And Tablet", "Luggage/Luggage And Duffels", "Snow Chains/Snow Chains", "Snowsports/Accessories", "Snowsports/Hitch Ski", "Snowsports/Ski Boxes", "Snowsports/Ski Carriers", "Watersports/Accessories", "Watersports/Watersport Carriers");
        $cat = str_replace($cat_terms, $cat_cleaned, $cats);
        $html_content = scraperwiki::scrape($link_3);
        $html = str_get_html($html_content);
        $name_raw = trim($html->find("div[@class='column details_overview'] h2 span", 0));
        $name = !empty($name_raw) ? strip_tags($name_raw) : "";
        $desc_raw = trim($html->find("div[@class='column details_overview'] h3 span", 0));
        $desc = !empty($desc_raw) ? strip_tags($desc_raw) : "";
        $price_raw = trim($html->find("div[@class='pricing'] span[@id='phcontent_0_ctl00_lblPriceText']", 0));
        $price = strip_tags($price_raw);
        $price = str_replace("MSRP \$", "", $price);
        $price = trim(str_replace(" (USD)", "", $price));
        $image = $html->find("img[@id='imgProductBomImage_0']", 0)->src;
        echo "{$name}: {$image}\n";
        // Add it to an array.
        $record = array('id' => $total, 'product_name' => trim($name), 'description' => trim($desc), 'price' => $price, 'img' => $image, 'category' => $cat);
        // Add it to the table.
        scraperwiki::save_sqlite(array('id'), array($record), "products_support", 2);
        // Increment the 'id' counter.
        $total++;
    }
}
Example 9: scrapeDetails
function scrapeDetails($ngo)
{
    $html_content = scraperwiki::scrape($ngo["url"]);
    $dom = new simple_html_dom();
    $dom->load($html_content);
    $infosWeWant = array('Telefon', 'Rechtsform', 'Steuerstatus', 'Weltanschauliche Ausrichtung', 'Anzahl Mitarbeiter', 'Gesamteinnahmen:', 'Davon Sammlungseinnahmen', 'Bezugsjahr:');
    // Scrape details from all paragraphs
    $paragraphs = $dom->find('p');
    foreach ($paragraphs as $p) {
        if (strstr($p->plaintext, "Website")) {
            $ngo["website"] = $p->find('a', 0)->href;
        }
        if (strstr($p->plaintext, "Email")) {
            $ngo["email"] = $p->find('a', 0)->plaintext;
        }
        foreach ($infosWeWant as $key => $info) {
            $res = extractInfo($p, $info);
            if ($res) {
                $ngo[$info] = $res;
                // Do not search for this info again
                unset($infosWeWant[$key]);
            }
        }
    }
    print_r($ngo);
    return $ngo;
}
Example 10: ripById
function ripById($id)
{
    $pathToDetails = 'http://www.shborujen.ir/DesktopModules/eFormViewer/eFormViewerEdit.aspx?TabID=4753&Site=DouranPortal&MId=14286&Lang=fa-IR&ItemID=1&fID=1228&keyID=itemid%7C' . $id;
    $output = scraperwiki::scrape($pathToDetails);
    $firstnamepattern = '/<input name="eFormEditData1228\\$field1421\\$controlToValidate_Field72\\$Field72_Value".*" value="(.*)".*>/smiU';
    $surnamepattern = '/<input name="eFormEditData1228\\$field1415\\$controlToValidate_Field73\\$Field73_Value.*" value="(.*)".*>/smiU';
    $fathernamepattern = '/<input name="eFormEditData1228\\$field1416\\$controlToValidate_Field74\\$Field74_Value.*value="(.*)".*>/smiU';
    $deathdatepattern = '/<input name="eFormEditData1228\\$field1418\\$ctl00\\$txt.*" value="(.*)".*>/smiU';
    $blockpattern = '/<input name="eFormEditData1228\\$field1414\\$controlToValidate_Field78\\$Field78_Value.*" value="(.*)".*>/smiU';
    $rowpattern = '/<input name="eFormEditData1228\\$field1434\\$controlToValidate_Field1434\\$Field1434_Value.*" value="(.*)".*>/smiU';
    $placepattern = '/<input name="eFormEditData1228\\$field1413\\$controlToValidate_Field77\\$Field77_Value.*" value="(.*)".*>/smiU';
    $gravepattern = '/<input name="eFormEditData1228\\$field1439\\$controlToValidate_Field1439\\$Field1439_Value.*" value="(.*)".*>/smiU';
    preg_match($firstnamepattern, $output, $temp);
    $firstname = isset($temp[1]) ? $temp[1] : '';
    preg_match($surnamepattern, $output, $temp);
    $surname = isset($temp[1]) ? $temp[1] : '';
    preg_match($fathernamepattern, $output, $temp);
    $fathername = isset($temp[1]) ? $temp[1] : '';
    preg_match($deathdatepattern, $output, $temp);
    $deathdate = isset($temp[1]) ? $temp[1] : '';
    preg_match($placepattern, $output, $temp);
    $place = isset($temp[1]) ? $temp[1] : '';
    preg_match($rowpattern, $output, $temp);
    $row = isset($temp[1]) ? $temp[1] : '';
    preg_match($blockpattern, $output, $temp);
    $block = isset($temp[1]) ? $temp[1] : '';
    preg_match($gravepattern, $output, $temp);
    $grave = isset($temp[1]) ? $temp[1] : '';
    $birthdate = '';   // no birth-date field is extracted on this page, but the column is kept in the saved row
    scraperwiki::save_sqlite(array('data'), array('id' => $id, 'firstname' => $firstname, 'surname' => $surname, 'fathername' => $fathername, 'birthdate' => $birthdate, 'deathdate' => $deathdate, 'place' => $place, 'block' => $block, 'row' => $row, 'grave' => $grave));
}
Example 11: scraper
function scraper($url_search, $country_id)
{
    $has_next = false;
    $base_url = "http://ec.europa.eu/eures/eures-searchengine/servlet";
    $html = scraperwiki::scrape($url_search);
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find('table[class=JResult]') as $result) {
        $description = '';   // reset per result so values from the previous job are not reused
        $source_id = '';
        foreach ($result->find('td[class=JRTitle] a') as $job_page) {
            $chars = explode("'", $job_page->onclick);
            $url_job = $base_url . substr($chars[1], 1);
            $url_id = strstr($url_job, 'uniqueJvId=');
            $url_id = str_replace('uniqueJvId=', "", $url_id);
            echo "JOB: " . $url_job . "<br />";
        }
        foreach ($result->find('th') as $data) {
            $text = trim($data->plaintext);
            if ($text == 'Description:') {
                $description = trim($data->next_sibling()->plaintext);
                echo "DESCRIPTION: " . $description . "<br />";
            }
            if ($text == 'Source:') {
                $source = trim($data->next_sibling()->plaintext);
                $source = str_replace("'", "\\'", $source);
                if ($source != '' && $source != ' ') {
                    $source_id = insert_name('source', $source);
                    echo "SOURCE: " . $source . "<br /><br />";
                }
            }
        }
        $description = str_replace("'", "\\'", $description);
        $description = str_replace("</BR>", "", $description);
        $sql = mysql_query("SELECT * FROM job WHERE url = '{$url_job}'");
        $cont = mysql_num_rows($sql);
        if ($cont == 0) {
            mysql_query("INSERT INTO job SET \n\t\t\t\t\turl = '{$url_job}', \n\t\t\t\t\turl_id = '{$url_id}', \n\t\t\t\t\tdescription = '{$description}', \n\t\t\t\t\tsource_id = '{$source_id}', \n\t\t\t\t\turl_search = '{$url_search}', \n\t\t\t\t\tcountry_id='{$country_id}',\n\t\t\t\t\turl_scraper_date = SYSDATE(),\t \n\t\t\t\t\turl_scraper_hour = SYSDATE()");
        } else {
            echo "Job URL already extracted: " . $url_job . "<br /><br />";
        }
    }
    foreach ($dom->find('div[class=prevNext] a') as $next_page) {
        $text = $next_page->plaintext;
        if ($text == "Next page") {
            $url_next = substr($next_page->href, 1);
            $url_next = $base_url . $url_next;
            $has_next = true;
            print "<br /><br />NEXT: " . $url_next . "<br /><br />";
        }
    }
    unset($html, $dom, $result, $job_page, $data, $next_page, $text, $url_id, $url_job, $description, $source, $source_id, $url_search);
    // Keep the block below commented out for tests; uncomment it to follow pagination and get all data
    // if ($has_next == true) {
    //     sleep(1);
    //     scraper($url_next, $country_id);
    // }
}
Example 12: ripByPage
function ripByPage($page)
{
    $pathToDetails = 'http://aramestan.e-sanandaj.ir/BurialRequest/DeadSearch?keyword=&firstName=&lastName=&fatherName=&partNo=0&rowNo=&graveNo=&deathDateFrom=&deathDateTo=&bornDateFrom=&bornDateTo=&page=' . $page;
    $output = scraperwiki::scrape($pathToDetails);
    $resultingJsonObject = json_decode($output);
    for ($id = 0; $id <= 9; $id++) {
        $entry = array('id' => $resultingJsonObject->{'result'}[$id]->{'Id'}, 'fullname' => strval($resultingJsonObject->{'result'}[$id]->{'DeadFullName'}), 'fathername' => strval($resultingJsonObject->{'result'}[$id]->{'DeadFatherName'}), 'birthdate' => strval($resultingJsonObject->{'result'}[$id]->{'BornDate'}), 'deathdate' => strval($resultingJsonObject->{'result'}[$id]->{'DeathDate'}), 'partno' => strval($resultingJsonObject->{'result'}[$id]->{'PartNo'}), 'rowno' => strval($resultingJsonObject->{'result'}[$id]->{'RowNo'}), 'graveno' => strval($resultingJsonObject->{'result'}[$id]->{'GraveNo'}), 'gender' => strval($resultingJsonObject->{'result'}[$id]->{'Gender'}), 'identitycode' => strval($resultingJsonObject->{'result'}[$id]->{'IdentityCode'}));
        scraperwiki::save_sqlite(array('data'), $entry);
        $pagecount = $resultingJsonObject->{'PageNumber'};
    }
}
Example 13: scrape
function scrape($source)
{
    // $source arrives as a parameter; declaring it global here would overwrite it, so it is left out of the global list
    global $utmSource, $utmMedium, $utmTerm, $utmContent, $utmCampaign;
    $link = scraperwiki::scrape($source);
    $html = str_get_html($link);
    foreach ($html->find('a[href]') as $a) {
        $href = $a->href;
        $a->href = $href . '#utm_source=' . $utmSource . '&utm_medium=' . $utmMedium . '&utm_term=' . $utmTerm . '&utm_content=' . $utmContent . '&utm_campaign=' . $utmCampaign;
    }
    print $html;
}
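Example 13 rewrites every link on a page to carry UTM campaign parameters and relies on five globals being set before it is called. A hedged call site could look like this; all values and the URL are placeholders.

// Hypothetical call site for Example 13; every value is a placeholder.
$utmSource   = 'newsletter';
$utmMedium   = 'email';
$utmTerm     = 'spring';
$utmContent  = 'footer-link';
$utmCampaign = 'spring-sale';

scrape('http://example.com/landing-page');   // prints the page with tagged links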
Example 14: getLangs
function getLangs()
{
    $url = "http://mappings.dbpedia.org/server/statistics/";
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $i = 0;
    $langs = array();
    foreach ($dom->find('/html/body/p/a') as $result) {
        $lang = str_replace("/", "", trim($result->href));
        $langs[] = $lang;
    }
    return $langs;
}
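Example 14 returns the language codes for which mappings.dbpedia.org publishes statistics. One hedged way to use the result is to rebuild a per-language statistics URL from each code; the URL pattern below is inferred from the hrefs the function strips, not confirmed by the original example.

// Hypothetical use of Example 14's return value.
$langs = getLangs();
foreach ($langs as $lang) {
    echo "http://mappings.dbpedia.org/server/statistics/" . $lang . "/\n";
}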
Example 15: scrapeIndex
function scrapeIndex($url)
{
    $html_content = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html_content);
    $ngos = array();
    foreach ($dom->find('h2') as $h2) {
        $name = str_replace("–", "-", html_entity_decode($h2->plaintext));
        $url = $h2->find('a', 0);
        $url = $url->href;
        $ngos[] = array("name" => $name, "url" => $url);
        scraperwiki::save_sqlite(array("name"), array("name" => $name, "url" => $url), "ngos");
    }
    print_r($ngos);
    return $ngos;
}
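Example 15 collects NGO names and detail-page URLs, and Example 9 (scrapeDetails) expects exactly such an array with a "url" key. A hedged driver chaining the two could look like this; the index URL is a placeholder.

// Hypothetical driver chaining Example 15 and Example 9; the index URL is a placeholder.
$ngos = scrapeIndex('http://example.com/ngo-index');
$detailed = array();
foreach ($ngos as $ngo) {
    $detailed[] = scrapeDetails($ngo);   // Example 9 enriches each entry with contact and finance fields
    sleep(1);                            // be polite to the target server
}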