本文整理汇总了PHP中scrape函数的典型用法代码示例。如果您正苦于以下问题：PHP scrape函数的具体用法？PHP scrape怎么用？PHP scrape使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了scrape函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: add_btag
/**
 * Build the HotsLogs player-API URL for a battle tag and hand it to scrape().
 *
 * @param string $tag Battle tag; every '#' in it is replaced by '_' in the URL.
 * @param mixed  $reg Region segment, placed in the URL ahead of the battle tag.
 * @return mixed Whatever scrape() returns for the assembled URL.
 */
function add_btag($tag, $reg)
{
    // The endpoint path is <base>/<region>/<tag with '_' instead of '#'>.
    $player = strtr($tag, '#', '_');

    return scrape('https://www.hotslogs.com/API/Players/' . $reg . '/' . $player);
}
示例2: shouldScrapeByTerm
/**
 * Re-scrape a term when the registrar listings page reports a newer data stamp.
 *
 * Reads the "Course data was last updated on ..." text from the listings page,
 * compares it with the value stored for the term's season in
 * `nyu_courses_settings`, and when the two differ calls scrape() and
 * deleteOldData() and then stores the new stamp.
 *
 * A term that names no known season has no settings column: it is still scraped
 * (as before), but no UPDATE is issued, because the column name would be empty
 * and the statement invalid.
 *
 * NOTE(review): still uses the legacy mysql_* extension on the default
 * connection, like the original; cURL and substring() are project helpers.
 *
 * @param string $term Term label containing "Fall", "Spring", "Winter" or
 *                     "Summer" (matched case-insensitively).
 * @return void
 */
function shouldScrapeByTerm($term)
{
    $cc = new cURL();
    $output = $cc->get('http://www.nyu.edu/registrar/listings/');
    $output = substring($output, 'Course data was last updated on ', '<form method="post" name="selForm" action="results.html">', true);
    // Drop the last five characters of the extracted text before parsing it as a date.
    $date = substr($output, 0, strlen($output) - 5);
    $scrapeDate = date('Y-m-d H:i:s', strtotime($date));

    // Season keyword => settings column, checked in this order.
    // Winter and Summer share a single column.
    $columns = array(
        'Fall'   => 'latest_fall_scrape',
        'Spring' => 'latest_spring_scrape',
        'Winter' => 'latest_ws_scrape',
        'Summer' => 'latest_ws_scrape',
    );
    $column = null;
    foreach ($columns as $season => $name) {
        if (stripos($term, $season) !== false) {
            $column = $name;
            break;
        }
    }

    // Look up the stored stamp; a failed query or missing row counts as "never scraped".
    $lastScrape = null;
    if ($column !== null) {
        $result = mysql_query('SELECT * FROM `nyu_courses_settings`');
        $row = $result ? mysql_fetch_assoc($result) : false;
        if (is_array($row) && isset($row[$column])) {
            $lastScrape = $row[$column];
        }
    }

    if ($lastScrape != $scrapeDate) {
        scrape($term, $scrapeDate);
        deleteOldData($term, $scrapeDate);
        if ($column !== null) {
            // $column comes from the fixed map above and $scrapeDate from date(),
            // so neither carries caller-controlled text into the statement.
            mysql_query("UPDATE `nyu_courses_settings` SET `{$column}` = '{$scrapeDate}'");
        }
    }
}
示例3: scrape
/**
 * Walk the <p>...</p> sections of $s from $offset onwards and return the first
 * link that find_apt_link() reports for one of them.
 *
 * @param string $s      Markup to search.
 * @param int    $offset Byte offset at which the search for "<p>" starts.
 * @return mixed The value returned by find_apt_link() for the first paragraph
 *               where it is not false, or false when no paragraph yields one.
 */
function scrape($s, $offset)
{
    $cursor = $offset;

    while (true) {
        // Locate the next paragraph.
        $open = strpos($s, "<p>", $cursor);
        $close = strpos($s, "</p>", $open);
        if ($open === false || $close === false) {
            return false;
        }

        // Slice it out: the opening tag is included, the closing tag is not.
        $paragraph = substr($s, $open, $close - $open);
        if ($paragraph === false) {
            return false;
        }

        $found = find_apt_link($paragraph, 0);
        if ($found !== false) {
            return $found;
        }

        // Nothing usable here; resume just past the closing "</p>" (4 bytes).
        $cursor = $close + 4;
    }
}
示例4: array_push
array_push($results, $result);
}
//Clean loop variables
$option = null;
$result = null;
unset($option);
unset($result);
if ($i % 100 === 0) {
scraperwiki::save_sqlite(array('path'), $results);
$results = array();
}
}
//Clean up
$query = null;
$xpath = null;
$ch = null;
unset($query);
unset($xpath);
unset($ch);
//Final save to database
scraperwiki::save_sqlite(array('path'), $results);
$results = null;
unset($results);
echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Identifiers handed to scrape() as its second argument, one call per entry.
$case_types = array(67, 57, 65, 66, 62, 61, 55, 12, 11, 8, 63, 58, 20, 37, 34, 42, 21, 39, 35, 5, 24, 27, 54, 6, 29, 7, 64, 53, 52, 16, 51, 15, 28, 14, 1, 25, 31, 3, 19, 33, 2, 10, 23, 32, 60, 56, 74, 36, 40, 68, 9, 26, 43, 73, 4, 22, 17, 50);

// Start at the current year and stop before next year, i.e. the current year only.
$year = date('Y');
while ($year < date('Y') + 1) {
    foreach ($case_types as $case_type) {
        scrape($year, $case_type);
    }
    $year++;
}
示例5: updatedata
/**
 * Periodic task: scrape the tracker of the external torrent(s) updated longest ago.
 *
 * Does nothing when $update_interval is 0, when the interval has not yet
 * elapsed since the 'update' task last ran, or when the conditional UPDATE of
 * the task timestamp affects no row (presumably because another request
 * claimed the run first). Otherwise it picks the announce URL of the
 * least-recently updated external torrent, collects up to five info hashes for
 * that URL and passes them to scrape() as one "','"-joined string.
 *
 * @return array|null An empty array when the chosen announce URL has no
 *                    matching info hashes (kept from the original), otherwise
 *                    nothing.
 */
function updatedata()
{
    global $CURRENTPATH, $TABLE_PREFIX, $btit_settings;
    require_once $CURRENTPATH . '/getscrape.php';
    global $update_interval;

    if ($update_interval == 0) {
        return;
    }

    $now = time();
    $res = get_result("SELECT last_time as lt FROM {$TABLE_PREFIX}tasks WHERE task='update'", true, $btit_settings['cache_duration']);
    $row = isset($res[0]) ? $res[0] : null;
    if (!$row) {
        // First run: record the task and wait for the next interval.
        do_sqlquery("INSERT INTO {$TABLE_PREFIX}tasks (task, last_time) VALUES ('update',{$now})");
        return;
    }

    $ts = $row['lt'];
    if ($ts + $update_interval > $now) {
        return;
    }

    // Only proceed if this request is the one that moved last_time forward.
    do_sqlquery("UPDATE {$TABLE_PREFIX}tasks SET last_time={$now} WHERE task='update' AND last_time = {$ts}");
    if (!mysqli_affected_rows($GLOBALS["___mysqli_ston"])) {
        return;
    }

    $res = get_result("SELECT announce_url FROM {$TABLE_PREFIX}files WHERE external='yes' ORDER BY lastupdate ASC LIMIT 1", true, $btit_settings['cache_duration']);
    if (!$res || count($res) == 0) {
        return;
    }

    // get the url to scrape, take 5 torrent at a time (try to getting multiscrape)
    $row = $res[0];
    // The row is read by column name elsewhere in this function; fall back to
    // index 0 only if get_result() ever returns numerically indexed rows.
    if (isset($row['announce_url'])) {
        $announce_url = $row['announce_url'];
    } else {
        $announce_url = isset($row[0]) ? $row[0] : null;
    }

    // The URL is stored data, so escape it before embedding it in the next query.
    $escaped_url = mysqli_real_escape_string($GLOBALS["___mysqli_ston"], (string) $announce_url);
    $resurl = get_result("SELECT info_hash FROM {$TABLE_PREFIX}files WHERE external='yes' AND announce_url='" . $escaped_url . "' ORDER BY lastupdate ASC LIMIT 5", true, $btit_settings['cache_duration']);
    if (!$resurl || count($resurl) == 0) {
        return array();
    }

    $combinedinfohash = array();
    foreach ($resurl as $rhash) {
        $combinedinfohash[] = $rhash['info_hash'];
    }

    scrape($announce_url, implode("','", $combinedinfohash));
}
示例6: scrape
/**
 * Fetch one page of the itatonline.in BIRT "CaseDetails1" report for a
 * zone/bench/date, store its rows through scraperwiki::save_sqlite(), and
 * recurse into the next page while the response reports more pages.
 *
 * @param string $session     Cookie string sent with the request.
 * @param string $session_id  Value of the __sessionId query parameter.
 * @param string $zone        Zone; selects the report design and is stored with each row.
 * @param string $bench       Bench; sent in the query and the SOAP body, stored with each row.
 * @param string $appeal_date Appeal date to search for.
 * @param mixed  $page        Page number sent as the __page operand.
 * @return false|null false when the response has no page information (no data
 *                    or failed request); otherwise nothing.
 */
function scrape($session, $session_id, $zone, $bench, $appeal_date, $page)
{
    echo "Loading data ...\n";
    $header = array('X-Prototype-Version: 1.4.0', 'X-Requested-With: XMLHttpRequest', 'SOAPAction: ""', 'Content-type: application/x-www-form-urlencoded text/xml; charset=UTF-8', 'request-type: SOAP');
    $soap = '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><Body xmlns="http://schemas.xmlsoap.org/soap/envelope/"><GetUpdatedObjects xmlns="http://schemas.eclipse.org/birt"><Operation><Target><Id>Document</Id><Type>Document</Type></Target><Operator>GetPage</Operator><Oprand><Name>Appeal No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Appeal No</Name><Value></Value></Oprand><Oprand><Name>Serial No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Serial No</Name><Value></Value></Oprand><Oprand><Name>Assessee Name</Name><Value>_</Value></Oprand><Oprand><Name>__isdisplay__Assessee Name</Name><Value></Value></Oprand><Oprand><Name>searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>__isdisplay__searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>AssType</Name><Value>null</Value></Oprand><Oprand><Name>__isdisplay__AssType</Name><Value>DontKnow</Value></Oprand><Oprand><Name>appealDate</Name><Value>' . $appeal_date . '</Value></Oprand><Oprand><Name>__isdisplay__appealDate</Name><Value>' . $appeal_date . '</Value></Oprand><Oprand><Name>Bench</Name><Value>' . $bench . '</Value></Oprand><Oprand><Name>__isdisplay__Bench</Name><Value>' . $bench . '</Value></Oprand><Oprand><Name>__page</Name><Value>' . $page . '</Value></Oprand><Oprand><Name>__svg</Name><Value>false</Value></Oprand><Oprand><Name>__page</Name><Value>' . $page . '</Value></Oprand><Oprand><Name>__taskid</Name><Value></Value></Oprand></Operation></GetUpdatedObjects></Body></soap:Envelope>';

    $ch = curl_init();
    curl_setopt_array($ch, array(CURLOPT_URL => 'http://www.itatonline.in:8080/itat/jsp/runBirt2.jsp?subAction=showReoprt&__report=CaseDetails1_' . $zone . '.rptdesign&searchWhat=searchByDate&Serial%20No=&Appeal%20No=&Assessee%20Name=&AssType=null&appealDate=' . urlencode($appeal_date) . '&Bench=' . $bench . '&__sessionId=' . $session_id, CURLOPT_COOKIE => $session, CURLOPT_HTTPHEADER => $header, CURLOPT_POST => true, CURLOPT_POSTFIELDS => $soap, CURLOPT_RETURNTRANSFER => true));
    $response = curl_exec($ch);
    // Close the handle right away so the early "no data" return below cannot leak it.
    curl_close($ch);
    unset($ch, $header, $soap);

    if ($response === false) {
        // A failed transfer is treated like an empty response ("no data" below).
        $response = '';
    }

    $dom = new DOMDocument();
    // The payload is entity-encoded markup; parser warnings are deliberately silenced.
    @$dom->loadHTML('<html><body>' . htmlspecialchars_decode($response) . '</body></html>');
    $xpath = new DOMXPath($dom);

    // Trimmed text of the $index-th child node, or '' when that child is missing.
    $text = function ($node, $index) {
        $child = $node->childNodes->item($index);
        return $child ? trim($child->nodeValue) : '';
    };

    $page_info = $xpath->query('//updatedata/data/page')->item(0);
    if (!$page_info) {
        echo 'No data found for ' . $zone . ' : ' . $bench . ' on ' . $appeal_date . " ...\n";
        return false;
    }
    // First child: page just returned; second child: page total.
    $results_page = intval($text($page_info, 0));
    $total_pages = intval($text($page_info, 1));

    $query = $xpath->query('//tr[@class="style_9"]');
    unset($xpath, $page_info, $dom);

    echo "Parsing results ...\n";
    $results = array();
    $n = $query->length;
    for ($i = 0; $i < $n; $i++) {
        $row = $query->item($i);
        $result = array(
            'appeal_number' => $text($row, 2),
            'assessee_name' => $text($row, 4),
            'filed_by' => $text($row, 6),
            'zone' => $zone,
            'bench' => $bench,
            'filing_date' => $text($row, 10),
        );
        // Rows without an appeal number are skipped.
        if ($result['appeal_number']) {
            $results[] = $result;
        }
    }
    scraperwiki::save_sqlite(array('appeal_number'), $results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";

    // Free the row data before recursing so it is not held across nested calls.
    unset($query, $results);

    // NOTE(review): if page numbers are 1-based this condition never requests
    // the final page - confirm against the service before changing it.
    if ($results_page + 1 < $total_pages) {
        scrape($session, $session_id, $zone, $bench, $appeal_date, $results_page + 1);
    }
}
示例7: array_push
for ($i = 0; $i < $n; $i++) {
$row = $query->item($i);
if ($row->getElementsByTagName('a')->item(0)) {
$url = $row->getElementsByTagName('a')->item(0)->getAttribute('href');
} else {
$url = null;
}
array_push($results, @($result = array('name' => trim($row->childNodes->item(0)->nodeValue), 'url' => $url, 'source' => trim($row->childNodes->item(2)->nodeValue), 'date' => trim($row->childNodes->item(4)->nodeValue))));
if ($result['name'] == 'There are currently no firms listed under this section.') {
$results = array();
}
//Clean loop variables
$option = null;
$result = null;
unset($option);
unset($result);
}
//Clean up
$options = null;
$ch = null;
unset($options);
unset($ch);
scraperwiki::save_sqlite(array('name'), $results);
$results = null;
unset($results);
echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// One listing page per index entry: the digits bucket first, then each letter a-z.
$pages = array_merge(array('0-9'), range('a', 'z'));
foreach ($pages as $page) {
    scrape($page);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:uk_fca_warnings_from_foreign_regulators.php
示例8: unset
unset($result);
if ($i % 100 === 0) {
scraperwiki::save_sqlite(array('unique_id'), $results);
$results = array();
}
}
//Clean up
$query = null;
$xpath = null;
$ch = null;
unset($query);
unset($xpath);
unset($ch);
//Final save to database
scraperwiki::save_sqlite(array('unique_id'), $results);
$results = null;
unset($results);
echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
if ($n < 10) {
return true;
} else {
return false;
}
}
// Request page 1, 2, 3, ... and keep going for as long as scrape() returns
// exactly false; any other return value ends the loop.
$i = 1;
do {
    $a = scrape($i);
    $i++;
} while ($a === false);
示例9: do_sanity
//.........这里部分代码省略.........
}
}
// unwarn user who did improve last
$unwarnthree = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT id,rat_warn_time FROM {$utables} WHERE {$udownloaded} > " . $min_dl . " AND {$uuploaded}/{$udownloaded} > " . $ban["wb_three"] . " AND id_level=" . $ban["wb_rank"] . " AND rat_warn_level = 3 ");
while ($unwarnc = mysqli_fetch_assoc($unwarnthree)) {
$lid = $unwarnc["id"];
quickQuery('UPDATE ' . $TABLE_PREFIX . 'users SET rat_warn_level=rat_warn_level-3 WHERE id=' . $lid);
}
}
}
// warn-ban system with acp end
// optimize peers table
quickQuery("OPTIMIZE TABLE {$TABLE_PREFIX}peers");
// delete readposts when topic don't exist or deleted *** should be done by delete, just in case
quickQuery("DELETE readposts FROM {$TABLE_PREFIX}readposts LEFT JOIN topics ON readposts.topicid = topics.id WHERE topics.id IS NULL");
// delete readposts when users was deleted *** should be done by delete, just in case
quickQuery("DELETE readposts FROM {$TABLE_PREFIX}readposts LEFT JOIN users ON readposts.userid = users.id WHERE users.id IS NULL");
// deleting orphan image in captcha folder (if image code is enabled)
$CAPTCHA_FOLDER = realpath("{$CURRENTPATH}/../{$CAPTCHA_FOLDER}");
if ($dir = @opendir($CAPTCHA_FOLDER . "/")) {
while (false !== ($file = @readdir($dir))) {
if ($ext = substr(strrchr($file, "."), 1) == "png") {
unlink("{$CAPTCHA_FOLDER}/{$file}");
}
}
@closedir($dir);
}
quickQuery("UPDATE `{$TABLE_PREFIX}users` SET `birthday_bonus`=0 WHERE DAYOFMONTH(`dob`)!=" . date('j'));
$res = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT `u`.`id`, `u`.`dob`,`l`.`language_url` FROM `{$TABLE_PREFIX}users` `u` LEFT JOIN `language` `l` ON `u`.`language`=`l`.`id` WHERE DAYOFMONTH(`u`.`dob`)=" . date('j') . " AND MONTH(`u`.`dob`)=" . date('n') . " AND `u`.`dob`!=CURDATE() AND `u`.`birthday_bonus`=0 ORDER BY `l`.`language_url` ASC");
if (@mysqli_num_rows($res) > 0) {
global $THIS_BASEPATH;
$firstrun = 1;
$englang = "language/english";
$templang = $englang;
require_once $THIS_BASEPATH . "/" . $englang . "/lang_main.php";
while ($row = mysqli_fetch_assoc($res)) {
if ($row["language_url"] != $templang) {
if ($firstrun != 1) {
// Reset the language to English before loading the new language
require_once $THIS_BASEPATH . "/" . $englang . "/lang_main.php";
}
// Load the new language etc.
require_once $THIS_BASEPATH . "/" . $row["language_url"] . "/lang_main.php";
$templang = $row["language_url"];
$firstrun = 0;
}
$dob = explode("-", $row["dob"]);
$age = userage($dob[0], $dob[1], $dob[2]);
$bonus = round($age * $btit_settings["birthday_bonus"] * 1073741824);
$query1 = "UPDATE `{$TABLE_PREFIX}users` SET `uploaded`=`uploaded`+{$bonus}, `birthday_bonus`=1 WHERE `id`=" . $row["id"];
quickQuery($query1);
send_pm(0, $row["id"], addslashes($language["HB_SUBJECT"]), addslashes($language["HB_MESSAGE_1"] . makesize($bonus) . $language["HB_MESSAGE_2"] . $btit_settings["birthday_bonus"] . $language["HB_MESSAGE_3"]));
}
}
//timed rank
$datetimedt = date("Y-m-d H:i:s");
$rankstats = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT * FROM {$TABLE_PREFIX}users WHERE timed_rank < '{$datetimedt}' AND rank_switch='yes'");
while ($arrdt = mysqli_fetch_assoc($rankstats)) {
if (mysqli_num_rows($rankstats) > 0) {
$res6 = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT level FROM {$TABLE_PREFIX}users_level WHERE id ='{$arrdt['old_rank']}'");
$arr6 = mysqli_fetch_assoc($res6);
$oldrank = $arr6[level];
$subj = sqlesc("Your timed rank is expired !");
$msg = sqlesc("Your timed rank is expired !\n\n Your rank did changed back to " . $oldrank . "\n\n [color=red]This is a automatic system message , so DO NOT reply ![/color]");
send_pm(0, $arrdt["id"], $subj, $msg);
mysqli_query($GLOBALS["___mysqli_ston"], "UPDATE {$TABLE_PREFIX}users SET rank_switch='no', id_level = old_rank WHERE id='{$arrdt['id']}'") or sqlerr();
}
}
//timed rank end
//begin invitation system by dodge
global $INV_EXPIRES;
$deadtime = $INV_EXPIRES * 86400;
$user = do_sqlquery("SELECT inviter FROM {$TABLE_PREFIX}invitations WHERE time_invited < DATE_SUB(NOW(), INTERVAL {$deadtime} SECOND)");
@($arr = mysqli_fetch_assoc($user));
if (mysqli_num_rows($user) > 0) {
mysqli_query($GLOBALS["___mysqli_ston"], "UPDATE {$TABLE_PREFIX}users SET invitations=invitations+1 WHERE id = '" . $arr["inviter"] . "'");
mysqli_query($GLOBALS["___mysqli_ston"], "DELETE FROM {$TABLE_PREFIX}invitations WHERE inviter = '" . $arr["inviter"] . "' AND time_invited < DATE_SUB(NOW(), INTERVAL {$deadtime} SECOND)");
}
//end invitation system
do_updateranks();
// auto ext update
$num_torrents_to_update = 5;
$torrents = get_result("SELECT `announces`, `info_hash` FROM `{$TABLE_PREFIX}files` WHERE `external`='yes' ORDER BY `lastupdate` DESC LIMIT " . $num_torrents_to_update);
if (count($torrents) > 0) {
require_once "getscrape.php";
for ($i = 0; $i < count($torrents); $i++) {
$announces = @unserialize($torrents[$i]['announces']) ? unserialize($torrents[$i]['announces']) : array();
if (count($announces) > 0) {
$keys = array_keys($announces);
$random = mt_rand(0, count($keys) - 1);
$url = $keys[$random];
scrape($url, $torrents[$i]['info_hash']);
}
}
}
// auto ext update
// OK We're finished, let's reset max_execution_time and memory_limit back to the php.ini defaults
@ini_restore("max_execution_time");
@ini_restore("memory_limit");
}
示例10: scrape
/**
 * Load one page of judgment search results, store its rows through
 * scraperwiki::save_sqlite(), and recurse into the next page until the last
 * stored row number stops changing.
 *
 * @param mixed  $cookie           Passed through to first_page()/not_first_page().
 * @param string $viewstate        Current __VIEWSTATE value; replaced by the one found in the response.
 * @param string $event_validation Current __EVENTVALIDATION value; replaced when the response carries one.
 * @param int    $page             Falsy for the first page, otherwise the page to request.
 * @param array  $from             Range start with 'day', 'month' and 'year' keys.
 * @param array  $to               Range end with 'day', 'month' and 'year' keys.
 * @param mixed  $last_row_number  Row number of the last row stored by the previous call.
 * @return void
 */
function scrape($cookie, $viewstate, $event_validation, $page, $from, $to, $last_row_number)
{
    echo "Loading data (" . $from['day'] . '/' . $from['month'] . '/' . $from['year'] . ' -> ' . $to['day'] . '/' . $to['month'] . '/' . $to['year'] . ") ...\n";
    $dom = new DOMDocument();
    // Parser warnings on the scraped markup are deliberately silenced.
    if (!$page) {
        @$dom->loadHTML(first_page($cookie, $viewstate, $event_validation, $from, $to));
    } else {
        @$dom->loadHTML(not_first_page($cookie, $viewstate, $event_validation, $page));
    }
    $xpath = new DOMXPath($dom);

    // Carry the form state forward; keep the previous value when the response
    // lacks a field, rather than failing or passing a node list to the next request.
    $viewstate_node = $xpath->query('//*[@id="__VIEWSTATE"]')->item(0);
    if ($viewstate_node) {
        $viewstate = $viewstate_node->getAttribute('value');
    }
    $validation_node = $xpath->query('//*[@id="__EVENTVALIDATION"]')->item(0);
    if ($validation_node) {
        $event_validation = $validation_node->getAttribute('value');
    }

    $query = $xpath->query('//table[@id="DataGrid1"]/tr');
    unset($dom, $xpath, $viewstate_node, $validation_node);

    echo "Parsing results ...\n";
    $results = array();
    $new_last_row_number = 0;
    $range_tag = $from['day'] . $from['month'] . $from['year'] . '-' . $to['day'] . $to['month'] . $to['year'];
    $n = $query->length;
    // Row 0 is skipped (presumably the header row).
    for ($i = 1; $i < $n; $i++) {
        $row = $query->item($i);

        // Raw text of the first five child nodes; '' when a row has fewer.
        $cells = array();
        for ($c = 0; $c <= 4; $c++) {
            $cell = $row->childNodes->item($c);
            $cells[$c] = $cell ? $cell->nodeValue : '';
        }

        $path = null;
        if (preg_match('/filename=(.*?)">/i', $row->ownerDocument->saveXML($row), $href)) {
            $path = $href[1];
        }

        // Parties are written as "<petitioner> Vs. <respondent>".
        // NOTE(review): without the separator the petitioner is empty and the
        // respondent is the text minus its first five characters - kept as in
        // the original because the row filter below depends on these values.
        $split = strpos($cells[2], ' Vs. ');
        $petitioner = $split === false ? '' : trim(substr($cells[2], 0, $split));
        $respondent = trim(substr($cells[2], ($split === false ? 0 : $split) + 5));

        // Remove the literal "Coram:" label. trim() with a character list would
        // also eat any of those letters from both ends of the judge's name.
        $judge = trim(preg_replace('/^\s*Coram:\s*/', '', $cells[3]));

        $result = array(
            'case_number' => trim($cells[1]),
            'path' => $path,
            'petitioner' => $petitioner,
            'respondent' => $respondent,
            'judge' => $judge,
            'judgment_date' => trim($cells[4]),
            'unique_id' => preg_replace('/\\s+/', '', $cells[0] . '-' . $cells[1] . '-' . $range_tag),
        );
        if ($result['case_number'] || $result['path'] || $result['petitioner'] || $result['respondent'] || $result['judgment_date']) {
            $new_last_row_number = intval($cells[0]);
            $results[] = $result;
        }
    }
    scraperwiki::save_sqlite(array('unique_id'), $results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";

    // Free the row data before recursing so it is not held across nested calls.
    unset($query, $results);

    // A page that ends on the same row number as the previous one (or stores
    // no rows after a call that reported 0) ends the walk.
    if ($last_row_number !== $new_last_row_number) {
        scrape($cookie, $viewstate, $event_validation, $page + 1, $from, $to, $new_last_row_number);
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:48,代码来源:high_court_of_jammu_kashmir_srinagar_judgments.php
示例11: unset
$xpath = null;
unset($xpath);
$n = $query->length;
echo "Parsing Results ...\n";
$results = array();
for ($i = 0; $i < $n; $i++) {
$row = $query->item($i)->getElementsByTagName('td');
@($result = array('id' => preg_replace('/\\s/', '', $year . '-' . $row->item(2)->nodeValue), 'year' => trim($year), 'country_rank' => trim($row->item(0)->nodeValue), 'company' => trim($row->item(1)->nodeValue), 'global_rank' => trim($row->item(2)->nodeValue), 'city' => trim($row->item(3)->nodeValue), 'country' => trim($country), 'revenue' => trim($row->item(4)->nodeValue)));
if (!empty($result['company'])) {
array_push($results, $result);
}
//Clean loop variables
$option = null;
$result = null;
unset($option);
unset($result);
}
//Clean up
$options = null;
$ch = null;
unset($options);
unset($ch);
scraperwiki::save_sqlite(array('id'), $results);
$results = null;
unset($results);
echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Country keys handed to scrape() one at a time, each with the current year.
$countries = array(
    'Australia', 'Austria', 'Belgium', 'BelgiumNetherlands', 'Brazil',
    'Britain', 'BritainNetherlands', 'Canada', 'China', 'Colombia',
    'Denmark', 'Finland', 'France', 'Germany', 'Hungary',
    'India', 'Ireland', 'Israel', 'Italy', 'Japan',
    'Luxembourg', 'Malaysia', 'Mexico', 'Netherlands', 'Norway',
    'Poland', 'Portugal', 'Russia', 'SaudiArabia', 'Singapore',
    'SouthKorea', 'Spain', 'Sweden', 'Switzerland', 'Taiwan',
    'Thailand', 'Turkey', 'US', 'UnitedArabEmirates', 'Venezuela'
);

foreach ($countries as $country) {
    scrape(date('Y'), $country);
}
示例12: unset
//Clean loop variables
$option = null;
$result = null;
unset($option);
unset($result);
}
//Clean up
$options = null;
unset($options);
//Save to database
scraperwiki::save_sqlite(array('unique_id'), $results);
$results = null;
unset($results);
echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Fetch a one-record result list (-max=1) to read the overall record count
// from its "<n> of <total> |" counter, then scrape every page of 1000 records.
$data = scraperwiki::scrape('http://securities.stanford.edu/fmi/xsl/SCACPUDB/recordlist.xsl?-db=SCACPUDB&-lay=Search&-sortfield.1=FIC_DateFiled&-sortfield.2=LitigationName&-sortorder.1=ascend&-max=1&-findall=&-lay.response=ListGral&-encoding=UTF-8&-grammar=fmresultset&-skip=0');
echo "Loading data ...\n";
$dom = new DOMDocument();
// Parser warnings on the scraped markup are deliberately silenced.
@$dom->loadHTML($data);
$xpath = new DOMXPath($dom);
$dom = null;
unset($dom);

// The counter is expected in the first right-aligned cell. If the cell or the
// pattern is missing, fall back to zero results instead of reading an
// undefined match group.
$counter_node = $xpath->query('//td[@align="right"]')->item(0);
$total_results = 0;
$total_results_match = array();
if ($counter_node && preg_match('/\\d+\\s+of\\s+(.*?)\\s+\\|/', $counter_node->nodeValue, $total_results_match)) {
    $total_results = intval($total_results_match[1]);
} else {
    echo "Could not read the total number of results; nothing to scrape.\n";
}
$counter_node = null;
$xpath = null;
unset($counter_node);
unset($xpath);

$results_per_page = 1000;
// Pages are numbered from 1; "+ 1" makes a final partial page count as well.
$n = $total_results / $results_per_page + 1;
for ($page = 1; $page < $n; $page++) {
    scrape($page, $results_per_page);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:stanford_securities_class_action_clearinghouse.php
示例13: unset
unset($option);
unset($result);
if ($i % 100 === 0) {
//scraperwiki::save_sqlite(array('unique_id'), $results);
$results = array();
}
}
//Clean up
$options = null;
$ch = null;
unset($options);
unset($ch);
//Final save to database
//scraperwiki::save_sqlite(array('unique_id'), $results);
$results = null;
unset($results);
}
// Scrape the previous and the current calendar month.
// The month is derived through mktime(), which rolls month 0 back to December
// of the previous year. Plain "date('m') - 1" yields month 0 in January, which
// is not a valid month for cal_days_in_month().
$current_month = (int) date('n');
$current_year = (int) date('Y');
foreach (array($current_month - 1, $current_month) as $month_number) {
    $first_of_month = mktime(0, 0, 0, $month_number, 1, $current_year);
    $year = date('Y', $first_of_month);
    $month = (int) date('n', $first_of_month);

    $end_day = cal_days_in_month(CAL_GREGORIAN, $month, $year);
    $from = array(1, $month, $year);
    $to = array($end_day, $month, $year);

    // The first call is read as array(cookie, total result count).
    $details = scrape($from, $to, 'get_results_total', null, 'NA', 'NA');
    $cookie = $details[0];
    // 50 results per page.
    $pages = ceil($details[1] / 50);

    scrape($from, $to, 'first_page', $cookie, 0, $pages);
    for ($i = 1; $i < $pages; $i++) {
        scrape($from, $to, 'next_page', $cookie, $i, $pages);
    }
}
示例14: unix_timestamp
$q = "SELECT searches.* FROM searches HAVING searches.updated_at IS NULL OR searches.updated_at < unix_timestamp(DATE_SUB(NOW(), INTERVAL 1 HOUR))";
$stmt = $db->prepare($q);
$p = array(date('Y-m-d H:i:s'));
$stmt->execute($p);
$data = $stmt->fetchAll(PDO::FETCH_ASSOC);
$res = array();
//TODO reduce scraping frequency when there are rarely any new ads
foreach ($data as $search) {
if (isset($search['lang']) && $search['lang']) {
setLeBonMailLocale(getSearchLocale($search['lang']));
} else {
setLeBonMailLocale(array('fr_FR', 'fr_FR.utf8'));
}
if ($search['annonces'] == null || $search['updated_at'] == null || $search['updated_at'] < time() - 3600 * $app->config('cache_duration')) {
$app->getLog()->info('scraping for ' . $search['url']);
$annonces = scrape($search['url'], $app->getLog());
$app->getLog()->info('Got ' . count($annonces) . ' adds');
if (is_array($annonces) && !empty($annonces)) {
$p = array(serialize($annonces), time(), serialize($annonces[0]), $search['hash']);
$q = "UPDATE searches SET annonces=?, updated_at=?, last=? WHERE hash=?";
$stmt = $db->prepare($q);
$stmt->execute($p);
} else {
$p = array(time(), $search['hash']);
$q = "UPDATE searches SET updated_at=? WHERE hash=?";
$stmt = $db->prepare($q);
$stmt->execute($p);
}
} else {
$annonces = unserialize($search['annonces']);
}
示例15: updatedata
/**
 * Periodic task: scrape the tracker of external torrents whose statistics are stale.
 *
 * Does nothing when $update_interval is 0, when the interval has not yet
 * elapsed since the 'update' task last ran, or when the conditional UPDATE of
 * the task timestamp affects no row (presumably because another request
 * claimed the run first). Otherwise it picks the announce URL of the
 * least-recently updated stale external torrent, collects up to five info
 * hashes for that URL and passes them to scrape() as one "','"-joined string.
 *
 * NOTE(review): $db is used as a mysqli connection (query(), affected_rows,
 * real_escape_string()) - confirm it is not a wrapper lacking the last method.
 *
 * @return array|null An empty array when the chosen announce URL has no
 *                    matching info hashes (kept from the original), otherwise
 *                    nothing.
 */
function updatedata()
{
    global $db;
    require_once INCL_PATH . 'getscrape.php';
    global $update_interval;

    if (0 + $update_interval == 0) {
        return;
    }

    $now = vars::$timestamp;
    $res = @$db->query("SELECT last_time FROM tasks WHERE task='update'");
    // A failed query yields false; calling fetch_array() on it would be a fatal error.
    $row = $res ? $res->fetch_array(MYSQLI_BOTH) : null;
    if (!$row) {
        // First run: record the task and wait for the next interval.
        $db->query("INSERT INTO tasks (task, last_time) VALUES ('update', " . $now . ")");
        return;
    }

    $ts = $row[0];
    if ($ts + $update_interval > $now) {
        return;
    }

    // Only proceed if this request is the one that moved last_time forward.
    $db->query("UPDATE tasks SET last_time = " . $now . " WHERE task = 'update' AND last_time = " . $ts);
    if (!$db->affected_rows) {
        return;
    }

    // new control time is lastupdate (before the current one) - update interval
    $ts = $ts - $update_interval;
    $res = @$db->query("SELECT announce_url FROM namemap WHERE external = 'yes' AND UNIX_TIMESTAMP(lastupdate) < " . $ts . " ORDER BY lastupdate ASC LIMIT 1");
    if (!$res || $res->num_rows == 0) {
        return;
    }

    // get the url to scrape, take 5 torrent at a time (try to getting multiscrape)
    $row = $res->fetch_row();
    $announce_url = $row[0];
    // The URL is stored data, so escape it before embedding it in the next query.
    $escaped_url = $db->real_escape_string((string) $announce_url);
    $resurl = @$db->query("SELECT info_hash FROM namemap WHERE external = 'yes' AND UNIX_TIMESTAMP(lastupdate) < " . $ts . " AND announce_url = '" . $escaped_url . "' ORDER BY lastupdate DESC LIMIT 5");
    if (!$resurl || $resurl->num_rows == 0) {
        return array();
    }

    $combinedinfohash = array();
    while ($rhash = $resurl->fetch_row()) {
        $combinedinfohash[] = $rhash[0];
    }

    scrape($announce_url, implode("','", $combinedinfohash));
}