当前位置: 首页>>代码示例>>PHP>>正文


PHP scrape函数代码示例

本文整理汇总了PHP中scrape函数的典型用法代码示例。如果您正苦于以下问题：PHP scrape函数的具体用法？PHP scrape怎么用？PHP scrape使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了scrape函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: add_btag

/**
 * Build the HotsLogs player-lookup URL for a BattleTag and hand it to scrape().
 *
 * @param string $tag BattleTag; every '#' in it is replaced by '_' for the URL.
 * @param mixed  $reg Value inserted as the path segment before the tag
 *                    (presumably a region identifier — confirm with callers).
 *
 * @return mixed Whatever scrape() returns for the assembled URL.
 */
function add_btag($tag, $reg)
{
    // The API path uses an underscore where the tag carries a '#'.
    $player = strtr($tag, '#', '_');

    return scrape('https://www.hotslogs.com/API/Players/' . $reg . '/' . $player);
}
开发者ID:Vooders,项目名称:hots-logs-wp-plugin,代码行数:7,代码来源:api_scraper.php

示例2: shouldScrapeByTerm

/**
 * Re-scrape a term when the registrar's "last updated" stamp differs from the
 * one stored for that term's season in `nyu_courses_settings`.
 *
 * The season is detected case-insensitively from $term. Winter and Summer
 * share the `latest_ws_scrape` column. A term that names none of the four
 * seasons has no column to compare against, so the function returns without
 * fetching or scraping anything (previously this fell through with an
 * undefined $column and produced a malformed UPDATE statement).
 *
 * @param string $term Term label, e.g. one containing "Fall" or "Spring".
 *
 * @return void
 */
function shouldScrapeByTerm($term)
{
    // Resolve the settings column first; without one there is nothing to do.
    if (stripos($term, "Fall") !== false) {
        $column = "latest_fall_scrape";
    } elseif (stripos($term, "Spring") !== false) {
        $column = "latest_spring_scrape";
    } elseif (stripos($term, "Winter") !== false || stripos($term, "Summer") !== false) {
        $column = "latest_ws_scrape";
    } else {
        return;
    }

    // Pull the "last updated" text out of the registrar listing page.
    $cc = new cURL();
    $output = $cc->get('http://www.nyu.edu/registrar/listings/');
    $output = substring($output, 'Course data was last updated on ', '<form method="post" name="selForm" action="results.html">', true);
    // Drop the last 5 characters of the extracted fragment before parsing it as a date.
    $date = substr($output, 0, strlen($output) - 5);
    $scrapeDate = date('Y-m-d H:i:s', strtotime($date));

    $sql = 'SELECT * FROM `nyu_courses_settings`';
    $result = mysql_query($sql);
    $row = mysql_fetch_assoc($result);
    // No settings row (or a failed query) counts as "never scraped".
    $stored = is_array($row) && isset($row[$column]) ? $row[$column] : null;

    if ($stored != $scrapeDate) {
        scrape($term, $scrapeDate);
        deleteOldData($term, $scrapeDate);
        $sql = "UPDATE `nyu_courses_settings` SET `{$column}` = '{$scrapeDate}'";
        mysql_query($sql);
    }
}
开发者ID:rickyc,项目名称:whatchamacallit,代码行数:32,代码来源:scrape.php

示例3: scrape

/**
 * Walk the <p>…</p> spans of $s from $offset onward and return the first
 * link that find_apt_link() accepts.
 *
 * @param string $s      Markup to search.
 * @param int    $offset Byte offset at which the search starts.
 *
 * @return mixed The value returned by find_apt_link() for the first paragraph
 *               that yields one, or false when no further complete paragraph
 *               exists.
 */
function scrape($s, $offset)
{
    $cursor = $offset;

    while (true) {
        // Find the next paragraph's opening and closing tags.
        $open = strpos($s, "<p>", $cursor);
        if ($open === false) {
            return false;
        }
        $close = strpos($s, "</p>", $open);
        if ($close === false) {
            return false;
        }

        // Slice from the opening tag up to (not including) the closing tag.
        $paragraph = substr($s, $open, $close - $open);
        if ($paragraph === false) {
            return false;
        }

        $found = find_apt_link($paragraph, 0);
        if ($found !== false) {
            return $found;
        }

        // Nothing usable here: resume just past this paragraph's "</p>".
        $cursor = $close + 4;
    }
}
开发者ID:AdeelH,项目名称:path-to-philosophy,代码行数:22,代码来源:func.php

示例4: array_push

            array_push($results, $result);
        }
        //Clean loop variables
        $option = null;
        $result = null;
        unset($option);
        unset($result);
        if ($i % 100 === 0) {
            scraperwiki::save_sqlite(array('path'), $results);
            $results = array();
        }
    }
    //Clean up
    $query = null;
    $xpath = null;
    $ch = null;
    unset($query);
    unset($xpath);
    unset($ch);
    //Final save to database
    scraperwiki::save_sqlite(array('path'), $results);
    $results = null;
    unset($results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Numeric case-type codes; each one is passed to scrape() together with the year.
$case_types = array(67, 57, 65, 66, 62, 61, 55, 12, 11, 8, 63, 58, 20, 37, 34, 42, 21, 39, 35, 5, 24, 27, 54, 6, 29, 7, 64, 53, 52, 16, 51, 15, 28, 14, 1, 25, 31, 3, 19, 33, 2, 10, 23, 32, 60, 56, 74, 36, 40, 68, 9, 26, 43, 73, 4, 22, 17, 50);
// The loop bounds (current year up to, but excluding, current year + 1) make
// this run for the current calendar year only.
for ($year = date('Y'); $year < date('Y') + 1; $year++) {
    foreach ($case_types as $case_type) {
        scrape($year, $case_type);
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:jharkhand_high_court_case_status.php

示例5: updatedata

/**
 * Periodic task: pick the external torrent(s) with the oldest `lastupdate`
 * and pass their announce URL and info hashes to scrape().
 *
 * The run is throttled through the `update` row of the tasks table: it only
 * proceeds when at least $update_interval seconds have passed since
 * `last_time`, and only if this call is the one that managed to move
 * `last_time` forward (the UPDATE is conditional on the old value).
 *
 * Fixes over the previous version:
 *  - the announce URL is read by column name; rows are accessed by name
 *    everywhere else in this function, so the former `$row[0]` did not match;
 *  - the first task row is only read when it exists;
 *  - the hash list is initialised before it is filled;
 *  - the announce URL is escaped before being embedded in SQL.
 *
 * @return array|null An empty array when no info hashes are found for the
 *                    chosen announce URL, otherwise null.
 */
function updatedata()
{
    global $CURRENTPATH, $TABLE_PREFIX, $btit_settings;
    require_once $CURRENTPATH . '/getscrape.php';
    global $update_interval;
    // An interval of 0 disables the task.
    if ($update_interval == 0) {
        return;
    }
    $now = time();
    $res = get_result("SELECT last_time as lt FROM {$TABLE_PREFIX}tasks WHERE task='update'", true, $btit_settings['cache_duration']);
    $row = isset($res[0]) ? $res[0] : null;
    if (!$row) {
        // First run ever: just record the timestamp and wait for the next call.
        do_sqlquery("INSERT INTO {$TABLE_PREFIX}tasks (task, last_time) VALUES ('update',{$now})");
        return;
    }
    $ts = $row['lt'];
    if ($ts + $update_interval > $now) {
        return;
    }
    // Claim this run: the WHERE clause only matches if last_time is still $ts.
    do_sqlquery("UPDATE {$TABLE_PREFIX}tasks SET last_time={$now} WHERE task='update' AND last_time = {$ts}");
    if (!mysqli_affected_rows($GLOBALS["___mysqli_ston"])) {
        return;
    }
    $res = get_result("SELECT announce_url FROM {$TABLE_PREFIX}files WHERE external='yes' ORDER BY lastupdate ASC LIMIT 1", true, $btit_settings['cache_duration']);
    if (!$res || count($res) == 0) {
        return;
    }
    // get the url to scrape, take 5 torrent at a time (try to getting multiscrape)
    $announce_url = $res[0]['announce_url'];
    $escaped_url = mysqli_real_escape_string($GLOBALS["___mysqli_ston"], $announce_url);
    $resurl = get_result("SELECT info_hash FROM {$TABLE_PREFIX}files WHERE external='yes' AND announce_url='" . $escaped_url . "' ORDER BY lastupdate ASC LIMIT 5", true, $btit_settings['cache_duration']);
    if (!$resurl || count($resurl) == 0) {
        return array();
    }
    $combinedinfohash = array();
    foreach ($resurl as $rhash) {
        $combinedinfohash[] = $rhash['info_hash'];
    }
    // The hashes are joined with "','" — presumably so the callee can drop
    // them into a quoted SQL list; confirm against getscrape.php.
    scrape($announce_url, implode("','", $combinedinfohash));
}
开发者ID:Karpec,项目名称:gizd,代码行数:39,代码来源:functions.php

示例6: scrape

/**
 * Fetch one result page of the ITAT case report for a zone/bench/date, store
 * the parsed rows through scraperwiki::save_sqlite(), and recurse into the
 * next page while more pages are reported.
 *
 * Compared with the previous version the cURL handle is closed immediately
 * after the transfer, so it is also released on the early "no data" return,
 * and a failed transfer (curl_exec() === false) is turned into an empty
 * response explicitly instead of relying on implicit coercion.
 *
 * @param string $session     Cookie string sent with the request.
 * @param string $session_id  Value of the __sessionId query parameter.
 * @param string $zone        Zone name; selects the CaseDetails1_<zone> report.
 * @param string $bench       Bench value placed in the query string and SOAP body.
 * @param string $appeal_date Appeal date placed in the query string and SOAP body.
 * @param int    $page        Page number requested in the SOAP body.
 *
 * @return false|null false when the response carries no page information,
 *                    otherwise null.
 */
function scrape($session, $session_id, $zone, $bench, $appeal_date, $page)
{
    echo "Loading data ...\n";
    $header = array('X-Prototype-Version: 1.4.0', 'X-Requested-With: XMLHttpRequest', 'SOAPAction: ""', 'Content-type: application/x-www-form-urlencoded text/xml; charset=UTF-8', 'request-type: SOAP');
    $soap = '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><Body xmlns="http://schemas.xmlsoap.org/soap/envelope/"><GetUpdatedObjects xmlns="http://schemas.eclipse.org/birt"><Operation><Target><Id>Document</Id><Type>Document</Type></Target><Operator>GetPage</Operator><Oprand><Name>Appeal No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Appeal No</Name><Value></Value></Oprand><Oprand><Name>Serial No</Name><Value></Value></Oprand><Oprand><Name>__isdisplay__Serial No</Name><Value></Value></Oprand><Oprand><Name>Assessee Name</Name><Value>_</Value></Oprand><Oprand><Name>__isdisplay__Assessee Name</Name><Value></Value></Oprand><Oprand><Name>searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>__isdisplay__searchWhat</Name><Value>searchByDate</Value></Oprand><Oprand><Name>AssType</Name><Value>null</Value></Oprand><Oprand><Name>__isdisplay__AssType</Name><Value>DontKnow</Value></Oprand><Oprand><Name>appealDate</Name><Value>' . $appeal_date . '</Value></Oprand><Oprand><Name>__isdisplay__appealDate</Name><Value>' . $appeal_date . '</Value></Oprand><Oprand><Name>Bench</Name><Value>' . $bench . '</Value></Oprand><Oprand><Name>__isdisplay__Bench</Name><Value>' . $bench . '</Value></Oprand><Oprand><Name>__page</Name><Value>' . $page . '</Value></Oprand><Oprand><Name>__svg</Name><Value>false</Value></Oprand><Oprand><Name>__page</Name><Value>' . $page . '</Value></Oprand><Oprand><Name>__taskid</Name><Value></Value></Oprand></Operation></GetUpdatedObjects></Body></soap:Envelope>';
    $ch = curl_init();
    curl_setopt_array($ch, array(CURLOPT_URL => 'http://www.itatonline.in:8080/itat/jsp/runBirt2.jsp?subAction=showReoprt&__report=CaseDetails1_' . $zone . '.rptdesign&searchWhat=searchByDate&Serial%20No=&Appeal%20No=&Assessee%20Name=&AssType=null&appealDate=' . urlencode($appeal_date) . '&Bench=' . $bench . '&__sessionId=' . $session_id, CURLOPT_COOKIE => $session, CURLOPT_HTTPHEADER => $header, CURLOPT_POST => true, CURLOPT_POSTFIELDS => $soap, CURLOPT_RETURNTRANSFER => true));
    $response = curl_exec($ch);
    // Close right after the transfer so every return path releases the handle.
    curl_close($ch);
    if ($response === false) {
        // Transport failure: parse an empty body, which ends in the "No data" branch.
        $response = '';
    }
    $dom = new DOMDocument();
    // The payload arrives entity-encoded; decode it and wrap it so loadHTML()
    // gets a full document. Parser warnings are suppressed on purpose.
    @$dom->loadHTML('<html><body>' . htmlspecialchars_decode($response) . '</body></html>');
    $xpath = new DOMXPath($dom);
    $pages = $xpath->query('//updatedata/data/page');
    if ($pages->item(0)) {
        $results_page = intval($pages->item(0)->childNodes->item(0)->nodeValue);
    } else {
        echo 'No data found for ' . $zone . ' : ' . $bench . ' on ' . $appeal_date . " ...\n";
        return false;
        //no data
    }
    $total_pages = intval($pages->item(0)->childNodes->item(1)->nodeValue);
    $query = $xpath->query('//tr[@class="style_9"]');
    // Drop references that are no longer needed before parsing and recursing.
    unset($header, $soap, $response, $xpath, $pages, $dom, $ch);
    echo "Parsing results ...\n";
    $results = array();
    $n = $query->length;
    for ($i = 0; $i < $n; $i++) {
        $row = $query->item($i);
        // Child nodes 2, 4, 6 and 10 hold the fields; warnings for rows with
        // fewer nodes are suppressed and such rows are filtered out below.
        @($result = array('appeal_number' => trim($row->childNodes->item(2)->nodeValue), 'assessee_name' => trim($row->childNodes->item(4)->nodeValue), 'filed_by' => trim($row->childNodes->item(6)->nodeValue), 'zone' => $zone, 'bench' => $bench, 'filing_date' => trim($row->childNodes->item(10)->nodeValue)));
        if ($result['appeal_number']) {
            array_push($results, $result);
        }
    }
    scraperwiki::save_sqlite(array('appeal_number'), $results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
    unset($query, $results);
    // NOTE(review): whether the server's page numbers are 0- or 1-based decides
    // if this bound reaches the last page — confirm against a live response.
    if ($results_page + 1 < $total_pages) {
        scrape($session, $session_id, $zone, $bench, $appeal_date, $results_page + 1);
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:53,代码来源:indian_tax_appellate_tribunal_cases.php

示例7: array_push

    for ($i = 0; $i < $n; $i++) {
        $row = $query->item($i);
        if ($row->getElementsByTagName('a')->item(0)) {
            $url = $row->getElementsByTagName('a')->item(0)->getAttribute('href');
        } else {
            $url = null;
        }
        array_push($results, @($result = array('name' => trim($row->childNodes->item(0)->nodeValue), 'url' => $url, 'source' => trim($row->childNodes->item(2)->nodeValue), 'date' => trim($row->childNodes->item(4)->nodeValue))));
        if ($result['name'] == 'There are currently no firms listed under this section.') {
            $results = array();
        }
        //Clean loop variables
        $option = null;
        $result = null;
        unset($option);
        unset($result);
    }
    //Clean up
    $options = null;
    $ch = null;
    unset($options);
    unset($ch);
    scraperwiki::save_sqlite(array('name'), $results);
    $results = null;
    unset($results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Page keys to process: '0-9' followed by each lowercase letter a–z.
$pages = array('0-9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z');
// Hand every key to scrape() in list order.
foreach ($pages as $page) {
    scrape($page);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:uk_fca_warnings_from_foreign_regulators.php

示例8: unset

        unset($result);
        if ($i % 100 === 0) {
            scraperwiki::save_sqlite(array('unique_id'), $results);
            $results = array();
        }
    }
    //Clean up
    $query = null;
    $xpath = null;
    $ch = null;
    unset($query);
    unset($xpath);
    unset($ch);
    //Final save to database
    scraperwiki::save_sqlite(array('unique_id'), $results);
    $results = null;
    unset($results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
    if ($n < 10) {
        return true;
    } else {
        return false;
    }
}
// Request page 1, then keep requesting the following page for as long as
// scrape() returns exactly false; any other return value ends the run.
$i = 1;
do {
    $a = scrape($i);
    $i++;
} while ($a === false);
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:jharkhand_high_court_judgments.php

示例9: do_sanity


//.........这里部分代码省略.........
                }
            }
            // unwarn user who did improve last
            $unwarnthree = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT id,rat_warn_time FROM {$utables} WHERE {$udownloaded}  > " . $min_dl . " AND {$uuploaded}/{$udownloaded} > " . $ban["wb_three"] . " AND id_level=" . $ban["wb_rank"] . " AND rat_warn_level = 3 ");
            while ($unwarnc = mysqli_fetch_assoc($unwarnthree)) {
                $lid = $unwarnc["id"];
                quickQuery('UPDATE ' . $TABLE_PREFIX . 'users SET rat_warn_level=rat_warn_level-3 WHERE id=' . $lid);
            }
        }
    }
    // warn-ban system with acp end
    //  optimize peers table
    quickQuery("OPTIMIZE TABLE {$TABLE_PREFIX}peers");
    // delete readposts when topic don't exist or deleted  *** should be done by delete, just in case
    quickQuery("DELETE readposts FROM {$TABLE_PREFIX}readposts LEFT JOIN topics ON readposts.topicid = topics.id WHERE topics.id IS NULL");
    // delete readposts when users was deleted *** should be done by delete, just in case
    quickQuery("DELETE readposts FROM {$TABLE_PREFIX}readposts LEFT JOIN users ON readposts.userid = users.id WHERE users.id IS NULL");
    // deleting orphan image in captcha folder (if image code is enabled)
    $CAPTCHA_FOLDER = realpath("{$CURRENTPATH}/../{$CAPTCHA_FOLDER}");
    if ($dir = @opendir($CAPTCHA_FOLDER . "/")) {
        while (false !== ($file = @readdir($dir))) {
            if ($ext = substr(strrchr($file, "."), 1) == "png") {
                unlink("{$CAPTCHA_FOLDER}/{$file}");
            }
        }
        @closedir($dir);
    }
    quickQuery("UPDATE `{$TABLE_PREFIX}users` SET `birthday_bonus`=0 WHERE DAYOFMONTH(`dob`)!=" . date('j'));
    $res = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT `u`.`id`, `u`.`dob`,`l`.`language_url` FROM `{$TABLE_PREFIX}users` `u` LEFT JOIN `language` `l` ON `u`.`language`=`l`.`id` WHERE DAYOFMONTH(`u`.`dob`)=" . date('j') . " AND MONTH(`u`.`dob`)=" . date('n') . " AND `u`.`dob`!=CURDATE() AND `u`.`birthday_bonus`=0 ORDER BY `l`.`language_url` ASC");
    if (@mysqli_num_rows($res) > 0) {
        global $THIS_BASEPATH;
        $firstrun = 1;
        $englang = "language/english";
        $templang = $englang;
        require_once $THIS_BASEPATH . "/" . $englang . "/lang_main.php";
        while ($row = mysqli_fetch_assoc($res)) {
            if ($row["language_url"] != $templang) {
                if ($firstrun != 1) {
                    // Reset the language to English before loading the new language
                    require_once $THIS_BASEPATH . "/" . $englang . "/lang_main.php";
                }
                // Load the new language etc.
                require_once $THIS_BASEPATH . "/" . $row["language_url"] . "/lang_main.php";
                $templang = $row["language_url"];
                $firstrun = 0;
            }
            $dob = explode("-", $row["dob"]);
            $age = userage($dob[0], $dob[1], $dob[2]);
            $bonus = round($age * $btit_settings["birthday_bonus"] * 1073741824);
            $query1 = "UPDATE `{$TABLE_PREFIX}users` SET `uploaded`=`uploaded`+{$bonus}, `birthday_bonus`=1 WHERE `id`=" . $row["id"];
            quickQuery($query1);
            send_pm(0, $row["id"], addslashes($language["HB_SUBJECT"]), addslashes($language["HB_MESSAGE_1"] . makesize($bonus) . $language["HB_MESSAGE_2"] . $btit_settings["birthday_bonus"] . $language["HB_MESSAGE_3"]));
        }
    }
    //timed rank
    $datetimedt = date("Y-m-d H:i:s");
    $rankstats = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT * FROM {$TABLE_PREFIX}users WHERE timed_rank < '{$datetimedt}' AND rank_switch='yes'");
    while ($arrdt = mysqli_fetch_assoc($rankstats)) {
        if (mysqli_num_rows($rankstats) > 0) {
            $res6 = mysqli_query($GLOBALS["___mysqli_ston"], "SELECT level FROM {$TABLE_PREFIX}users_level WHERE id ='{$arrdt['old_rank']}'");
            $arr6 = mysqli_fetch_assoc($res6);
            $oldrank = $arr6[level];
            $subj = sqlesc("Your timed rank is expired !");
            $msg = sqlesc("Your timed rank is expired !\n\n Your rank did changed back to " . $oldrank . "\n\n [color=red]This is a automatic system message , so DO NOT reply ![/color]");
            send_pm(0, $arrdt["id"], $subj, $msg);
            mysqli_query($GLOBALS["___mysqli_ston"], "UPDATE {$TABLE_PREFIX}users SET rank_switch='no', id_level = old_rank WHERE id='{$arrdt['id']}'") or sqlerr();
        }
    }
    //timed rank end
    //begin invitation system by dodge
    global $INV_EXPIRES;
    $deadtime = $INV_EXPIRES * 86400;
    $user = do_sqlquery("SELECT inviter FROM {$TABLE_PREFIX}invitations WHERE time_invited < DATE_SUB(NOW(), INTERVAL {$deadtime} SECOND)");
    @($arr = mysqli_fetch_assoc($user));
    if (mysqli_num_rows($user) > 0) {
        mysqli_query($GLOBALS["___mysqli_ston"], "UPDATE {$TABLE_PREFIX}users SET invitations=invitations+1 WHERE id = '" . $arr["inviter"] . "'");
        mysqli_query($GLOBALS["___mysqli_ston"], "DELETE FROM {$TABLE_PREFIX}invitations WHERE inviter = '" . $arr["inviter"] . "' AND time_invited < DATE_SUB(NOW(), INTERVAL {$deadtime} SECOND)");
    }
    //end invitation system
    do_updateranks();
    // auto ext update
    $num_torrents_to_update = 5;
    $torrents = get_result("SELECT `announces`, `info_hash` FROM `{$TABLE_PREFIX}files` WHERE `external`='yes' ORDER BY `lastupdate` DESC LIMIT " . $num_torrents_to_update);
    if (count($torrents) > 0) {
        require_once "getscrape.php";
        for ($i = 0; $i < count($torrents); $i++) {
            $announces = @unserialize($torrents[$i]['announces']) ? unserialize($torrents[$i]['announces']) : array();
            if (count($announces) > 0) {
                $keys = array_keys($announces);
                $random = mt_rand(0, count($keys) - 1);
                $url = $keys[$random];
                scrape($url, $torrents[$i]['info_hash']);
            }
        }
    }
    // auto ext update
    // OK We're finished, let's reset max_execution_time and memory_limit back to the php.ini defaults
    @ini_restore("max_execution_time");
    @ini_restore("memory_limit");
}
开发者ID:Karpec,项目名称:gizd,代码行数:101,代码来源:sanity.php

示例10: scrape

/**
 * Load one page of judgment search results, save the parsed rows through
 * scraperwiki::save_sqlite(), and recurse to the next page until a page
 * yields the same last row number as the one before it.
 *
 * Fix: the judge column used trim($text, 'Coram: '), which treats its second
 * argument as a set of characters and therefore also strips those letters
 * from both ends of the judge's name. The "Coram:" label is now removed as a
 * prefix only. The unused $ch and $records_found locals were dropped.
 *
 * @param string $cookie           Session cookie passed on to the page loaders.
 * @param string $viewstate        __VIEWSTATE value from the previous response.
 * @param string $event_validation __EVENTVALIDATION value from the previous response.
 * @param int    $page             Falsy for the first page, otherwise the page to load.
 * @param array  $from             Start date with 'day', 'month' and 'year' keys.
 * @param array  $to               End date with 'day', 'month' and 'year' keys.
 * @param int    $last_row_number  Last row number seen on the previous page.
 *
 * @return void
 */
function scrape($cookie, $viewstate, $event_validation, $page, $from, $to, $last_row_number)
{
    echo "Loading data (" . $from['day'] . '/' . $from['month'] . '/' . $from['year'] . ' -> ' . $to['day'] . '/' . $to['month'] . '/' . $to['year'] . ") ...\n";
    $dom = new DOMDocument();
    if (!$page) {
        @$dom->loadHTML(first_page($cookie, $viewstate, $event_validation, $from, $to));
    } else {
        @$dom->loadHTML(not_first_page($cookie, $viewstate, $event_validation, $page));
    }
    $xpath = new DOMXPath($dom);
    // Carry the form state of this response forward for the next page request.
    $viewstate = $xpath->query('//*[@id="__VIEWSTATE"]')->item(0)->getAttribute('value');
    $event_validation = $xpath->query('//*[@id="__EVENTVALIDATION"]');
    if ($event_validation->length) {
        $event_validation = $event_validation->item(0)->getAttribute('value');
    }
    $query = $xpath->query('//table[@id="DataGrid1"]/tr');
    unset($dom, $xpath);
    echo "Parsing results ...\n";
    $results = array();
    $n = $query->length;
    // Row 0 is skipped (the loop starts at 1).
    for ($i = 1; $i < $n; $i++) {
        $row = $query->item($i);
        preg_match('/filename=(.*?)">/i', $row->ownerDocument->saveXML($row), $href);
        // Warnings from missing cells or a missing filename match are
        // suppressed; empty rows are filtered out by the check below.
        @($result = array(
            'case_number' => trim($row->childNodes->item(1)->nodeValue),
            'path' => $href[1],
            'petitioner' => trim(substr($row->childNodes->item(2)->nodeValue, 0, strpos($row->childNodes->item(2)->nodeValue, ' Vs. '))),
            'respondent' => trim(substr($row->childNodes->item(2)->nodeValue, strpos($row->childNodes->item(2)->nodeValue, ' Vs. ') + 5)),
            // Strip only a leading "Coram:" label, never letters of the name itself.
            'judge' => trim(preg_replace('/^\\s*Coram:\\s*/i', '', $row->childNodes->item(3)->nodeValue)),
            'judgment_date' => trim($row->childNodes->item(4)->nodeValue),
            'unique_id' => preg_replace('/\\s+/', '', $row->childNodes->item(0)->nodeValue . '-' . $row->childNodes->item(1)->nodeValue . '-' . $from['day'] . $from['month'] . $from['year'] . '-' . $to['day'] . $to['month'] . $to['year']),
        ));
        if ($result['case_number'] || $result['path'] || $result['petitioner'] || $result['respondent'] || $result['judgment_date']) {
            $new_last_row_number = intval($row->childNodes->item(0)->nodeValue);
            array_push($results, $result);
        }
    }
    scraperwiki::save_sqlite(array('unique_id'), $results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
    unset($query, $results);
    if (!isset($new_last_row_number)) {
        $new_last_row_number = 0;
    }
    // Stop once a page ends on the same row number as the previous one.
    if ($last_row_number !== $new_last_row_number) {
        scrape($cookie, $viewstate, $event_validation, $page + 1, $from, $to, $new_last_row_number);
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:48,代码来源:high_court_of_jammu_kashmir_srinagar_judgments.php

示例11: unset

    $xpath = null;
    unset($xpath);
    $n = $query->length;
    echo "Parsing Results ...\n";
    $results = array();
    for ($i = 0; $i < $n; $i++) {
        $row = $query->item($i)->getElementsByTagName('td');
        @($result = array('id' => preg_replace('/\\s/', '', $year . '-' . $row->item(2)->nodeValue), 'year' => trim($year), 'country_rank' => trim($row->item(0)->nodeValue), 'company' => trim($row->item(1)->nodeValue), 'global_rank' => trim($row->item(2)->nodeValue), 'city' => trim($row->item(3)->nodeValue), 'country' => trim($country), 'revenue' => trim($row->item(4)->nodeValue)));
        if (!empty($result['company'])) {
            array_push($results, $result);
        }
        //Clean loop variables
        $option = null;
        $result = null;
        unset($option);
        unset($result);
    }
    //Clean up
    $options = null;
    $ch = null;
    unset($options);
    unset($ch);
    scraperwiki::save_sqlite(array('id'), $results);
    $results = null;
    unset($results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Country keys passed to scrape(); multi-word names are written without spaces.
$countries = array('Australia', 'Austria', 'Belgium', 'BelgiumNetherlands', 'Brazil', 'Britain', 'BritainNetherlands', 'Canada', 'China', 'Colombia', 'Denmark', 'Finland', 'France', 'Germany', 'Hungary', 'India', 'Ireland', 'Israel', 'Italy', 'Japan', 'Luxembourg', 'Malaysia', 'Mexico', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Russia', 'SaudiArabia', 'Singapore', 'SouthKorea', 'Spain', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'US', 'UnitedArabEmirates', 'Venezuela');
// Call scrape() once per country with the current four-digit year.
foreach ($countries as $country) {
    scrape(date('Y'), $country);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:fortune_global_500.php

示例12: unset

        //Clean loop variables
        $option = null;
        $result = null;
        unset($option);
        unset($result);
    }
    //Clean up
    $options = null;
    unset($options);
    //Save to database
    scraperwiki::save_sqlite(array('unique_id'), $results);
    $results = null;
    unset($results);
    echo "Peak memory usage: " . memory_get_peak_usage() . "\n";
}
// Fetch the record list once to learn the total number of results
// (the URL asks for -max=1 — presumably a single record; confirm).
$data = scraperwiki::scrape('http://securities.stanford.edu/fmi/xsl/SCACPUDB/recordlist.xsl?-db=SCACPUDB&-lay=Search&-sortfield.1=FIC_DateFiled&-sortfield.2=LitigationName&-sortorder.1=ascend&-max=1&-findall=&-lay.response=ListGral&-encoding=UTF-8&-grammar=fmresultset&-skip=0');
echo "Loading data ...\n";
$dom = new DOMDocument();
// Parser warnings from loadHTML() are suppressed.
@$dom->loadHTML($data);
$xpath = new DOMXPath($dom);
$dom = null;
unset($dom);
// The first right-aligned cell is matched against "<digits> of <total> |";
// the captured <total> becomes the result count.
// NOTE(review): if the cell or the pattern is missing, index 1 is undefined
// and intval() yields 0, so the loop below does not run.
preg_match('/\\d+\\s+of\\s+(.*?)\\s+\\|/', $xpath->query('//td[@align="right"]')->item(0)->nodeValue, $total_results_match);
$total_results = intval($total_results_match[1]);
$xpath = null;
unset($xpath);
$results_per_page = 1000;
// Upper bound for the page loop: pages 1 .. floor(total / per-page) are always
// visited, plus one more when the division leaves a remainder.
$n = $total_results / $results_per_page + 1;
for ($page = 1; $page < $n; $page++) {
    scrape($page, $results_per_page);
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:stanford_securities_class_action_clearinghouse.php

示例13: unset

        unset($option);
        unset($result);
        if ($i % 100 === 0) {
            //scraperwiki::save_sqlite(array('unique_id'), $results);
            $results = array();
        }
    }
    //Clean up
    $options = null;
    $ch = null;
    unset($options);
    unset($ch);
    //Final save to database
    //scraperwiki::save_sqlite(array('unique_id'), $results);
    $results = null;
    unset($results);
}
// Scrape the previous and the current calendar month.
//
// The earlier loop computed the previous month as date('m') - 1 within the
// current year, which is month 0 in January — not a valid month for
// cal_days_in_month(). The previous month now rolls back into December of
// the previous year.
$current_year = date('Y');
$current_month = (int) date('m');
if ($current_month === 1) {
    $previous_period = array((string) ((int) $current_year - 1), 12);
} else {
    $previous_period = array($current_year, $current_month - 1);
}
$periods = array($previous_period, array($current_year, $current_month));
foreach ($periods as $period) {
    list($year, $month) = $period;
    $end_day = cal_days_in_month(CAL_GREGORIAN, $month, $year);
    // Date triples are (day, month, year): first and last day of the month.
    $from = array(1, $month, $year);
    $to = array($end_day, $month, $year);
    // The 'get_results_total' call returns the session cookie at index 0 and
    // the result count at index 1.
    $details = scrape($from, $to, 'get_results_total', null, 'NA', 'NA');
    $cookie = $details[0];
    // 50 results per page.
    $pages = ceil($details[1] / 50);
    scrape($from, $to, 'first_page', $cookie, 0, $pages);
    for ($i = 1; $i < $pages; $i++) {
        scrape($from, $to, 'next_page', $cookie, $i, $pages);
    }
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:31,代码来源:high_court_of_allahabad_judgments.php

示例14: unix_timestamp

 $q = "SELECT searches.* FROM searches HAVING searches.updated_at IS NULL  OR searches.updated_at < unix_timestamp(DATE_SUB(NOW(), INTERVAL 1 HOUR))";
 $stmt = $db->prepare($q);
 $p = array(date('Y-m-d H:i:s'));
 $stmt->execute($p);
 $data = $stmt->fetchAll(PDO::FETCH_ASSOC);
 $res = array();
 //TODO reduce scraping frequency when there are rarely any new ads
 foreach ($data as $search) {
     if (isset($search['lang']) && $search['lang']) {
         setLeBonMailLocale(getSearchLocale($search['lang']));
     } else {
         setLeBonMailLocale(array('fr_FR', 'fr_FR.utf8'));
     }
     if ($search['annonces'] == null || $search['updated_at'] == null || $search['updated_at'] < time() - 3600 * $app->config('cache_duration')) {
         $app->getLog()->info('scraping for ' . $search['url']);
         $annonces = scrape($search['url'], $app->getLog());
         $app->getLog()->info('Got ' . count($annonces) . ' adds');
         if (is_array($annonces) && !empty($annonces)) {
             $p = array(serialize($annonces), time(), serialize($annonces[0]), $search['hash']);
             $q = "UPDATE searches SET annonces=?, updated_at=?, last=? WHERE hash=?";
             $stmt = $db->prepare($q);
             $stmt->execute($p);
         } else {
             $p = array(time(), $search['hash']);
             $q = "UPDATE searches SET updated_at=? WHERE hash=?";
             $stmt = $db->prepare($q);
             $stmt->execute($p);
         }
     } else {
         $annonces = unserialize($search['annonces']);
     }
开发者ID:inscriptionweb,项目名称:lebonmail,代码行数:31,代码来源:index.php

示例15: updatedata

/**
 * Throttled task that passes up to five info hashes sharing one announce URL
 * to scrape().
 *
 * The `update` row of the tasks table stores the time of the last run. The
 * function returns early when the interval is 0, when the interval has not
 * yet elapsed, or when the conditional UPDATE of `last_time` changed no row.
 *
 * @return array|null An empty array when the hash query yields nothing,
 *                    otherwise null.
 */
function updatedata()
{
    // The include sits between the two global declarations, as before;
    // moving it would change which scope the included file's variables land in.
    global $db;
    require_once INCL_PATH . 'getscrape.php';
    global $update_interval;

    if (0 + $update_interval == 0) {
        return;
    }

    $current = vars::$timestamp;
    $taskResult = @$db->query("SELECT last_time FROM tasks WHERE task='update'");
    $taskRow = @$taskResult->fetch_array(MYSQLI_BOTH);
    if (!$taskRow) {
        // No task row yet: create it and leave the work to a later call.
        $db->query("INSERT INTO tasks (task, last_time) VALUES ('update', " . $current . ")");
        return;
    }

    $lastRun = $taskRow[0];
    if ($lastRun + $update_interval > $current) {
        return;
    }

    // Only the caller whose UPDATE still matches the old timestamp continues.
    $db->query("UPDATE tasks SET last_time = " . $current . " WHERE task = 'update' AND last_time = " . $lastRun);
    if (!$db->affected_rows) {
        return;
    }

    // Entries must be older than the previous run minus one interval.
    $cutoff = $lastRun - $update_interval;

    $urlResult = @$db->query("SELECT announce_url FROM namemap WHERE external = 'yes' AND UNIX_TIMESTAMP(lastupdate) < " . $cutoff . " ORDER BY lastupdate ASC LIMIT 1");
    $hasUrl = $urlResult && $urlResult->num_rows != 0;
    if (!$hasUrl) {
        return;
    }

    // Take up to five hashes for that announce URL in one go.
    $urlRow = $urlResult->fetch_row();
    $hashResult = @$db->query("SELECT info_hash FROM namemap WHERE external = 'yes' AND UNIX_TIMESTAMP(lastupdate) < " . $cutoff . " AND announce_url = '" . $urlRow[0] . "' ORDER BY lastupdate DESC LIMIT 5");
    $hasHashes = $hashResult && $hashResult->num_rows != 0;
    if (!$hasHashes) {
        return array();
    }

    $hashes = array();
    for ($hashRow = $hashResult->fetch_row(); $hashRow; $hashRow = $hashResult->fetch_row()) {
        $hashes[] = $hashRow[0];
    }

    scrape($urlRow[0], implode("','", $hashes));
}
开发者ID:HDVinnie,项目名称:BtiTracker-1.5.0,代码行数:40,代码来源:functions.php


注:本文中的scrape函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。