本文整理汇总了PHP中Crawler::getInstance方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::getInstance方法的具体用法？PHP Crawler::getInstance怎么用？PHP Crawler::getInstance使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。
在下文中一共展示了Crawler::getInstance方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: setUp
/**
 * Set up
 *
 * Requests the Config, Webapp and Crawler instances through their
 * getInstance() accessors, then delegates to the parent set-up. The returned
 * instances are not used any further inside this method.
 */
function setUp()
{
    Config::getInstance();
    Webapp::getInstance();
    Crawler::getInstance();

    parent::setUp();
}
示例2: control
/**
 * Authorize the caller and, when authorized, run the crawler.
 *
 * When $this->argc is set and greater than 1, $this->argv[1] is used as the
 * username and the password is taken from $this->argv[2] or, if that argument
 * is absent, from the THINKUP_PASSWORD environment variable. Otherwise the
 * decision is left to isLoggedIn().
 *
 * @return string Empty string when authorized, otherwise an "ERROR: ..." message.
 */
public function control()
{
    $output = "";
    $authorized = false;
    $has_cli_credentials = isset($this->argc) && $this->argc > 1;

    if (!$has_cli_credentials) {
        // check user is logged in on the web
        if ($this->isLoggedIn()) {
            $authorized = true;
        } else {
            $output = "ERROR: Invalid or missing username and password.";
        }
    } else {
        // check for CLI credentials
        $session = new Session();
        $username = $this->argv[1];
        $pw = $this->argc > 2 ? $this->argv[2] : getenv('THINKUP_PASSWORD');

        $owner_dao = DAOFactory::getDAO('OwnerDAO');
        $owner = $owner_dao->getByEmail($username);
        if (!$owner_dao->isOwnerAuthorized($username, $pw)) {
            $output = "ERROR: Incorrect username and password.";
        } else {
            $authorized = true;
            Session::completeLogin($owner);
        }
    }

    if (!$authorized) {
        return $output;
    }

    $crawler = Crawler::getInstance();
    $crawler->crawl();
    return $output;
}
示例3: testFlickrCrawl
/**
 * Crawl with a dummy Flickr API key stored as a plugin option, then verify
 * the expanded URL and error recorded for links 43, 42 and 41.
 */
public function testFlickrCrawl()
{
    // The builder results stay assigned to locals until the test returns.
    $builders = $this->buildData();
    $crawler = Crawler::getInstance();
    $config = Config::getInstance();

    //use fake Flickr API key
    $plugin_row = array('id'=>'2', 'folder_name'=>'flickrthumbnails');
    $plugin_builder = FixtureBuilder::build('plugins', $plugin_row);
    $option_row = array(
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
        'option_name' => 'flickr_api_key',
        'option_value' => 'dummykey',
    );
    $option_builder = FixtureBuilder::build('options', $option_row);

    $this->simulateLogin('admin@example.com', true);
    $crawler->crawl();

    $link_dao = DAOFactory::getDAO('LinkDAO');
    // link id => array(expected expanded_url, expected error), checked in this order
    $expected_by_link_id = array(
        43 => array('http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg', ''),
        42 => array('', 'No response from Flickr API'),
        41 => array('', 'No response from Flickr API'),
    );
    foreach ($expected_by_link_id as $link_id => $expected) {
        $link = $link_dao->getLinkById($link_id);
        $this->assertEqual($link->expanded_url, $expected[0]);
        $this->assertEqual($link->error, $expected[1]);
    }
}
示例4: setUp
/**
 * Run the parent set-up, then register and activate the Twitter plugin on the
 * Webapp and Crawler instances and store a Logger instance on the test case.
 */
public function setUp()
{
    parent::setUp();

    $plugin_name = 'twitter';
    $plugin_class = 'TwitterPlugin';

    $this->webapp = Webapp::getInstance();
    $this->crawler = Crawler::getInstance();

    $this->webapp->registerPlugin($plugin_name, $plugin_class);
    $this->crawler->registerCrawlerPlugin($plugin_class);
    $this->webapp->setActivePlugin($plugin_name);

    $this->logger = Logger::getInstance();
}
示例5: testCrawlUnauthorized
/**
 * No login is simulated in this test; crawl() is expected to raise an
 * UnauthorizedUserException carrying the "valid session" message.
 */
public function testCrawlUnauthorized()
{
    $builders = $this->buildData();

    $plugin_class = 'HelloThinkUpPlugin';
    $crawler = Crawler::getInstance();
    $crawler->registerPlugin('hellothinkup', $plugin_class);
    $crawler->registerCrawlerPlugin($plugin_class);

    $expected_exception = new UnauthorizedUserException('You need a valid session to launch the crawler.');
    $this->expectException($expected_exception);

    $crawler->crawl();
    $this->assertNoErrors();
}
示例6: testExpandURLsCrawl
/**
 * Run a crawl, then check that link 1 carries the expected expanded URL and
 * an empty error string.
 */
function testExpandURLsCrawl()
{
    Crawler::getInstance()->crawl();

    //the crawler closes the log so we have to re-open it
    Logger::getInstance();

    $link_dao = DAOFactory::getDAO('LinkDAO');
    $first_link = $link_dao->getLinkById(1);

    $expected_url = 'http://www.thewashingtonnote.com/archives/2010/04/communications/';
    $this->assertEqual($first_link->expanded_url, $expected_url);
    $this->assertEqual($first_link->error, '');
}
示例7: testCrawl
/**
* Test Crawler->crawl
*
* Registers TestFauxPluginOne as a crawler plugin and expects crawl() to raise
* an Exception stating that the plugin object has no crawl method. Afterwards
* it registers HelloThinkTankPlugin, checks getPluginObject() for it, and
* calls crawl() a second time.
*
* NOTE(review): if the first crawl() call throws as expected, the statements
* that follow it presumably never execute within this method — confirm how the
* test framework's expectException() treats the thrown exception.
*/
public function testCrawl()
{
$crawler = Crawler::getInstance();
// First scenario: a registered plugin class that is expected to lack crawl()
$crawler->registerPlugin('nonexistent', 'TestFauxPluginOne');
$crawler->registerCrawlerPlugin('TestFauxPluginOne');
$this->expectException(new Exception("The TestFauxPluginOne object does not have a crawl method."));
$crawler->crawl();
// Second scenario: register HelloThinkTankPlugin and look it up by name
$crawler->registerPlugin('hellothinktank', 'HelloThinkTankPlugin');
$crawler->registerCrawlerPlugin('HelloThinkTankPlugin');
$this->assertEqual($crawler->getPluginObject("hellothinktank"), "HelloThinkTankPlugin");
$crawler->crawl();
}
示例8: setUp
/**
 * Set up
 *
 * Runs the parent set-up, registers the tests/, tests/classes/ and
 * tests/fixtures/ directories with the Loader, switches off page caching and
 * the profiler in Config, applies the configured timezone when one is set,
 * and requests the Webapp and Crawler instances.
 */
public function setUp()
{
    parent::setUp();

    $tests_root = THINKUP_ROOT_PATH . 'tests/';
    Loader::register(array($tests_root, $tests_root . 'classes/', $tests_root . 'fixtures/'));

    $config = Config::getInstance();
    // disable caching for tests; tests also assume profiling is off
    $config->setValue('cache_pages', false);
    $config->setValue('enable_profiler', false);

    if ($config->getValue('timezone')) {
        date_default_timezone_set($config->getValue('timezone'));
    }

    Webapp::getInstance();
    Crawler::getInstance();
}
示例9: testExpandURLsCrawl
/**
 * Crawl as a simulated admin login, then verify the expanded URL and error
 * stored for links 1 and 2.
 */
public function testExpandURLsCrawl()
{
    // The builder result stays assigned to a local until the test returns.
    $builders = $this->buildData();
    $this->simulateLogin('admin@example.com', true);

    Crawler::getInstance()->crawl();

    //the crawler closes the log so we have to re-open it
    Logger::getInstance();

    $link_dao = DAOFactory::getDAO('LinkDAO');
    $expectations = array(
        1 => array(
            'expanded_url' => 'http://www.thewashingtonnote.com/archives/2010/04/communications/',
            'error' => '',
        ),
        2 => array(
            'expanded_url' => '',
            'error' => 'Error expanding URL',
        ),
    );
    foreach ($expectations as $link_id => $expected) {
        $link = $link_dao->getLinkById($link_id);
        $this->assertEqual($link->expanded_url, $expected['expanded_url']);
        $this->assertEqual($link->error, $expected['error']);
    }
}
示例10: setUp
/**
 * Prepare the environment for a test.
 *
 * After the parent set-up this registers the test directories with the
 * Loader, disables page caching and the profiler in Config, applies the
 * configured timezone when one is set, requests the Webapp and Crawler
 * instances, derives the DEBUG flag from the TEST_DEBUG environment variable
 * and finally calls isTestEnvironmentReady().
 */
public function setUp()
{
    parent::setUp();

    $test_dirs = array();
    foreach (array('tests/', 'tests/classes/', 'tests/fixtures/') as $relative_dir) {
        $test_dirs[] = THINKUP_ROOT_PATH . $relative_dir;
    }
    Loader::register($test_dirs);

    $config = Config::getInstance();
    //disable caching for tests
    $config->setValue('cache_pages', false);
    //tests assume profiling is off
    $config->setValue('enable_profiler', false);

    if ($config->getValue('timezone')) {
        date_default_timezone_set($config->getValue('timezone'));
    }

    Webapp::getInstance();
    Crawler::getInstance();

    // DEBUG is true whenever TEST_DEBUG is present in the environment
    $this->DEBUG = getenv('TEST_DEBUG') !== false;
    self::isTestEnvironmentReady();
}
示例11: authControl
/**
 * Run the crawler, answering with JSON for API calls and with the
 * crawler.run-top / crawler.run-bottom templates otherwise.
 *
 * @throws CrawlerLockedException Re-thrown for API calls only; for non-API
 *         requests the exception message is echoed instead.
 */
public function authControl()
{
    Utils::defineConstants();

    if (!$this->isAPICall()) {
        $this->setContentType('text/html; charset=UTF-8');
        $this->setViewTemplate('crawler.run-top.tpl');
        echo $this->generateView();

        //this forces output to just echo to page
        $config = Config::getInstance();
        $config->setValue('log_location', false);
        $logger = Logger::getInstance();
        $logger->close();
    } else {
        // If the request comes from an API call, output JSON instead of HTML
        $this->setContentType('application/json; charset=UTF-8');
    }

    try {
        $logger = Logger::getInstance();
        $wants_full_log = isset($_GET['log']) && $_GET['log'] == 'full';
        if ($wants_full_log) {
            $logger->setVerbosity(Logger::ALL_MSGS);
            echo '<pre style="font-family:Courier;font-size:10px;">';
        } else {
            $logger->setVerbosity(Logger::USER_MSGS);
            $logger->enableHTMLOutput();
        }

        $crawler = Crawler::getInstance();
        //close session so that it's not locked by long crawl
        session_write_close();
        $crawler->crawl();
        $logger->close();
    } catch (CrawlerLockedException $e) {
        if (!$this->isAPICall()) {
            // Non-API request: show the message on the page
            echo $e->getMessage();
        } else {
            // Will be caught and handled in ThinkUpController::go()
            throw $e;
        }
    }

    if (!$this->isAPICall()) {
        $this->setViewTemplate('crawler.run-bottom.tpl');
        echo $this->generateView();
    } else {
        echo json_encode((object) array('result' => 'success'));
    }
}
示例12: testFlickrCrawl
/**
 * Crawl with a dummy Flickr API key set in Config, then verify the expanded
 * URL and error recorded for links 43, 42 and 41.
 */
function testFlickrCrawl()
{
    $crawler = Crawler::getInstance();
    //use fake Flickr API key
    Config::getInstance()->setValue('flickr_api_key', 'dummykey');
    $crawler->crawl();

    $link_dao = DAOFactory::getDAO('LinkDAO');
    $no_response = 'No response from Flickr API';
    // link id => array(expected expanded_url, expected error), checked in this order
    $cases = array(
        43 => array('http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg', ''),
        42 => array('', $no_response),
        41 => array('', $no_response),
    );
    foreach ($cases as $link_id => $case) {
        $link = $link_dao->getLinkById($link_id);
        $this->assertEqual($link->expanded_url, $case[0]);
        $this->assertEqual($link->error, $case[1]);
    }
}
示例13: authControl
/**
 * Run the crawler, answering with JSON for API calls and with an HTML page
 * (templates plus a textarea wrapping the crawl output) otherwise.
 *
 * For non-API requests the page shows a hint containing an RSS URL and a
 * sample cron command. Every dynamic value placed into that HTML (RSS URL,
 * webapp path, php path, logged-in user) is passed through
 * htmlspecialchars() so that characters such as & < > " ' cannot break the
 * markup.
 *
 * @throws CrawlerLockedException Re-thrown for API calls only; for non-API
 *         requests the exception message is echoed into the textarea.
 */
public function authControl()
{
    Utils::defineConstants();
    if ($this->isAPICall()) {
        // If the request comes from an API call, output JSON instead of HTML
        $this->setContentType('application/json; charset=UTF-8');
    } else {
        $this->setPageTitle("ThinkUp Crawler");
        $this->setViewTemplate('crawler.run-top.tpl');

        // Fall back to a bare "php" when `which php` yields nothing
        $whichphp = exec('which php');
        $php_path = !empty($whichphp) ? $whichphp : 'php';

        $this->addSuccessMessage('ThinkUp has just started to collect your posts. This is going to take a little '
            . 'while, but if you want to see the technical details of what\'s going on, there\'s a log below. ');

        $logged_in_user = $this->getLoggedInUser();
        $rss_url = THINKUP_BASE_URL . 'rss.php?' . ThinkUpAuthAPIController::getAuthParameters($logged_in_user);

        // Escape everything that is interpolated into the HTML hint
        $rss_url_html = htmlspecialchars($rss_url, ENT_QUOTES, 'UTF-8');
        $webapp_path_html = htmlspecialchars(THINKUP_WEBAPP_PATH, ENT_QUOTES, 'UTF-8');
        $php_path_html = htmlspecialchars($php_path, ENT_QUOTES, 'UTF-8');
        $user_html = htmlspecialchars($logged_in_user, ENT_QUOTES, 'UTF-8');

        $this->addInfoMessage('<b>Hint</b><br />You can automate ThinkUp crawls by subscribing to '
            . '<strong><a href="' . $rss_url_html . '" target="_blank">this RSS feed</a></strong> '
            . 'in your favorite RSS reader.<br /><br /> Alternately, use the command below to set up a cron job that '
            . 'runs hourly to update your posts. (Be sure to change yourpassword to your real password!)<br /><br />'
            . '<code style="font-family:Courier">cd ' . $webapp_path_html
            . 'crawler/;export THINKUP_PASSWORD=yourpassword; ' . $php_path_html . ' crawl.php '
            . $user_html . '</code>');

        echo $this->generateView();
        echo '<br /><br /><textarea rows="65" cols="110">';

        $config = Config::getInstance();
        $config->setValue('log_location', false);
        //this forces output to just echo to page
        $logger = Logger::getInstance();
        $logger->close();
        // Will make sure any exception caught below appears as plain text, and not as HTML
        $this->setContentType('text/plain; charset=UTF-8');
    }

    try {
        $crawler = Crawler::getInstance();
        $crawler->crawl();
    } catch (CrawlerLockedException $e) {
        if ($this->isAPICall()) {
            // Will be caught and handled in ThinkUpController::go()
            throw $e;
        } else {
            // Will appear in the textarea of the HTML page
            echo $e->getMessage();
        }
    }

    if ($this->isAPICall()) {
        echo json_encode((object) array('result' => 'success'));
    } else {
        echo '</textarea>';
        $this->setViewTemplate('crawler.run-bottom.tpl');
        echo $this->generateView();
    }
}
示例14:
/*
Author: Gina Trapani
*/
/**
*
* ThinkUp/webapp/plugins/expandurls/controller/expandurls.php
*
* Copyright (c) 2009-2011 Gina Trapani
*
* LICENSE:
*
* This file is part of ThinkUp (http://thinkupapp.com).
*
* ThinkUp is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any
* later version.
*
* ThinkUp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with ThinkUp. If not, see
* <http://www.gnu.org/licenses/>.
*
* @author Gina Trapani <ginatrapani[at]gmail[dot]com>
* @license http://www.gnu.org/licenses/gpl.html
* @copyright 2009-2011 Gina Trapani
*/
// Register the ExpandURLsPlugin class with the Webapp under the name 'expandurls'.
$webapp = Webapp::getInstance();
$webapp->registerPlugin('expandurls', 'ExpandURLsPlugin');
// Register the same plugin class with the Crawler as a crawler plugin.
$crawler = Crawler::getInstance();
$crawler->registerCrawlerPlugin('ExpandURLsPlugin');
示例15: testGeoEncoderCrawl
function testGeoEncoderCrawl()
{
$builders = $this->buildData();
$this->simulateLogin('admin@example.com', true);
$crawler = Crawler::getInstance();
$crawler->crawl();
//the crawler closes the log so we have to re-open it
$logger = Logger::getInstance();
$pdao = DAOFactory::getDAO('PostDAO');
$ldao = DAOFactory::getDAO('LocationDAO');
// Test 1: Checking Post for Successful Reverse Geoencoding
$this->assertTrue($pdao->isPostInDB(15645300636.0, 'twitter'));
$post = $pdao->getPost(15645300636.0, 'twitter');
$this->assertEqual($post->is_geo_encoded, 1);
$this->assertEqual($post->geo, '28.602815,77.049136');
$this->assertEqual($post->location, 'Sector 4, New Delhi, Haryana, India');
// Since this is just a post, reply_retweet_distance is 0
$this->assertEqual($post->reply_retweet_distance, 0);
// Test 2: Checking Post for successful Reverse Geoencoding
$post = $pdao->getPost(15219161227.0, 'twitter');
$this->assertEqual($post->is_geo_encoded, 1);
$this->assertEqual($post->geo, '28.56213,77.165297');
$this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');
// Test: Example of unsuccessful geoencoding resulting out of INVALID_REQUEST.
// NOTE: Not a test case encountered in actual crawl
$post = $pdao->getPost(15331235880.0, 'twitter');
$this->assertEqual($post->is_geo_encoded, 5);
$this->assertEqual($post->geo, '28.60abc2815 77.049136');
// Test 1: Checking Post for successful Geoencoding using "place" field
$post = $pdao->getPost(15052338902.0, 'twitter');
$this->assertEqual($post->is_geo_encoded, 1);
$this->assertEqual($post->geo, '28.6889398,77.1618859');
$this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
$this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
// Distance between main post and its reply (Geocoding Process)
$this->assertEqual($post->reply_retweet_distance, 1161);
// Test 2: Checking Post for successful Geoencoding using "place" field
// This post is retrieved from tu_encoded_locations
$post = $pdao->getPost(14914043658.0, 'twitter');
$this->assertEqual($post->is_geo_encoded, 1);
$this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
$this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
// When reply is Not in DB, reply_retweet_distance is -1
$this->assertFalse($pdao->isPostInDB(999999, 'twitter'));
$this->assertEqual($post->reply_retweet_distance, -1);
// Test 1: Checking Post for successful Geoencoding using "location" field (post had is_geo_encoded set to 3)
$post = $pdao->getPost(15338041815.0, 'twitter');
$this->assertEqual($post->geo, '19.017656,72.856178');
$this->assertEqual($post->place, NULL);
$this->assertEqual($post->location, 'Mumbai, Maharashtra, India');
$this->assertEqual($post->is_geo_encoded, 1);
// Test 2: Checking Post for successful Geoencoding using "location" field
$post = $pdao->getPost(15344199472.0, 'twitter');
$this->assertEqual($post->location, 'New Delhi, Delhi, India');
$this->assertEqual($post->is_geo_encoded, 1);
// Distance between Post and Retweet (Geocoding Process)
$this->assertEqual($post->reply_retweet_distance, 18);
// When all three fields are filled, <geo> is given the most preference
$post = $pdao->getPost(11259110570.0, 'twitter');
$this->assertEqual($post->geo, '28.56213,77.165297');
$this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
$this->assertEqual($post->location, 'Vasant Vihar, Munirka, New Delhi, Delhi, India');
$this->assertEqual($post->is_geo_encoded, 1);
// Distance between reply and post (Reverse Geocoding Process)
$this->assertEqual($post->reply_retweet_distance, 14);
// When only place and location are filled, <place> is given preference
$post = $pdao->getPost(15052338902.0, 'twitter');
$this->assertEqual($post->geo, '28.6889398,77.1618859');
$this->assertEqual($post->place, 'Sector 8, R.K. Puram, New Delhi');
$this->assertEqual($post->location, 'Keshav Puram Metro Station, Maharaja Nahar Singh Marg, New Delhi, Delhi, India');
$this->assertEqual($post->is_geo_encoded, 1);
// Unsuccessful Geoencoding due to place field
// NOTE: Not a test case encountered in real crawl
$post = $pdao->getPost(14913946516.0, 'twitter');
$this->assertEqual($post->geo, NULL);
$this->assertEqual($post->place, 'abc');
$this->assertEqual($post->location, 'New Delhi');
$this->assertEqual($post->is_geo_encoded, 2);
$this->assertEqual($post->reply_retweet_distance, 0);
//Unsuccessful Geoencoding due to location field
$post = $pdao->getPost(15268690400.0, 'twitter');
$this->assertEqual($post->geo, NULL);
$this->assertEqual($post->place, NULL);
$this->assertEqual($post->location, 'abc');
$this->assertEqual($post->is_geo_encoded, 2);
//Unsuccessful Geoencoding due to location field resulting in INVALID_REQUEST
$post = $pdao->getPost(15244973830.0, 'twitter');
$this->assertEqual($post->location, 'Ü');
$this->assertEqual($post->is_geo_encoded, 5);
//Unsuccessful Geoencoding due to all three fields being empty
$post = $pdao->getPost(15435434230.0, 'twitter');
$this->assertEqual($post->geo, NULL);
$this->assertEqual($post->place, NULL);
$this->assertEqual($post->location, NULL);
$this->assertEqual($post->is_geo_encoded, 6);
//Reverse Geoencoding when latitude and longitude are found in location field instead of geo field
$post = $pdao->getPost(13212618909.0, 'twitter');
$this->assertEqual($post->geo, '40.681839,-73.983734');
$this->assertEqual($post->place, NULL);
$this->assertEqual($post->location, 'Boerum Hill, Brooklyn, NY, USA');
//.........这里部分代码省略.........