本文整理汇总了PHP中Crawler::go_to方法的典型用法代码示例。如果您正苦于以下问题:PHP Crawler::go_to方法的具体用法?PHP Crawler::go_to怎么用?PHP Crawler::go_to使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。
在下文中一共展示了Crawler::go_to方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: crawl_page
/**
 * Crawl one comic page and print a download link for its image.
 *
 * Reads the page title (text between "PHD Comics: " and "</title>"), a
 * month/day/year date and the image URL from the page at $url, then echoes
 * an anchor whose text is "<year>_<month>_<day>_<title><ext>".
 *
 * When the date or the image tag cannot be matched, nothing is printed and
 * the crawler is closed, instead of continuing with undefined matches.
 *
 * @param string $url Address of the page to crawl.
 * @return void
 */
public function crawl_page($url)
{
    $c = new Crawler($url);

    // Title: replace every non-word character so it is safe in a file name.
    $c->go_to('<title>');
    $title = Crawler::extract($c->curline, 'PHD Comics: ', '</title>');
    $title = preg_replace('/\\W/', '_', $title);

    // Date: looked up relative to the 'date_left.gif' marker.
    // NOTE(review): readline(2) presumably skips two lines ahead - confirm in Crawler.
    $c->go_to('date_left.gif');
    $c->readline(2);
    if (!preg_match('/([0-9]+)\\/([0-9]+)\\/([0-9]+)/mi', $c->curline, $matches)) {
        // No date on the expected line: bail out rather than destructure an empty array.
        $c->close();
        return;
    }
    list(, $month, $date, $year) = $matches;
    // Zero-pad single digit day/month so the generated names sort correctly.
    $date = str_pad($date, 2, '0', STR_PAD_LEFT);
    $month = str_pad($month, 2, '0', STR_PAD_LEFT);
    $fileprefix = "{$year}_{$month}_{$date}_{$title}";

    // Image URL: stop at a quote, space or '>' so the closing delimiter of the
    // attribute is never captured as part of the URL.
    $c->go2linewhere('<td bgcolor=#FFFFFF');
    if (!preg_match('/<img src=["\']?([^ "\'>]+)/i', $c->curline, $matches)) {
        $c->close();
        return;
    }
    $img = $matches[1];
    $filename = basename($img);
    $ext = substr($filename, strrpos($filename, '.'));

    echo "<a href='{$img}'>" . $fileprefix . $ext . "</a><br/>";
    flush();
    $c->close();
}
示例2: crawl_1_chapter
/**
 * Print one download link per page of a single chapter.
 *
 * Page identifiers are taken from the <option> values that follow the
 * 'name="pagejump"' marker; the image directory is derived from the "src"
 * found after the 'id="nextpage"' marker. Every page is assumed to be a
 * ".jpg" inside that directory.
 *
 * @param string     $url     Address of the chapter page.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(…, 3).
 * @return void
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $crawler = new Crawler($url);

    // Gather the page ids until the selector is closed.
    $crawler->go_to('name="pagejump"');
    $pageIds = array();
    while ($row = $crawler->readline()) {
        if (Crawler::is_there($row, '<option')) {
            $pageIds[] = Crawler::extract($row, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($row, '</select>')) {
            break;
        }
    }

    // One sample image is enough to learn the directory all pages live in.
    $crawler->go_to('id="nextpage"');
    $crawler->readline();
    $sampleSrc = $crawler->getbetween('src="', '"');
    $crawler->close();

    $folder = dirname($sampleSrc);
    $suffix = '.jpg';
    $chapter = Crawler::pad($chapter, 3);

    foreach ($pageIds as $id) {
        echo "<a href='{$folder}/{$id}{$suffix}'>{$prefix}-{$chapter}-{$id}{$suffix}</a><br/>\n";
        flush();
    }
}
示例3: crawl_1_page
/**
 * Print a link for every href found inside the page's "entry" div.
 *
 * The link text is derived from $url: its last character is dropped, the
 * result is passed through Crawler::cutfromlast1(…, '/') and then
 * HTML-entity-decoded. Both single- and double-quoted href attributes are
 * recognised; scanning stops at the first line containing "</div>".
 *
 * @param string $url Address of the page to crawl.
 * @return void
 */
function crawl_1_page($url)
{
    echo "URL2 {$url} <br/>\n";
    flush();

    $label = html_entity_decode(Crawler::cutfromlast1(substr($url, 0, -1), '/'));

    $page = new Crawler($url);
    $page->go_to('<div class="entry">');
    while ($row = $page->readline()) {
        // Work out which quote style (if any) this line uses for its href.
        $quote = null;
        if (Crawler::is_there($row, "href='")) {
            $quote = "'";
        } elseif (Crawler::is_there($row, 'href="')) {
            $quote = '"';
        }

        if ($quote !== null) {
            $target = Crawler::extract($row, 'href=' . $quote, $quote);
            echo "<a href='{$target}'>{$label}</a><br/>\n";
            flush();
        } elseif (Crawler::is_there($row, '</div>')) {
            break;
        }
    }
    $page->close();
}
示例4: crawl_album
/**
 * Print download links for every image referenced in an album page.
 *
 * The lines between a "<noscript>" marker and "</noscript>" are trimmed and
 * concatenated, every src="…" value is extracted from that text, and the
 * "/s128/" path segment of each URL is replaced by "/d/".
 *
 * @param string       $url   Address of the album page.
 * @param string|false $alias Link text to use for every image; when falsy the
 *                            URL-decoded base name of each image is used.
 * @return void
 */
function crawl_album($url, $alias = false)
{
    $page = new Crawler($url);

    // go_to is invoked twice on purpose in the original.
    // NOTE(review): presumably this lands on the second <noscript> - confirm in Crawler.
    $page->go_to('<noscript>');
    $page->go_to('<noscript>');
    $page->readline();

    $markup = '';
    while ($row = $page->readline()) {
        if (Crawler::is_there($row, '</noscript>')) {
            break;
        }
        $markup .= trim($row);
    }
    $thumbs = Crawler::extract_to_array($markup, 'src="', '"');
    $page->close();

    /* Alternative kept from the original: use the description as the file name
    preg_match_all('/<img src="([^"]+)"><\\/a><p><a [^>]+>([^<]+)<\\/a>/', $target, $match);
    //file_put_contents('picasaweb.out', print_r($match, true));exit;
    foreach ($match[1] as $i => $uri) {
    $info = pathinfo(basename($uri));
    $ext = $info['extension'];
    $name = $match[2][$i];
    $img = str_replace('/s128/', '/', $uri);
    echo "<a href='$img'>$name.$ext</a><br />\n";
    }
    exit;
    */

    foreach ($thumbs as $thumb) {
        $full = str_replace('/s128/', '/d/', $thumb);
        $caption = $alias ? $alias : urldecode(basename($full));
        echo "<a href='{$full}'>{$caption}</a><br/>\n";
        flush();
    }
}
示例5: go
/**
 * Walk a paginated gallery starting at $this->url and print a link per image.
 *
 * For every page the URL is echoed, then each line containing src="…" between
 * the 'id="pid_' marker and "</ul>" yields one link. The "thumb_100_" part of
 * the image URL is replaced by "normal__", or removed entirely when the "big"
 * query parameter is set to a truthy value. Pagination follows the
 * rel="next" link for as long as the pager line contains "Pager_next".
 *
 * The link text is the path segment preceding "/page/1" in the start URL, or
 * an empty string when the URL does not have that shape.
 *
 * @return void
 */
public function go()
{
    $host = 'http://lu.scio.us';
    $url = $this->url;

    // Guard the match: an unexpected URL shape must not read an undefined offset.
    $text = preg_match('/\\/([^\\/]+)\\/page\\/1/', $url, $m) ? $m[1] : '';

    // Explicit check instead of silencing the undefined-index notice with "@".
    $replacement = !empty($_GET['big']) ? '' : 'normal__';

    $finish = false;
    while (!$finish) {
        echo $url . "<br/>\n";
        flush();

        $c = new Crawler($url);
        $c->go_to('id="pid_');
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, 'src="')) {
                $img = Crawler::extract($line, 'src="', '"');
                $img = str_replace('thumb_100_', $replacement, $img);
                echo "<a href='{$img}'>{$text}</a><br/>\n";
                flush();
            } elseif (Crawler::is_there($line, '</ul>')) {
                break;
            }
        }

        // Follow the pager while it advertises a next page.
        $c->go_to('class="pager"');
        $c->readline();
        if (Crawler::is_there($c->curline, 'Pager_next')) {
            $url = $host . Crawler::extract($c->curline, '<a rel="next" href="', '"');
        } else {
            $finish = true;
        }
        $c->close();
    }
}
示例6: go
/**
 * Print one link per page of an online reader at $this->url.
 *
 * Ensures the URL ends in "/read" (appending it to $this->url when missing),
 * fetches the page through Crawler with curl and a browser user agent, decodes
 * the JSON assigned on the "var data = {" line and reads the image path
 * template returned by the page's imgpath() JavaScript function. For every
 * entry of the decoded "thumbs" collection the placeholder "' + x + '" in that
 * template is replaced by the 1-based, padded page number.
 *
 * Nothing is printed when the JSON cannot be decoded or carries no iterable
 * "thumbs" member.
 *
 * Example URL from the original author: http://www.fakku.net/viewonline.php?id=2589
 *
 * @return void
 */
public function go()
{
    if (!preg_match('/\\/read$/', $this->url)) {
        $this->url .= '/read';
    }

    $craw = new Crawler($this->url, array('use_curl' => true, 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13'));
    $craw->go_to('var data = {');
    $json = Crawler::extract($craw->curline, ' = ', ';');
    $obj = json_decode($json);

    // Site layout changed on 2012-05-06: the image path now comes from imgpath().
    $craw->go_to('function imgpath(');
    $craw->go_to('return \'');
    $imgpath = $craw->getbetween("return '", "';");
    $craw->close();

    // json_decode() yields null on malformed input; do not dereference it blindly.
    if (!is_object($obj) || !isset($obj->thumbs)) {
        return;
    }
    if (!is_array($obj->thumbs) && !is_object($obj->thumbs)) {
        return;
    }

    // Every link shares the same text: the directory name preceding "/read".
    $text = basename(dirname($this->url));
    foreach ($obj->thumbs as $key => $val) {
        $filename = Crawler::pad($key + 1, 3);
        $img = str_replace("' + x + '", $filename, $imgpath);
        echo "<a href='{$img}'>{$text}</a><br/>\n";
        flush();
    }
}
示例7: mangareader_1_page
/**
 * Resolve the image of a single reader page.
 *
 * Opens $fil with Crawler, moves to the 'width="800"' marker and takes the
 * src="…" value found there. The result name is
 * "<prefix>-<padded chapter>-<image name>", where the image name is the
 * trailing "<digits>.<ext>" part of the image's base name, or an empty string
 * when the base name does not end that way.
 *
 * @param string     $fil     Location handed to Crawler (page to read).
 * @param string     $url     Not used by this method.
 * @param string     $prefix  Prefix for the generated name.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(…, 3).
 * @return array Single-element map of generated name => image URL.
 */
public function mangareader_1_page($fil, $url, $prefix, $chapter)
{
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');
    $c->close();

    // Guard the match so an unexpected file name cannot read an undefined offset.
    $iname = preg_match('/(\\d+\\.\\w+)$/', basename($img), $m) ? $m[1] : '';

    $name = $prefix . '-' . $chapter . '-' . $iname;
    return array($name => $img);
}
示例8: mangareader_1_page
/**
 * Print a list item linking to the image of a single reader page.
 *
 * Opens $fil with Crawler, moves to the 'width="800"' marker and takes the
 * src="…" value found there. The link text is
 * "<$this->prefix>-<padded chapter>-<image name>", where the image name is the
 * trailing "<digits>.<ext>" part of the image's base name, or an empty string
 * when the base name does not end that way.
 *
 * @param string     $fil     Location handed to Crawler (page to read).
 * @param string     $url     Not used by this method.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(…, 3).
 * @return void
 */
public function mangareader_1_page($fil, $url, $chapter)
{
    $prefix = $this->prefix;
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');

    // Guard the match so an unexpected file name cannot read an undefined offset.
    $iname = preg_match('/(\\d+\\.\\w+)$/', basename($img), $m) ? $m[1] : '';

    echo '<li><a href="' . $img . '">' . $prefix . '-' . $chapter . '-' . $iname . '</a>' . "</li>\n";
    $c->close();
}
示例9: download_all
/**
 * Download every image of a paginated gallery into $destination.
 *
 * Each page is echoed and scanned: lines containing "border=0" provide a
 * thumbnail URL, which is rewritten ("/thumb/" becomes "/full/", a "/x<digit>."
 * segment collapses to "/") and stored through $this->save_to() under a
 * running number produced by Crawler::n(…, 4) plus the original extension.
 * Scanning of a page stops at "</form>". When the page exposes a ":: next ::"
 * link, its href is appended to the initial $base to form the next page URL.
 *
 * @param string $base        Address of the first gallery page.
 * @param string $destination Directory prefix for the saved files.
 * @return void
 */
private function download_all($base, $destination)
{
    $root = $base;
    $counter = 1;

    do {
        $page = new Crawler($base);
        echo $base . "\n";

        // Stop at whichever marker shows up first: the image table or the pager link.
        $page->go_to(array('<table style=', ':: next ::'));
        $hasNext = Crawler::is_there($page->curline, ':: next ::');
        if ($hasNext) {
            $nextHref = Crawler::extract($page->curline, 'href="', '"');
            $base = $root . html_entity_decode($nextHref);
            $page->go_to('<table style=');
        }

        while ($line = $page->readline()) {
            if (Crawler::is_there($line, 'border=0')) {
                $source = Crawler::extract($line, 'src="', '"');
                $source = str_replace('/thumb/', '/full/', $source);
                $source = preg_replace('/\\/x\\d\\./', '/', $source);
                $suffix = Crawler::cutfromlast(basename($source), '.');
                $number = Crawler::n($counter++, 4);
                $this->save_to($source, "{$destination}/{$number}{$suffix}");
                continue;
            }
            if (Crawler::is_there($line, '</form>')) {
                break;
            }
        }
        $page->close();
    } while ($hasNext);
}
示例10: crawl_1_chapter
/**
 * Crawl every page of one chapter through Crawler::multiProcess().
 *
 * The page list is read from the value="…" attributes on the line following
 * the 'id="myselectbox3"' marker; each value is prefixed with $url and the
 * resulting list is handed to 'crawl_1_page' in batches of 4, together with
 * the padded chapter number.
 *
 * Example URL from the original author: http://ani-haven.net/hr-alpha/Psyren/145/
 *
 * @todo Marked as unfinished by the original author.
 *
 * @param string     $url     Address of the chapter (used as prefix for page URLs).
 * @param int|string $chapter Chapter number; padded via Crawler::pad(…, 3).
 * @return void
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $chapter = Crawler::pad($chapter, 3);

    $reader = new Crawler($url);
    $reader->go_to('id="myselectbox3"');
    $reader->readline();
    $pages = Crawler::extract_to_array($reader->curline, 'value="', '"');
    $reader->close();

    // Turn every relative page value into a full URL by prepending $url.
    foreach ($pages as $index => $relative) {
        $pages[$index] = $url . $relative;
    }

    Crawler::multiProcess(4, $pages, 'crawl_1_page', array($chapter));
}
示例11: foolreader_1_chapter
/**
 * Print a link for every image listed in a chapter's JavaScript imageArray.
 *
 * Lines containing "imageArray[" after the "imageArray = new Array" marker
 * carry one single-quoted image path each. Paths that do not start with
 * "http://" are prefixed with the global $sitename. Scanning stops at the
 * line containing "function loadImage".
 *
 * TODO (carried over from the original): an alternative that collects the
 * <option value='…'> entries and fans out to 'foolreader_1_page' through
 * Crawler::multiProcess() was left unfinished.
 *
 * @param string     $url     Address of the chapter page.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(…, 3).
 * @return void
 */
function foolreader_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $chapter = Crawler::pad($chapter, 3);

    $reader = new Crawler($url);
    $reader->go_to('imageArray = new Array');
    while ($row = $reader->readline()) {
        if (!Crawler::is_there($row, 'imageArray[')) {
            if (Crawler::is_there($row, 'function loadImage')) {
                break;
            }
            continue;
        }

        $src = Crawler::extract($row, "'", "'");
        $isAbsolute = strpos($src, 'http://') === 0;
        if (!$isAbsolute) {
            $src = $sitename . $src;
        }
        $file = basename($src);
        echo "<a href='{$src}'>{$prefix}-{$chapter}-{$file}</a><br/>\n";
    }
    $reader->close();
}
示例12: array
// NOTE(review): this example is a fragment - the enclosing function and the
// creation of $c are not visible, and the listing is cut off inside the
// final if/else.
// Collect the href of every line containing "ehga" after the 'class="ehggt"'
// marker, stopping at "</table>".
$c->go_to('class="ehggt"');
$pages = array();
while ($line = $c->readline()) {
if (Crawler::is_there($line, '"ehga"')) {
$pages[] = Crawler::extract($line, 'href="', '"');
} else {
if (Crawler::is_there($line, '</table>')) {
break;
}
}
}
$c->close();
// Visit every collected page (fetched with curl) and pick its image.
foreach ($pages as $url) {
echo "URL:{$url}<br/>\n";
$c = new Crawler($url, array('use_curl' => true));
$c->go_to('</span>');
// grab the image sources
$raws = Crawler::extract_to_array($c->curline, 'src="', '"');
// Debug dump of every src value found on the line.
echo '<pre>';
print_r($raws);
echo '</pre>';
// the actual image is usually at $raws[4] or $raws[5]
// Drop a leading "next" navigation icon so the indexes below line up.
if (Crawler::is_there($raws[0], '/n/next.png')) {
array_shift($raws);
}
// the actual image has the longer file name
$base1 = basename($raws[4]);
$base2 = basename($raws[5]);
if (strlen($base1) > strlen($base2)) {
$img = $raws[4];
} else {
示例13: download_it
require_once 'crawler.php';
// Root URL that every index page below is relative to.
$base = 'http://disneycomics.free.fr/';

// Index page per section, keyed by the section's display name.
$tree = array(
    'Carl Barks' => 'index_barks_date.php',
    'Don Rosa' => 'index_rosa_date.php',
    'Marco Rota' => 'index_rota_date.php',
    'Romano Scarpa' => 'index_scarpa_date.php',
    'Tony Strobl' => 'index_strobl_date.php',
    'Al Taliaferro' => 'index_taliaferro.php',
    'Vicar' => 'index_vicar_date.php',
    'William Van Horn' => 'index_vanhorn_date.php',
    'Paul Murry' => 'index_murry_date.php',
    'Daily Strips' => 'index_dailies.php',
    'Sunday Strips' => 'index_sunday.php',
);
/**
 * Download $img_url to $output_file by shelling out to wget.
 *
 * wget is asked to retry indefinitely (-t 0) and to retry refused
 * connections. Both arguments are shell-escaped, so URLs containing
 * characters such as "&" or "?" and paths containing spaces or quotes are
 * passed to wget intact instead of being interpreted by the shell.
 *
 * The target directory is not created here; it must already exist.
 *
 * @param string $img_url     Address of the file to fetch.
 * @param string $output_file Path the download is written to.
 * @return void
 */
function download_it($img_url, $output_file)
{
    $command = 'wget -t 0 --retry-connrefused -O '
        . escapeshellarg($output_file)
        . ' '
        . escapeshellarg($img_url);
    exec($command);
}
// NOTE(review): this listing is cut off inside the 'beginning' case; the
// remaining cases (including the selected 'phase3b') are not visible.
$mode = 'phase3b';
switch ($mode) {
case 'beginning':
// Build $result[<section name>][<entry title>] = <link> from every index page in $tree.
$result = array();
foreach ($tree as $name => $link) {
$c = new Crawler($base . $link);
$c->go_to('<tbody>');
while ($line = $c->readline()) {
if (Crawler::is_there($line, '<tr>')) {
// Read forward (up to three lines) until a line holding a link is reached.
$line = $c->readline();
// sequence number
if (!Crawler::is_there($line, '<a href')) {
$line = $c->readline();
// hero
if (!Crawler::is_there($line, '<a href')) {
$line = $c->readline();
// title and link
}
}
// some entries come as an original/reprint pair, ...
if (preg_match('/class="red">(.*)<\\/h4>.*class="blue" href="([^"]*)">original<.*href="([^"]*)">reprint</', $line, $matches)) {
$result[$name][strip_tags($matches[1]) . '-original'] = html_entity_decode($matches[2]);
示例14: omfggscans_chapters
/**
 * Print page links for each of the given chapters.
 *
 * For every chapter the URL "<$base>&c=<chapter value>" is echoed and
 * crawled: the <option> entries after the "name='page'" marker give the page
 * labels, and the src found on the "class='manga-img'" line gives the image
 * directory. Each page is assumed to be "<directory>/<label>.png".
 *
 * @param array $chapters Chapter values used in the "c" query parameter.
 * @param array $infixs   Per-chapter infix (same keys as $chapters); padded
 *                        via Crawler::pad(…, 3) for the link text.
 * @return void
 */
function omfggscans_chapters($chapters, $infixs)
{
    global $base, $sitename, $prefix;

    foreach ($chapters as $idx => $chapterId) {
        $url = $base . "&c={$chapterId}";
        $ifx = Crawler::pad($infixs[$idx], 3);
        echo "{$url}<br/>\n";

        $reader = new Crawler($url);

        // Page labels, keyed by option value (so repeated values collapse).
        $reader->go_to("name='page'");
        $pages = array();
        while ($row = $reader->readline()) {
            if (Crawler::is_there($row, '<option')) {
                $value = Crawler::extract($row, "value='", "'");
                $pages[$value] = Crawler::extract($row, "'>", "</");
                continue;
            }
            if (Crawler::is_there($row, '</select>')) {
                break;
            }
        }

        // A single sample image reveals the directory shared by all pages.
        $reader->go_to("class='manga-img'");
        $sample = Crawler::extract($reader->curline, 'src="', '"');
        $directory = dirname($sample) . '/';
        $suffix = '.png';
        $reader->close();

        foreach ($pages as $label) {
            $href = $directory . $label . $suffix;
            echo "<a href='{$href}'>{$prefix}-{$ifx}-{$label}{$suffix}</a><br />\n";
        }
    }
}
示例15: rule34
/**
 * Walk a paginated listing and print a link for every "Image Only" entry.
 *
 * Each page URL is echoed before it is crawled. The line at the
 * "id='Navigationleft'" marker is searched for a "Next" link, which (prefixed
 * with the site root) becomes the following page; when there is none the
 * current page is the last one. Entries are read after the "id='image-list'"
 * marker until "<footer>". The link text is the URL-decoded name of the
 * directory part of the initial $url.
 *
 * The crawler is closed at the end of every page so that a long listing does
 * not accumulate open crawlers.
 *
 * @param string $url Address of the first listing page.
 * @return void
 */
function rule34($url)
{
    $text = rawurldecode(basename(dirname($url)));
    $site = 'http://rule34.paheal.net';

    $continue = true;
    while ($continue) {
        echo "{$url}<br/>";
        $c = new Crawler($url);

        // Work out the next page first; the current page is still processed below.
        $c->go_to("id='Navigationleft'");
        if (preg_match('/<a href="([^\'"]+)">Next/', $c->curline, $m)) {
            $url = $site . $m[1];
        } else {
            $continue = false;
        }

        $c->go_to("id='image-list'");
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, '>Image Only<')) {
                $href = Crawler::extract($line, '<br><a href="', '"');
                echo "<a href='{$href}'>{$text}</a><br/>\n";
            } elseif (Crawler::is_there($line, '<footer>')) {
                break;
            }
        }

        // Release the crawler for this page before moving on.
        $c->close();
    }
}