当前位置: 首页>>代码示例>>PHP>>正文


PHP Crawler::close方法代码示例

本文整理汇总了PHP中Crawler::close方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::close方法的具体用法？PHP Crawler::close怎么用？PHP Crawler::close使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。


在下文中一共展示了Crawler::close方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: crawl_page

 /**
  * Crawl one comic page and print a download link for its image.
  *
  * The page title, a m/d/y style date and the first <img> URL on the
  * '<td bgcolor=#FFFFFF' line are read from the page; the link text is
  * "<year>_<month>_<day>_<title><ext>". Nothing is printed when the date
  * or the image URL cannot be found on the page.
  *
  * @param string $url Address of the page to crawl.
  * @return void
  */
 public function crawl_page($url)
 {
     $c = new Crawler($url);

     // Title: drop the 'PHD Comics: ' prefix, then turn every non-word character into "_".
     $c->go_to('<title>');
     $title = Crawler::extract($c->curline, 'PHD Comics: ', '</title>');
     $title = preg_replace('/\\W/', '_', $title);

     // Date: taken from the current line after the 'date_left.gif' marker and readline(2).
     $c->go_to('date_left.gif');
     $c->readline(2);
     $hasDate = preg_match('/([0-9]+)\\/([0-9]+)\\/([0-9]+)/mi', $c->curline, $dateParts);

     // Image URL: the capture stops at a space, a quote or ">" so that a
     // closing quote or tag end never becomes part of the URL.
     $c->go2linewhere('<td bgcolor=#FFFFFF');
     $hasImg = preg_match('/<img src=["\']?([^ "\'>]+)/i', $c->curline, $imgParts);

     $c->close();
     unset($c);

     // Without both matches there is nothing meaningful to print.
     if (!$hasDate || !$hasImg) {
         return;
     }

     // Zero-pad month and day to two digits.
     $month = str_pad($dateParts[1], 2, '0', STR_PAD_LEFT);
     $date = str_pad($dateParts[2], 2, '0', STR_PAD_LEFT);
     $year = $dateParts[3];
     $fileprefix = "{$year}_{$month}_{$date}_{$title}";

     $img = $imgParts[1];
     $filename = basename($img);
     // A filename without a dot has no extension (strrpos would return false).
     $dot = strrpos($filename, '.');
     $ext = $dot === false ? '' : substr($filename, $dot);

     echo "<a href='{$img}'>" . $fileprefix . $ext . "</a><br/>";
     flush();
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:34,代码来源:phdcomics.php

示例2: crawl_1_page

/**
 * Print one link per href found after the '<div class="entry">' marker.
 *
 * Lines are read until one containing '</div>' (and no href) is met or
 * readline() returns a falsy value. Both single- and double-quoted href
 * attributes are recognised; single quotes are tried first.
 *
 * @param string $url Page to crawl; also the source of the link label.
 * @return void
 */
function crawl_1_page($url)
{
    echo "URL2 {$url} <br/>\n";
    flush();

    // Label: the URL without its last character, passed through
    // Crawler::cutfromlast1(..., '/') and entity-decoded.
    $label = html_entity_decode(Crawler::cutfromlast1(substr($url, 0, -1), '/'));

    $page = new Crawler($url);
    $page->go_to('<div class="entry">');
    while ($line = $page->readline()) {
        if (Crawler::is_there($line, "href='")) {
            $href = Crawler::extract($line, "href='", "'");
        } elseif (Crawler::is_there($line, 'href="')) {
            $href = Crawler::extract($line, 'href="', '"');
        } elseif (Crawler::is_there($line, '</div>')) {
            break;
        } else {
            continue;
        }
        echo "<a href='{$href}'>{$label}</a><br/>\n";
        flush();
    }
    $page->close();
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:27,代码来源:reallycuteasians.php

示例3: crawl_1_chapter

/**
 * Print a link for every page of one chapter.
 *
 * Page ids come from the <option> values following the 'name="pagejump"'
 * marker; the image directory is taken from the src attribute read after
 * the 'id="nextpage"' marker. Every page is assumed to be a ".jpg" in
 * that directory.
 *
 * @param string     $url     Address of the chapter's first page.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(..., 3).
 * @return void
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $reader = new Crawler($url);

    // Collect option values until the closing </select>.
    $reader->go_to('name="pagejump"');
    $pageIds = array();
    while ($line = $reader->readline()) {
        if (Crawler::is_there($line, '<option')) {
            $pageIds[] = Crawler::extract($line, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($line, '</select>')) {
            break;
        }
    }

    // One sample image is enough to learn the directory all pages share.
    $reader->go_to('id="nextpage"');
    $reader->readline();
    $sampleImg = $reader->getbetween('src="', '"');
    $reader->close();

    $baseDir = dirname($sampleImg);
    $suffix = '.jpg';
    $chapter = Crawler::pad($chapter, 3);
    foreach ($pageIds as $id) {
        echo "<a href='{$baseDir}/{$id}{$suffix}'>{$prefix}-{$chapter}-{$id}{$suffix}</a><br/>\n";
        flush();
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:29,代码来源:mangashare.php

示例4: go

 /**
  * Print every link found on the '<p><a href="' line of the start page.
  *
  * The redirect prefix is stripped from each target before it is echoed
  * as both the href and the link text.
  *
  * @return void
  */
 public function go()
 {
     $page = new Crawler($this->url);
     $page->go2linewhere('<p><a href="');
     $page->close();

     // curline is read after close(), exactly as the original ordering did.
     $chunks = explode('<a href="', $page->curline);

     // Chunk 0 is whatever precedes the first anchor, so it is skipped.
     foreach (array_slice($chunks, 1) as $chunk) {
         $link = Crawler::cutuntil($chunk, '"');
         $link = str_replace('http://hentaifromhell.net/redirect.html?', '', $link);
         echo "<a href='{$link}'>{$link}</a><br />\n";
         flush();
     }
     // A disabled variant used to open each link, read the image from the
     // 'id="thepic"' line and skip servers listed in $this->blacklist.
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:27,代码来源:spider_hfhgallery3.php

示例5: crawl_indowebster

/**
 * Build an HTML link to the file behind a download page.
 *
 * The redirect target is read from a "location.href='...'" assignment on
 * the line located by go2lineregexor(); its 'path=' and 'file_orig='
 * parts supply the link target and the link text.
 *
 * @param string $url Address of the download page.
 * @return string Anchor markup pointing at the file.
 */
function crawl_indowebster($url)
{
    $page = new Crawler($url);
    $page->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');
    $target = $page->getbetween('location.href=\'', '\'');

    $path = Crawler::extract($target, 'path=', '&');
    $origName = Crawler::cutafter($target, 'file_orig=');
    $page->close();

    $href = dirname($target) . '/' . $path;
    $label = rawurldecode($origName);

    return '<a href="' . $href . '">' . $label . '</a>';
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:11,代码来源:indowebster.php

示例6: mangareader_1_page

 /**
  * Extract the image of one reader page and name it.
  *
  * The image URL is the src attribute read after the 'width="800"' marker.
  * The name is "<prefix>-<chapter>-<digits.ext>", where <digits.ext> is the
  * trailing "digits.extension" part of the image basename; it is empty
  * when the basename does not end that way.
  *
  * @param string     $fil     Source handed to the Crawler constructor.
  * @param string     $url     Not used by this method.
  * @param string     $prefix  Leading part of the generated name.
  * @param int|string $chapter Chapter number; padded via Crawler::pad(..., 3).
  * @return array Single-entry map of generated name => image URL.
  */
 public function mangareader_1_page($fil, $url, $prefix, $chapter)
 {
     $chapter = Crawler::pad($chapter, 3);

     $c = new Crawler($fil);
     $c->go_to('width="800"');
     $img = $c->getbetween('src="', '"');
     $c->close();

     // Only read the capture group when the pattern actually matched;
     // otherwise $m[1] would be an undefined offset.
     $iname = preg_match('/(\\d+\\.\\w+)$/', basename($img), $m) ? $m[1] : '';

     $name = $prefix . '-' . $chapter . '-' . $iname;
     return array($name => $img);
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:12,代码来源:Mangareader_Crawler.php

示例7: mangareader_1_page

 /**
  * Print a list item linking to the image of one reader page.
  *
  * The image URL is the src attribute read after the 'width="800"' marker.
  * The link text is "<prefix>-<chapter>-<digits.ext>", where <digits.ext>
  * is the trailing "digits.extension" part of the image basename; it is
  * empty when the basename does not end that way.
  *
  * @param string     $fil     Source handed to the Crawler constructor.
  * @param string     $url     Not used by this method.
  * @param int|string $chapter Chapter number; padded via Crawler::pad(..., 3).
  * @return void
  */
 public function mangareader_1_page($fil, $url, $chapter)
 {
     $prefix = $this->prefix;
     $chapter = Crawler::pad($chapter, 3);

     $c = new Crawler($fil);
     $c->go_to('width="800"');
     $img = $c->getbetween('src="', '"');
     $c->close();

     // Only read the capture group when the pattern actually matched;
     // otherwise $m[1] would be an undefined offset.
     $iname = preg_match('/(\\d+\\.\\w+)$/', basename($img), $m) ? $m[1] : '';

     echo '<li><a href="' . $img . '">' . $prefix . '-' . $chapter . '-' . $iname . '</a>' . "</li>\n";
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:13,代码来源:mangareader.php

示例8: crawl_one_page

/**
 * Collect one value per line from a dash-delimited section of a page.
 *
 * go2linewhere() is called twice with the dashed separator and one more
 * line is read; from there, the text between a single space and a double
 * space is collected from each line until a line containing the
 * separator is the current one.
 *
 * @param string $url Page to read.
 * @return array Collected values, in page order.
 */
function crawl_one_page($url)
{
    $separator = '------------------------------------------';

    $reader = new Crawler($url);
    $reader->go2linewhere($separator);
    $reader->go2linewhere($separator);
    $reader->readline();

    $collected = array();
    for (; $reader->strpos($separator) === false; $reader->readline()) {
        $collected[] = $reader->getbetween(' ', '  ');
    }
    $reader->close();

    return $collected;
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:14,代码来源:akademik.php

示例9: go

 /**
  * Walk a paginated gallery and print a link to each full-size image.
  *
  * Starting at $this->url, every page is scanned from the
  * '<li class="thumbnail">' line onwards. Thumbnail URLs are rewritten to
  * image URLs; a 'class="pagNext"' line supplies the next page, and a
  * '</table>' line ends the scan of the current page. Crawling stops
  * after the first page that offers no next-page link.
  *
  * @return void
  */
 public function go()
 {
     $page_url = $this->url;
     do {
         $has_next = false;
         echo $page_url, "<br />\n";
         flush();

         $c = new Crawler($page_url);
         $c->go2linewhere('<li class="thumbnail">');
         while ($line = $c->readline()) {
             if (strpos($line, 'src="') !== false) {
                 // Thumbnail found: derive the full-size image URL from it.
                 $uri = Crawler::extract($line, 'src="', '"');
                 $uri = str_replace('/thumbs/', '/images/', $uri);
                 // Strip the "/small/<digits>-" part, but only when it is
                 // present; an unmatched pattern leaves no capture to read.
                 if (preg_match('/(\\/small\\/\\d+-)/', $uri, $matches)) {
                     $uri = str_replace($matches[1], '/', $uri);
                 }
                 $file = basename(dirname($uri));
                 echo "<a href='{$uri}'>{$file}</a><br/>\n";
                 flush();
             } elseif (strpos($line, 'class="pagNext"') !== false) {
                 // Next page link: remember it and stop scanning this page.
                 $has_next = true;
                 $page_url = html_entity_decode(Crawler::extract($line, 'class="pagNext" href="', '"'));
                 break;
             } elseif (strpos($line, '</table>') !== false) {
                 // End of the thumbnail table: this page is done.
                 break;
             }
         }
         $c->close();
     } while ($has_next);
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:45,代码来源:spider_hfhgallery2.php

示例10: crawl_1_chapter

/**
 * Crawl every page of one chapter through Crawler::multiProcess().
 *
 * Page suffixes are the value="..." attributes on the line after the
 * 'id="myselectbox3"' marker; each is appended to $url to form the page
 * address. Example chapter URL: http://ani-haven.net/hr-alpha/Psyren/145/
 *
 * @todo Left unfinished by the original author.
 *
 * @param string     $url     Chapter address, used as prefix for each page.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(..., 3).
 * @return void
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $chapter = Crawler::pad($chapter, 3);

    $reader = new Crawler($url);
    $reader->go_to('id="myselectbox3"');
    $reader->readline();
    $pages = Crawler::extract_to_array($reader->curline, 'value="', '"');
    $reader->close();

    // Turn each page suffix into a full address by prepending the chapter URL.
    foreach ($pages as $idx => $suffix) {
        $pages[$idx] = $url . $suffix;
    }

    Crawler::multiProcess(4, $pages, 'crawl_1_page', array($chapter));
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:18,代码来源:havenreader.php

示例11: go

 /**
  * Walk a paginated album and print a link to each image.
  *
  * Starting at $this->url, each page is scanned from the 'id="pid_' marker
  * until a '</ul>' line. Thumbnail URLs have 'thumb_100_' replaced by
  * 'normal__', or removed entirely when the request carries a truthy
  * "big" query parameter. The link text is the path segment that precedes
  * "/page/1" in the start URL (empty when the URL has no such segment).
  * Crawling continues while the pager line contains 'Pager_next'.
  *
  * @return void
  */
 public function go()
 {
     $host = 'http://lu.scio.us';
     $url = $this->url;

     // Guard the capture: a start URL without "/<name>/page/1" has no match.
     $text = preg_match('/\\/([^\\/]+)\\/page\\/1/', $url, $m) ? $m[1] : '';

     // The request parameter cannot change mid-crawl, so resolve it once
     // (and without the error-suppression operator).
     $size_prefix = !empty($_GET['big']) ? '' : 'normal__';

     do {
         echo $url . "<br/>\n";
         flush();

         $c = new Crawler($url);
         $c->go_to('id="pid_');
         while ($line = $c->readline()) {
             if (Crawler::is_there($line, 'src="')) {
                 $img = Crawler::extract($line, 'src="', '"');
                 $img = str_replace('thumb_100_', $size_prefix, $img);
                 echo "<a href='{$img}'>{$text}</a><br/>\n";
                 flush();
             } elseif (Crawler::is_there($line, '</ul>')) {
                 break;
             }
         }

         // The line after the pager marker tells whether another page follows.
         $c->go_to('class="pager"');
         $c->readline();
         $has_next = (bool) Crawler::is_there($c->curline, 'Pager_next');
         if ($has_next) {
             $url = $host . Crawler::extract($c->curline, '<a rel="next" href="', '"');
         }
         $c->close();
     } while ($has_next);
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:44,代码来源:spider_luscious.php

示例12: crawl_album

/**
 * Print a download link for every image listed in an album's <noscript> block.
 *
 * go_to('<noscript>') is called twice and one line is skipped; the
 * following lines are trimmed and concatenated until a line containing
 * '</noscript>'. Every src="..." value in that text is turned into a
 * link after replacing '/s128/' with '/d/'.
 *
 * @param string       $url   Album page to crawl.
 * @param string|false $alias Link text for every image; when falsy, the
 *                            URL-decoded basename of each image is used.
 * @return void
 */
function crawl_album($url, $alias = false)
{
    $page = new Crawler($url);
    $page->go_to('<noscript>');
    $page->go_to('<noscript>');
    $page->readline();

    $markup = '';
    while ($line = $page->readline()) {
        if (Crawler::is_there($line, '</noscript>')) {
            break;
        }
        $markup .= trim($line);
    }
    $thumbs = Crawler::extract_to_array($markup, 'src="', '"');
    $page->close();

    // A disabled variant used to name each file after the description text
    // that follows its <img> tag instead of after the URL.
    foreach ($thumbs as $thumb) {
        $full = str_replace('/s128/', '/d/', $thumb);
        $label = $alias ? $alias : urldecode(basename($full));
        echo "<a href='{$full}'>{$label}</a><br/>\n";
        flush();
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:44,代码来源:picasaweb.php

示例13: go

 /**
  * Print a link to every page image of one online-reader entry.
  *
  * "/read" is appended to $this->url when missing (the property is
  * updated in place). The page is fetched with curl and a browser user
  * agent; the 'var data = {...}' line supplies a JSON object whose
  * "thumbs" member has one entry per image, and the return statement of
  * the page's imgpath() function supplies the URL template in which
  * "' + x + '" stands for the zero-padded image number.
  *
  * A warning is raised and nothing is printed when the page data cannot
  * be decoded or carries no "thumbs" list.
  *
  * @return void
  */
 public function go()
 {
     if (!preg_match('/\\/read$/', $this->url)) {
         $this->url .= '/read';
     }

     $craw = new Crawler($this->url, array('use_curl' => true, 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13'));
     $craw->go_to('var data = {');
     $json = Crawler::extract($craw->curline, ' = ', ';');

     // Site layout as of 2012-05-06: the image path is built by imgpath().
     $craw->go_to('function imgpath(');
     $craw->go_to('return \'');
     $imgpath = $craw->getbetween("return '", "';");
     $craw->close();

     // json_decode() yields null on malformed input, so validate the result
     // before touching ->thumbs.
     $obj = json_decode($json);
     $thumbs = (is_object($obj) && isset($obj->thumbs)) ? $obj->thumbs : null;
     if (!is_array($thumbs) && !is_object($thumbs)) {
         trigger_error('Could not read the image list from ' . $this->url, E_USER_WARNING);
         return;
     }

     $dir = basename(dirname($this->url));
     foreach ($thumbs as $key => $val) {
         // Image numbers are 1-based and padded via Crawler::pad(..., 3).
         $filename = Crawler::pad($key + 1, 3);
         $img = str_replace("' + x + '", $filename, $imgpath);
         echo "<a href='{$img}'>{$dir}</a><br/>\n";
         flush();
     }
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:42,代码来源:spider_fakku.php

示例14: foolreader_1_chapter

/**
 * Print a link for every image listed in a chapter's JavaScript imageArray.
 *
 * Lines after the 'imageArray = new Array' marker are scanned; each
 * 'imageArray[' line contributes the text between its first pair of
 * single quotes. Targets that do not start with "http://" are prefixed
 * with the global $sitename. Scanning ends at the 'function loadImage'
 * line.
 *
 * @todo The original author planned an alternative that reads page
 *       addresses from <option> values and crawls them in parallel.
 *
 * @param string     $url     Chapter page to read.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(..., 3).
 * @return void
 */
function foolreader_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $chapter = Crawler::pad($chapter, 3);

    $reader = new Crawler($url);
    $reader->go_to('imageArray = new Array');
    while ($line = $reader->readline()) {
        if (!Crawler::is_there($line, 'imageArray[')) {
            if (Crawler::is_there($line, 'function loadImage')) {
                break;
            }
            continue;
        }

        $src = Crawler::extract($line, "'", "'");
        // Relative targets get the site address in front.
        if (strncmp($src, 'http://', 7) !== 0) {
            $src = $sitename . $src;
        }
        $file = basename($src);
        echo "<a href='{$src}'>{$prefix}-{$chapter}-{$file}</a><br/>\n";
    }
    $reader->close();
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:38,代码来源:foolreader.php

示例15: crawl1page

/**
 * Print a link to every file in the directory behind a gallery page.
 *
 * The first link read after the '<div class="ngg-gallery-thumbnail"'
 * marker reveals the image directory. That directory is then opened as a
 * page of its own and every href between '<ul>' and '</ul>' is printed.
 * The listing stops early at a line containing '"thumbs/"'.
 *
 * @param string $url Gallery page to crawl.
 * @return void
 */
function crawl1page($url)
{
    echo 'Entering ' . $url . '<br/>';
    flush();

    // Step 1: learn the image directory from one sample link.
    $c = new Crawler($url);
    $c->go2linewhere('<div class="ngg-gallery-thumbnail"');
    $c->readline();
    $sample = $c->getbetween('href="', '"');
    $c->close();

    $dir = dirname($sample);
    if (!$dir) {
        return;
    }
    // Re-encode only the last path segment of the directory.
    $folder = substr($dir, strrpos($dir, '/') + 1);
    $dir = dirname($dir) . '/' . rawurlencode($folder) . '/';
    echo 'Dir:' . $dir . '<br/>' . "\n";
    flush();

    // Step 2: read the directory page and print each entry.
    $c = new Crawler($dir);
    $c->go2linewhere('<ul>');
    $c->readline();
    while ($line = $c->readline()) {
        // strpos() returns 0 for a match at the start of the line, so both
        // checks compare against false explicitly.
        if (strpos($line, '</ul>') !== false || strpos($line, '"thumbs/"') !== false) {
            break;
        }
        $filename = Crawler::extract($line, 'href="', '"');
        echo '<a href="' . $dir . $filename . '">' . rawurldecode($filename) . '</a><br/>' . "\n";
        flush();
    }
    $c->close();

    echo '<br/>' . "\n";
    flush();
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:37,代码来源:xgallery1.php


注:本文中的Crawler::close方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。