当前位置: 首页>>代码示例>>PHP>>正文


PHP Crawler::extract_to_array方法代码示例

本文整理汇总了PHP中Crawler::extract_to_array方法的典型用法代码示例。如果您正苦于以下问题：PHP Crawler::extract_to_array方法的具体用法？PHP Crawler::extract_to_array怎么用？PHP Crawler::extract_to_array使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。


在下文中一共展示了Crawler::extract_to_array方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: crawl_1_chapter

/**
 * Collects the page URLs of one chapter and dispatches them for crawling.
 *
 * The page list is read from the line following the 'id="myselectbox3"'
 * marker: every value="..." attribute on that line is taken as a page path
 * and prefixed with $url.
 *
 * Example chapter URL: http://ani-haven.net/hr-alpha/Psyren/145/
 *
 * @param string     $url     Chapter URL; also used as the prefix of every page path.
 * @param string|int $chapter Chapter identifier, passed through Crawler::pad(…, 3)
 *                            before being handed on (presumably zero-padding — confirm
 *                            against Crawler::pad).
 * @return void
 *
 * @todo Left open by the original author (no details given).
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $paddedChapter = Crawler::pad($chapter, 3);

    $crawler = new Crawler($url);
    $crawler->go_to('id="myselectbox3"');
    $crawler->readline();
    $relativePages = Crawler::extract_to_array($crawler->curline, 'value="', '"');
    $crawler->close();

    // Prefix every extracted page path with the chapter URL, keeping the keys.
    $pageUrls = array();
    foreach ($relativePages as $index => $relativePage) {
        $pageUrls[$index] = $url . $relativePage;
    }

    // 'crawl_1_page' is passed by name together with the padded chapter;
    // presumably multiProcess calls it once per page URL — confirm in Crawler.
    Crawler::multiProcess(4, $pageUrls, 'crawl_1_page', array($paddedChapter));
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:18,代码来源:havenreader.php

示例2: crawl_album

/**
 * Prints one HTML download link per image found in an album page.
 *
 * The text between the line after the second go_to('<noscript>') and the
 * first line containing '</noscript>' is concatenated (each line trimmed),
 * every src="..." value in it is extracted, and '/s128/' in each value is
 * replaced with '/d/' before it is echoed as a link.
 *
 * @param string       $url   Album page URL.
 * @param string|false $alias Link text to use for every image; when falsy, the
 *                            URL-decoded basename of each image is used instead.
 * @return void Output is echoed and flushed after every link.
 */
function crawl_album($url, $alias = false)
{
    $crawler = new Crawler($url);
    $crawler->go_to('<noscript>');
    $crawler->go_to('<noscript>');
    $crawler->readline();

    // Accumulate the trimmed lines up to (not including) the closing tag line.
    $markup = '';
    while ($row = $crawler->readline()) {
        if (Crawler::is_there($row, '</noscript>')) {
            break;
        }
        $markup .= trim($row);
    }

    $thumbnails = Crawler::extract_to_array($markup, 'src="', '"');
    $crawler->close();

    // Alternative kept from the original author: use the description as the file name.
    //   preg_match_all('/<img src="([^"]+)"><\\/a><p><a [^>]+>([^<]+)<\\/a>/', $markup, $match);
    //   foreach ($match[1] as $i => $uri) {
    //       $info = pathinfo(basename($uri));
    //       $ext = $info['extension'];
    //       $name = $match[2][$i];
    //       $img = str_replace('/s128/', '/', $uri);
    //       echo "<a href='$img'>$name.$ext</a><br />\n";
    //   }
    //   exit;

    foreach ($thumbnails as $thumbnail) {
        $img = str_replace('/s128/', '/d/', $thumbnail);
        $label = $alias ? $alias : urldecode(basename($img));
        echo "<a href='{$img}'>{$label}</a><br/>\n";
        flush();
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:44,代码来源:picasaweb.php

示例3: go

 /**
  * Walks a paginated gallery starting at $this->url and echoes one HTML link
  * per image reference found on each page.
  *
  * For every page, each line containing 'showimg.php?c=' has its
  * <a href="..."> values extracted; the 'showimg.php?c=' part is removed from
  * each value and the result is echoed as a link whose text is the name of the
  * parent directory of that path. The first line containing 'Next&raquo;'
  * ends the page: if it carries an '<a href', that href (prefixed with the
  * gallery base URL) becomes the next page to visit.
  *
  * The walk stops when a 'Next&raquo;' line has no link, or when a page has no
  * 'Next&raquo;' line at all. The latter previously caused the same URL to be
  * fetched forever, because nothing updated the loop state.
  *
  * @return void
  */
 public function go()
 {
     $start_url = $this->url;
     if (preg_match('/gallery1\\.hentaifromhell\\.net/', $start_url)) {
         $base = 'http://gallery1.hentaifromhell.net';
     } else {
         $base = 'http://gallery.hentaifromhell.net';
     }
     $done = false;
     while (!$done) {
         echo "{$start_url}<br/>\n";
         $craw = new Crawler($start_url);
         $craw->go2linewhere('showimg.php?c=');
         // Treat this as the last page until a "Next" link is actually found;
         // this guarantees termination when the marker line is missing.
         $done = true;
         while ($line = $craw->readline()) {
             if (strpos($line, 'showimg.php?c=') !== false) {
                 $raw = Crawler::extract_to_array($line, '<a href="', '"');
                 foreach ($raw as $r) {
                     $href = str_replace('showimg.php?c=', '', $r);
                     // Link text is the parent directory name of the image path.
                     $text = basename(dirname($href));
                     echo '<a href="' . $href . '">' . $text . '</a>' . "<br />\n";
                 }
             } elseif (strpos($line, 'Next&raquo;') !== false) {
                 if (strpos($line, '<a href') !== false) {
                     // A linked "Next" marker: continue with the following page.
                     $start_url = $base . Crawler::extract($line, '<a href="', '"');
                     $done = false;
                 }
                 // The pagination line ends this page either way.
                 break;
             }
         }
         $craw->close();
     }
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:37,代码来源:spider_hfhgallery1.php

示例4: array

 // Phase 2: for every comic URL found in $a, fetch its listing page, collect
 // the linked entries and write the resulting nested array to
 // 'disneycomics.phase2' as a PHP file that assigns it to $a.
 case 'phase2':
     // $a is not assigned anywhere in this fragment; presumably the required
     // phase-1 file defines it as name => (title => url) — confirm.
     require 'disneycomics.phase1';
     //print_r(array_keys($a));exit;
     // $hasil ("result"): name => title => entry => full entry URL.
     $hasil = array();
     foreach ($a as $name => $comics) {
         foreach ($comics as $title => $url) {
             // Drop the 'show….php…loc=' part so only the location path remains.
             $url = preg_replace('/show.*\\.php.*loc=/', '', $url);
             /*
             $url = str_replace('show.php?s=date&loc=', '', $url);
             $url = str_replace('show.php?loc=', '', $url);
             $url = str_replace('show2.php?loc=', '', $url);
             */
             // Progress output.
             echo $url . "\n";
             flush();
             // NOTE(review): file_get_contents() returns false on failure and
             // that value is passed straight to extract_to_array().
             $text = file_get_contents($base . $url . '/');
             $raws = Crawler::extract_to_array($text, '<A HREF="', '"');
             $n = count($raws);
             // The first five extracted links are skipped — presumably
             // listing header/navigation links; confirm against a real page.
             for ($i = 5; $i < $n; $i++) {
                 $raws[$i] = html_entity_decode($raws[$i]);
                 $hasil[$name][$title][$raws[$i]] = $base . $url . '/' . $raws[$i];
                 //echo $base . $url . '/' . $raws[$i] . "\n";flush();
             }
             //$hasil[$name][$title] = $raws;
         }
     }
     // Capture the echoed PHP source (header, var_export dump, semicolon) in
     // an output buffer and write it to the phase-2 file.
     ob_start();
     echo "<?php\n\$a = ";
     var_export($hasil);
     echo ';';
     file_put_contents('disneycomics.phase2', ob_get_clean());
     break;
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:31,代码来源:disneycomics.php

示例5: egscans_chapters

 // Fragment: picks a site-specific chapter handler based on $base, falling
 // back to a generic page-by-page crawl. The enclosing statement and the end
 // of the do/try block are outside this excerpt.
 } else {
     // NOTE(review): strpos() returns 0 (falsy) when the needle is at offset 0
     // and false when absent; these truthiness checks only work while $base
     // has something (e.g. a scheme) in front of the host name.
     if (strpos($base, 'egscans.com')) {
         egscans_chapters($chapters, $infixs);
     } else {
         if (strpos($base, 'omfggscans.com')) {
             omfggscans_chapters($chapters, $infixs);
         } else {
             // $chapters = array_reverse($chapters);
             // $infixs = array_reverse($infixs);
             foreach ($chapters as $key => $val) {
                 // Each chapter value is appended to $base as a path segment.
                 $url = $base . '/' . $val;
                 echo "{$url}<br/>";
                 $chapter = $val;
                 $c = new Crawler($url);
                 $c->go_to('name="page"');
                 // Every value="..." on the current line is taken as a page id.
                 $pages = Crawler::extract_to_array($c->curline, 'value="', '"');
                 $c->close();
                 //print_r($pages);flush();
                 foreach ($pages as $page) {
                     //echo "$url/$page<br/>";flush();
                     do {
                         try {
                             // The echoed digits are progress markers printed
                             // after each step of fetching a page.
                             $c = new Crawler($url . '/' . $page);
                             echo '1';
                             flush();
                             $c->go_to('class="picture"');
                             echo '2';
                             flush();
                             $img = $c->getbetween('<img src="', '"');
                             echo '3';
                             flush();
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:31,代码来源:keishou.php

示例6: Crawler

			<tr>
				<th>Chapter Name</th>
				<th>Infix</th>
			</tr>
<?php 
// Stage 1: read the chapter selector from $base and build three parallel
// lists — chapter hrefs, their descriptions, and the trailing number of each
// description (the "infix"). The closing brace of this if is outside the
// excerpt.
if ($stage1) {
    echo '<tr><td colspan="2">Progress.. ';
    $c = new Crawler($base);
    $chapters = array();
    $descriptions = array();
    $infix = array();
    // @TODO
    // go_to() is called twice with the same marker — presumably to land on
    // its second occurrence; confirm against Crawler::go_to.
    $c->go_to('class="selector"');
    $c->go_to('class="selector"');
    // Both lists are extracted from the same current line.
    $chapters = Crawler::extract_to_array($c->curline, "href='", "'");
    $raws = Crawler::extract_to_array($c->curline, "class='option'>", '</div');
    //array_shift($raws);
    //array_pop($raws);
    $descriptions = $raws;
    foreach ($descriptions as $desc) {
        // Capture the digits at the very end of the description.
        // NOTE(review): when a description does not end in digits,
        // preg_match() finds nothing and $matches[1] is undefined here.
        preg_match('/(\\d+)$/', $desc, $matches);
        $infix[] = $matches[1];
    }
    /*
    while ($line = $c->readline()) {
    	if (Crawler::is_there($line, 'class="chico"')) {
    		$chp = Crawler::extract($line, 'href="', '"');
    		$chapters[] = $chp;
    		$descriptions[] = strip_tags(Crawler::extract($line, ': ', '</td>'));
    		$ifx = Crawler::cutfromlast1($chp, '/');
    		$ifx = str_replace('chapter-', '', $ifx);
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:31,代码来源:foolreader.php

示例7: explore

 /**
  * Recursively maps the links of a listing page into a nested array.
  *
  * The page at $url is fetched, every substring between $start_sign and
  * $end_sign is extracted, and the first $n_skip matches are ignored. A link
  * whose last character is '/' is treated as a sub-listing and explored
  * recursively at $url . $link; any other link is stored as a leaf.
  *
  * The trailing-slash test is a strict last-character comparison. The earlier
  * `strrpos(...) == strlen(...) - 1` form loosely compared `false == 0` as
  * true for any one-character link without a slash (e.g. '#'), which was then
  * recursed into and re-fetched the same page without end.
  *
  * @param string $url        Page to fetch; also the prefix for sub-listing links.
  * @param string $start_sign Text that precedes each link value.
  * @param string $end_sign   Text that terminates each link value.
  * @param int    $n_skip     Number of leading matches to ignore.
  * @return array Link => nested array (for sub-listings) or link => link (for leaves);
  *               empty when the page could not be fetched.
  */
 public static function explore($url, $start_sign = '<a href="', $end_sign = '"', $n_skip = 5)
 {
     echo "Currently {$url}\n";
     flush();
     $r = array();
     $s = file_get_contents($url);
     if ($s === false) {
         // Fetch failed: there is nothing to extract from this page.
         return $r;
     }
     $l = Crawler::extract_to_array($s, $start_sign, $end_sign);
     $n = count($l);
     for ($i = $n_skip; $i < $n; $i++) {
         $link = $l[$i];
         if (substr($link, -1) === '/') {
             $r[$link] = Crawler::explore($url . $link, $start_sign, $end_sign, $n_skip);
         } else {
             $r[$link] = $link;
         }
     }
     return $r;
 }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:17,代码来源:crawler.php

示例8: old_hfh_realm

/**
 * Prints one HTML link per href found on the crawler's current line.
 *
 * Everything up to and including 'redirect.html?' is removed from each href,
 * and the basename of $url is used as the text of every link.
 *
 * @param string $url Page URL to open.
 * @return void
 */
function old_hfh_realm($url)
{
    $name = basename($url);
    $c = new Crawler($url);
    // NOTE(review): curline is read without a prior go_to()/readline();
    // presumably the constructor leaves it populated — confirm in Crawler.
    $exp = Crawler::extract_to_array($c->curline, 'href="', '"');
    // Release the crawler once its line has been read, as the other
    // Crawler call sites do.
    $c->close();
    foreach ($exp as $e) {
        $img = preg_replace('/^.*redirect\\.html\\?/', '', $e);
        echo "<a href='{$img}'>{$name}</a><br/>\n";
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:10,代码来源:h.php

示例9: while

 // Fragment: collect page links from the lines containing '"ehga"' until the
 // closing </table> line, then visit each page and pick its image URL.
 // NOTE(review): $pages is not initialised in this excerpt; if no line
 // matches, the foreach below iterates an undefined variable — confirm it is
 // set earlier.
 while ($line = $c->readline()) {
     if (Crawler::is_there($line, '"ehga"')) {
         $pages[] = Crawler::extract($line, 'href="', '"');
     } else {
         if (Crawler::is_there($line, '</table>')) {
             break;
         }
     }
 }
 $c->close();
 foreach ($pages as $url) {
     echo "URL:{$url}<br/>\n";
     $c = new Crawler($url, array('use_curl' => true));
     $c->go_to('</span>');
     // Take every image source on the current line.
     $raws = Crawler::extract_to_array($c->curline, 'src="', '"');
     // Debug dump of the extracted sources.
     echo '<pre>';
     print_r($raws);
     echo '</pre>';
     // The wanted image is usually at $raws[4] or $raws[5]; drop a leading
     // "next" button image so those positions line up.
     // NOTE(review): $raws[0], $raws[4] and $raws[5] are read without
     // checking that they exist.
     if (Crawler::is_there($raws[0], '/n/next.png')) {
         array_shift($raws);
     }
     // Of the two candidates, the wanted image has the longer file name.
     $base1 = basename($raws[4]);
     $base2 = basename($raws[5]);
     if (strlen($base1) > strlen($base2)) {
         $img = $raws[4];
     } else {
         $img = $raws[5];
     }
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:31,代码来源:gehentai.php


注:本文中的Crawler::extract_to_array方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。