本文整理汇总了PHP中Crawler::extract_to_array方法的典型用法代码示例。如果您正苦于以下问题:PHP Crawler::extract_to_array方法的具体用法?PHP Crawler::extract_to_array怎么用?PHP Crawler::extract_to_array使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Crawler的用法示例。
在下文中一共展示了Crawler::extract_to_array方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: crawl_1_chapter
/**
 * Crawls every page of a single chapter.
 *
 * The chapter landing page is opened, the line following the
 * id="myselectbox3" marker is read, and every value="..." attribute on that
 * line is taken as a page suffix. Each suffix is appended to $url and the
 * resulting list is handed to Crawler::multiProcess() together with the
 * padded chapter number.
 *
 * Example URL: http://ani-haven.net/hr-alpha/Psyren/145/
 *
 * @param string     $url     Address of the chapter; page suffixes are appended to it as-is.
 * @param int|string $chapter Chapter number, padded via Crawler::pad($chapter, 3).
 *
 * @todo (carried over from the original author, no details given)
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $chapter = Crawler::pad($chapter, 3);

    $crawler = new Crawler($url);
    $crawler->go_to('id="myselectbox3"');
    $crawler->readline();
    $suffixes = Crawler::extract_to_array($crawler->curline, 'value="', '"');
    $crawler->close();

    // Turn every page suffix into a full address by prefixing the chapter URL.
    $pages = array();
    foreach ($suffixes as $index => $suffix) {
        $pages[$index] = $url . $suffix;
    }

    Crawler::multiProcess(4, $pages, 'crawl_1_page', array($chapter));
}
示例2: crawl_album
/**
 * Prints one HTML download link per image found on an album page.
 *
 * After two go_to('<noscript>') calls and one skipped line, the following
 * lines are trimmed and concatenated until a line containing </noscript> is
 * met. Every src="..." value in that text is printed as an anchor, with the
 * "/s128/" path segment replaced by "/d/".
 *
 * @param string       $url   Address of the album page.
 * @param string|false $alias Link text used for every image; when falsy, the
 *                            URL-decoded basename of each image is used instead.
 */
function crawl_album($url, $alias = false)
{
    $crawler = new Crawler($url);
    $crawler->go_to('<noscript>');
    $crawler->go_to('<noscript>');
    $crawler->readline();

    // Accumulate the markup up to (not including) the closing </noscript> line.
    $markup = '';
    while ($line = $crawler->readline()) {
        if (Crawler::is_there($line, '</noscript>')) {
            break;
        }
        $markup .= trim($line);
    }

    $images = Crawler::extract_to_array($markup, 'src="', '"');
    $crawler->close();

    /* Alternative kept from the original: use the description as the file name.
    preg_match_all('/<img src="([^"]+)"><\\/a><p><a [^>]+>([^<]+)<\\/a>/', $markup, $match);
    foreach ($match[1] as $i => $uri) {
        $info = pathinfo(basename($uri));
        $ext = $info['extension'];
        $name = $match[2][$i];
        $img = str_replace('/s128/', '/', $uri);
        echo "<a href='$img'>$name.$ext</a><br />\n";
    }
    exit;
    */

    foreach ($images as $image) {
        $image = str_replace('/s128/', '/d/', $image);
        // An alias, when given, labels every link; otherwise label by file name.
        $label = $alias ? $alias : urldecode(basename($image));
        echo "<a href='{$image}'>{$label}</a><br/>\n";
        flush();
    }
}
示例3: go
/**
 * Walks a paginated gallery starting at $this->url and prints a link for
 * every image reference found.
 *
 * On each page, every line containing "showimg.php?c=" is scanned for
 * <a href="..."> values; the "showimg.php?c=" part is stripped and the link
 * is printed with the name of the image's parent directory as its text.
 * Pagination follows the anchor on the first line containing "Next»".
 *
 * The walk stops when the "Next»" line carries no anchor, and also when a
 * page has no "Next»" line at all. Previously the latter case re-fetched the
 * same URL forever.
 *
 * @return void
 */
public function go()
{
    $start_url = $this->url;
    if (preg_match('/gallery1\\.hentaifromhell\\.net/', $start_url)) {
        $base = 'http://gallery1.hentaifromhell.net';
    } else {
        $base = 'http://gallery.hentaifromhell.net';
    }

    $selesai = false; // "selesai" = finished
    while (!$selesai) {
        echo "{$start_url}<br/>\n";
        $craw = new Crawler($start_url);
        $craw->go2linewhere('showimg.php?c=');

        // Stays null unless this page yields a link to a following page.
        $next_url = null;
        while ($line = $craw->readline()) {
            if (strpos($line, 'showimg.php?c=') !== false) {
                $raw = Crawler::extract_to_array($line, '<a href="', '"');
                foreach ($raw as $r) {
                    $href = str_replace('showimg.php?c=', '', $r);
                    $text = basename(dirname($href));
                    echo '<a href="' . $href . '">' . $text . '</a>' . "<br />\n";
                }
            } elseif (strpos($line, 'Next»') !== false) {
                // A "Next»" label without an anchor marks the last page.
                if (strpos($line, '<a href') !== false) {
                    $next_url = $base . Crawler::extract($line, '<a href="', '"');
                }
                break;
            }
        }
        $craw->close();

        if ($next_url === null) {
            // No further page (or no pagination line at all): stop instead of
            // crawling the same URL again.
            $selesai = true;
        } else {
            $start_url = $next_url;
        }
    }
}
示例4: array
case 'phase2':
    // Phase 2: for every comic URL collected in phase 1, fetch its directory
    // listing and record the full address of each linked file, then persist
    // the result as a PHP file assigning the array to $a.
    // NOTE(review): $a is presumably defined by the required phase-1 file and
    // $base by the surrounding script — confirm.
    require 'disneycomics.phase1';
    $hasil = array(); // "hasil" = result
    foreach ($a as $name => $comics) {
        foreach ($comics as $title => $url) {
            // Drop the viewer-script prefix (e.g. "show.php?loc=",
            // "show.php?s=date&loc=", "show2.php?loc=") to get the bare path.
            $url = preg_replace('/show.*\\.php.*loc=/', '', $url);
            echo $url . "\n";
            flush();

            $text = file_get_contents($base . $url . '/');
            $raws = Crawler::extract_to_array($text, '<A HREF="', '"');

            // The first five links of each listing are skipped.
            for ($i = 5; $i < count($raws); $i++) {
                $raws[$i] = html_entity_decode($raws[$i]);
                $hasil[$name][$title][$raws[$i]] = $base . $url . '/' . $raws[$i];
            }
        }
    }
    // Serialise the collected tree as executable PHP source.
    file_put_contents('disneycomics.phase2', "<?php\n\$a = " . var_export($hasil, true) . ';');
    break;
示例5: egscans_chapters
} else {
// Dispatch on the host found in $base: two sites have dedicated handlers,
// everything else falls through to the generic per-chapter crawl below.
// NOTE(review): strpos() returns 0 for a match at offset 0, which is falsy,
// so these checks only succeed when the host is not at the very start of $base.
if (strpos($base, 'egscans.com')) {
egscans_chapters($chapters, $infixs);
} else {
if (strpos($base, 'omfggscans.com')) {
omfggscans_chapters($chapters, $infixs);
} else {
// $chapters = array_reverse($chapters);
// $infixs = array_reverse($infixs);
foreach ($chapters as $key => $val) {
// Each chapter lives at "$base/$val"; print it as a progress marker.
$url = $base . '/' . $val;
echo "{$url}<br/>";
$chapter = $val;
// Collect the value="..." attributes from the line reached via the
// name="page" marker; each value is used below as a page path segment.
$c = new Crawler($url);
$c->go_to('name="page"');
$pages = Crawler::extract_to_array($c->curline, 'value="', '"');
$c->close();
//print_r($pages);flush();
foreach ($pages as $page) {
//echo "$url/$page<br/>";flush();
// NOTE(review): the do/try presumably forms a retry loop; its condition and
// catch clause lie outside the visible fragment — confirm.
do {
try {
// The echoed digits 1/2/3 are progress markers flushed after each step.
$c = new Crawler($url . '/' . $page);
echo '1';
flush();
$c->go_to('class="picture"');
echo '2';
flush();
// Take the image address from the first <img src="..."> after the marker.
$img = $c->getbetween('<img src="', '"');
echo '3';
flush();
示例6: Crawler
<tr>
<th>Chapter Name</th>
<th>Infix</th>
</tr>
<?php
// Stage 1: read the chapter list from the page at $base and derive an
// "infix" (the trailing number of each chapter description) per chapter.
if ($stage1) {
echo '<tr><td colspan="2">Progress.. ';
$c = new Crawler($base);
$chapters = array();
$descriptions = array();
$infix = array();
// @TODO
// go_to() is called twice with the same class="selector" marker before
// the current line is parsed.
$c->go_to('class="selector"');
$c->go_to('class="selector"');
// Chapter links come from the href='...' values on the current line,
// descriptions from the text after each class='option'> up to </div.
$chapters = Crawler::extract_to_array($c->curline, "href='", "'");
$raws = Crawler::extract_to_array($c->curline, "class='option'>", '</div');
//array_shift($raws);
//array_pop($raws);
$descriptions = $raws;
foreach ($descriptions as $desc) {
// The infix is the run of digits at the very end of the description.
// NOTE(review): the preg_match() result is not checked, so $matches[1]
// is undefined for a description that does not end in digits.
preg_match('/(\\d+)$/', $desc, $matches);
$infix[] = $matches[1];
}
/*
while ($line = $c->readline()) {
if (Crawler::is_there($line, 'class="chico"')) {
$chp = Crawler::extract($line, 'href="', '"');
$chapters[] = $chp;
$descriptions[] = strip_tags(Crawler::extract($line, ': ', '</td>'));
$ifx = Crawler::cutfromlast1($chp, '/');
$ifx = str_replace('chapter-', '', $ifx);
示例7: explore
/**
 * Recursively maps the links found under a URL into a nested array.
 *
 * The document at $url is fetched and every substring between $start_sign
 * and $end_sign is collected. The first $n_skip entries are ignored. An
 * entry ending in "/" is treated as a sub-directory and explored
 * recursively at $url . entry; any other entry is stored as a leaf mapping
 * to itself.
 *
 * @param string $url        Address to fetch; entries are appended to it verbatim.
 * @param string $start_sign Text that precedes each link value.
 * @param string $end_sign   Text that terminates each link value.
 * @param int    $n_skip     Number of leading entries to ignore.
 *
 * @return array Entry => nested array (directory) or entry => entry (leaf);
 *               empty when the URL could not be fetched.
 */
public static function explore($url, $start_sign = '<a href="', $end_sign = '"', $n_skip = 5)
{
    echo "Currently {$url}\n";
    flush();

    $s = file_get_contents($url);
    if ($s === false) {
        // Nothing could be read, so there is nothing to list.
        return array();
    }

    $r = array();
    $l = Crawler::extract_to_array($s, $start_sign, $end_sign);
    $n = count($l);
    for ($i = $n_skip; $i < $n; $i++) {
        $entry = $l[$i];
        // Strict trailing-slash test. The former
        // strrpos(...) == strlen(...) - 1 check compared false == 0 for a
        // one-character entry without a slash and wrongly recursed into it.
        if (substr($entry, -1) === '/') {
            $r[$entry] = Crawler::explore($url . $entry, $start_sign, $end_sign, $n_skip);
        } else {
            $r[$entry] = $entry;
        }
    }

    return $r;
}
示例8: old_hfh_realm
/**
 * Prints a link for every href="..." value on the crawler's current line.
 *
 * Everything up to and including "redirect.html?" is stripped from each
 * value, and the basename of $url is used as the text of every link.
 *
 * @param string $url Address of the page to crawl.
 */
function old_hfh_realm($url)
{
    $name = basename($url);

    $c = new Crawler($url);
    // NOTE(review): curline is read without an explicit go_to()/readline();
    // presumably the constructor positions it — confirm.
    $exp = Crawler::extract_to_array($c->curline, 'href="', '"');
    // Release the crawler once its line has been consumed; it was left open before.
    $c->close();

    foreach ($exp as $e) {
        $img = preg_replace('/^.*redirect\\.html\\?/', '', $e);
        echo "<a href='{$img}'>{$name}</a><br/>\n";
    }
}
示例9: while
// Collect the href of every line containing "ehga" until the line
// containing </table> is reached (or the crawler runs out of lines).
while ($line = $c->readline()) {
if (Crawler::is_there($line, '"ehga"')) {
$pages[] = Crawler::extract($line, 'href="', '"');
} else {
if (Crawler::is_there($line, '</table>')) {
break;
}
}
}
$c->close();
// Visit each collected page and pick out its main image.
foreach ($pages as $url) {
echo "URL:{$url}<br/>\n";
$c = new Crawler($url, array('use_curl' => true));
$c->go_to('</span>');
// grab the image sources
$raws = Crawler::extract_to_array($c->curline, 'src="', '"');
echo '<pre>';
print_r($raws);
echo '</pre>';
// the image is usually at $raws[4] or $raws[5]
// Drop a leading "next" navigation icon so the indices below line up.
// NOTE(review): $raws[0], $raws[4] and $raws[5] are read without checking
// that $raws has that many entries.
if (Crawler::is_there($raws[0], '/n/next.png')) {
array_shift($raws);
}
// the actual image has the longer file name
$base1 = basename($raws[4]);
$base2 = basename($raws[5]);
if (strlen($base1) > strlen($base2)) {
$img = $raws[4];
} else {
$img = $raws[5];
}