当前位置: 首页>>代码示例>>PHP>>正文


PHP extract_text函数代码示例

本文整理汇总了PHP中extract_text函数的典型用法代码示例。如果您正苦于以下问题:PHP extract_text函数的具体用法?PHP extract_text怎么用?PHP extract_text使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了extract_text函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: quick_wget

/**
 * Fetch a page and return the text found between two delimiters.
 *
 * $trailing is split on spaces: the first non-empty token is used as the URL,
 * and the remainder is split on "<>" into a start and an end delimiter.
 * The extracted text has its tags stripped, is passed through html_decode()
 * twice, and is trimmed.
 *
 * @param string $trailing "<url> <start delimiter><><end delimiter>"
 * @return string|false the cleaned text, or False when the arguments are
 *                      incomplete, the URL cannot be split into host/uri/port,
 *                      the delimiters are not found, or the result is empty
 */
function quick_wget($trailing)
{
    # first token is the URL, everything after it carries the delimiters
    $tokens = explode(" ", $trailing);
    delete_empty_elements($tokens);
    if (count($tokens) < 2) {
        return False;
    }
    $url = $tokens[0];
    array_shift($tokens);

    # the two delimiters are separated by a literal "<>"
    $delims = explode("<>", implode(" ", $tokens));
    delete_empty_elements($delims);
    if (count($delims) < 2) {
        return False;
    }
    $start_delim = trim($delims[0]);
    $end_delim = trim($delims[1]);

    # presumably filled in by reference by get_host_and_uri() - TODO confirm
    $host = "";
    $uri = "";
    $port = "";
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        return False;
    }

    $response = wget_ssl($host, $uri, $port);
    $text = extract_text($response, $start_delim, $end_delim);
    if ($text === False) {
        return False;
    }

    # decoded twice, as in the original, presumably for double-encoded entities
    $text = trim(html_decode(html_decode(strip_tags($text))));
    return $text == "" ? False : $text;
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:37,代码来源:wget_lib.php

示例2: mb_basename

		if (isset($filename_field))
			{
			$filename = $uploadfiles[$n];
			if ($use_local)
				{
				$filename = mb_basename($filename);
				}
			update_field($ref,$filename_field, $filename);
			}

		# get file metadata 
		if (getval("no_exif","")=="") {extract_exif_comment($ref,$extension);}
		
		# extract text from documents (e.g. PDF, DOC).
		global $extracted_text_field;
		if (isset($extracted_text_field) && !$no_exif) {extract_text($ref,$extension);}

		$done++;

		# Add to collection?
		if ($collection!="")
			{
			$refs[] = $ref;
			}

		# Log this
		daily_stat("Resource upload",$ref);
		resource_log($ref,'u',0);

		}
开发者ID:Jtgadbois,项目名称:Pedadida,代码行数:30,代码来源:team_batch_upload.php

示例3: save_resource_data

     //Save the data
     save_resource_data($ref,false);
     //Update creation times, extension and title
     sql_query("insert into resource_data (resource, resource_type_field, value) values ($ref,12,now()),($ref,148,now())");
     sql_query("update resource set file_extension = '" . $file_extension . "', field12 = now(), title ='".escape_check($filename)."'WHERE ref = '" . $ref . "'");
     $data['success']=true;
     $data['status']="success";
     $data['error']=false;
     $data['textStatus']= $file . " - Successfully Added";
     $data['ref']=$ref;
 }else{
     $data['error']=true;
     $data['textStatus']="could not move file to tmp";
 }
 if($resource_type==2){
     extract_text($ref,$file_extension);
 }
 if($resource_type != 2 && $resource_type != 3 && $resource_type != 4){
     //create preview files in that directory
     create_previews_using_im($ref,false,$file_extension);
     $nothumb = false;
 }else{
     $nothumb = true;
     sql_query("UPDATE resource SET is_transcoding = 1 WHERE ref = $ref");
     //Process previews in the backgrond and continue
     $attempts = 1;
     $command = "/usr/bin/php -q -f /var/www/plugins/mia_upload/pages/background_previews.php $resource_type $ref $file_extension $attempts";
     exec("$command > /dev/null &", $arrOutput);
 }
 savetoelastic($ref);
 echo(json_encode($data));
开发者ID:artsmia,项目名称:mia_resourcespace,代码行数:31,代码来源:upload_batch.php

示例4: minion_talk

/**
 * Relay messages between this server and freenode via the "sylnt" minion.
 *
 * Three things happen on every call:
 *  1. relays older than ten minutes are flushed from the bucket;
 *  2. if $nick belongs to an allowed account, $trailing is either the
 *     ".relays" listing command or "#freenode_channel > message"; a message
 *     of the form "somenick, text" also sets a ten minute relay for
 *     replies from that nick in that channel;
 *  3. if $channel is "#freenode", $trailing is parsed for nick/channel/text
 *     and forwarded to the channel registered for a matching relay.
 *
 * @param string $nick     nick that issued the command ("" skips step 2)
 * @param string $channel  channel the line was seen in
 * @param string $trailing message text
 * @return void
 */
function minion_talk($nick, $channel, $trailing)
{
    # relays expire ten minutes after they are set
    $relay_ttl = 10 * 60;
    $relays_bucket = "activity.php/minion_talk/relays";
    $relays = get_array_bucket($relays_bucket);
    if (is_array($relays) == False) {
        # NOTE(review): guards against a missing/corrupt bucket - confirm what get_array_bucket returns when empty
        $relays = array();
    }
    # flush all outdated relays
    $save_bucket = False;
    foreach ($relays as $freenode_nick => $freenode_channels) {
        if (is_array($freenode_channels) == True) {
            foreach ($freenode_channels as $freenode_channel => $data) {
                if (microtime(True) - $data["timestamp"] > $relay_ttl) {
                    unset($relays[$freenode_nick][$freenode_channel]);
                    $save_bucket = True;
                }
            }
        }
        # drop nick entries that no longer hold any relay so the bucket does not grow forever
        if (is_array($relays[$freenode_nick]) == False or count($relays[$freenode_nick]) == 0) {
            unset($relays[$freenode_nick]);
            $save_bucket = True;
        }
    }
    if ($nick != "") {
        $account = users_get_account($nick);
        $allowed = array("crutchy", "chromas", "mrcoolbp", "NCommander", "juggs", "TheMightyBuzzard");
        # strict comparison: a non-string account (e.g. True) must never match the allow-list
        if (is_string($account) == True and in_array($account, $allowed, True) == True) {
            if ($trailing == ".relays") {
                $n = 0;
                foreach ($relays as $freenode_nick => $freenode_channels) {
                    foreach ($freenode_channels as $freenode_channel => $data) {
                        $rem = round(($data["timestamp"] + $relay_ttl - microtime(True)) / 60, 0);
                        pm($channel, chr(3) . "13  {$freenode_nick}: {$freenode_channel} => " . $data["channel"] . " (unset in {$rem} minutes)");
                        $n++;
                    }
                }
                if ($n == 0) {
                    pm($channel, chr(3) . "13  no channel relays currently active");
                }
                return;
            }
            # syntax: "#freenode_channel > message"
            $params = explode(">", $trailing);
            if (count($params) >= 2) {
                $freenode_channel = strtolower(trim($params[0]));
                if (substr($freenode_channel, 0, 1) == "#") {
                    array_shift($params);
                    $msg = trim(implode(">", $params));
                    if (strlen($msg) > 0) {
                        $commands = array("~minion raw sylnt :sylnt PRIVMSG {$freenode_channel} :<{$nick}> {$msg}");
                        internal_macro($commands);
                        # "somenick, text" addresses a freenode user: relay that user's replies back
                        $parts = explode(",", $msg);
                        $freenode_nick = strtolower(trim($parts[0]));
                        if (count($parts) > 1 and strpos($freenode_nick, " ") === False) {
                            $relays[$freenode_nick][$freenode_channel] = array("channel" => $channel, "timestamp" => microtime(True));
                            pm($channel, chr(3) . "13  ten minute relay set for \"{$freenode_nick}\" in \"{$freenode_channel}\" on freenode to \"{$channel}\" on this server");
                            $save_bucket = True;
                        }
                    }
                }
            }
        }
    }
    if ($channel == "#freenode") {
        $freenode_nick = extract_text($trailing, chr(3) . "03", chr(3) . " [", False);
        $freenode_channel = extract_text($trailing, chr(3) . " [" . chr(3) . "02", chr(3) . "] " . chr(3) . "05", False);
        # only look up a relay when both fields were actually found in the line
        if ($freenode_nick !== False and $freenode_channel !== False) {
            # relays are stored under lower-cased nick AND channel, so both keys are lower-cased here
            $nick_key = strtolower($freenode_nick);
            $channel_key = strtolower($freenode_channel);
            if (isset($relays[$nick_key][$channel_key]) == True) {
                $freenode_trailing = extract_text($trailing, chr(3) . "] " . chr(3) . "05", chr(3), True);
                pm($relays[$nick_key][$channel_key]["channel"], chr(3) . "03" . $freenode_nick . chr(3) . " [" . chr(3) . "02" . $freenode_channel . chr(3) . "] " . chr(3) . "05" . $freenode_trailing);
            }
        }
    }
    if ($save_bucket == True) {
        set_array_bucket($relays, $relays_bucket);
    }
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:65,代码来源:activity.php

示例5: source_define

/**
 * Look up a definition of $term on one source site and announce it.
 *
 * The page is fetched with wget(), headers/head/script are stripped, and the
 * text between $params["delim_start"] and $params["delim_end"] is taken as
 * the definition. When no definition is found but the response carries a
 * "location" header, the term is re-read from that URL's $params["get_param"]
 * query parameter and the lookup is retried with it.
 *
 * @param string $host      host name of the source
 * @param string $term      term to define
 * @param array  $params    source settings; keys read here: "space_delim",
 *                          "template", "uri", "port", "delim_start",
 *                          "delim_end", "get_param", "ignore", "name"
 * @param int    $redirects number of redirects already followed (internal,
 *                          callers normally omit it)
 * @return bool True when a definition was sent with privmsg(), otherwise False
 */
function source_define($host, $term, $params, $redirects = 0)
{
    global $debug;
    # upper bound on followed redirects so that an A -> B -> A loop terminates
    $max_redirects = 5;
    $sterm = $term;
    if ($params["space_delim"] != "") {
        $sterm = str_replace(" ", $params["space_delim"], $sterm);
    }
    $uri = str_replace($params["template"], urlencode($sterm), $params["uri"]);
    term_echo("*** DEFINE: trying {$host}{$uri} on port " . $params["port"]);
    $response = wget($host, $uri, $params["port"], ICEWEASEL_UA, "", 20);
    $html = strip_headers($response);
    $html = replace_ctrl_chars($html, " ");
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    if ($debug == "ON") {
        privmsg("debug [{$host}]: uri = \"{$uri}\"");
        $L = strlen($html);
        privmsg("debug [{$host}]: html length = \"{$L}\"");
        unset($L);
        privmsg("debug [{$host}]: delim_start = \"" . $params["delim_start"] . "\"");
        privmsg("debug [{$host}]: delim_end = \"" . $params["delim_end"] . "\"");
    }
    $i = strpos($html, $params["delim_start"]);
    $def = "";
    if ($i !== False) {
        if ($debug == "ON") {
            privmsg("debug [{$host}]: delim_start pos = \"{$i}\"");
        }
        $html = substr($html, $i + strlen($params["delim_start"]));
        $i = strpos($html, $params["delim_end"]);
        if ($i !== False) {
            if ($debug == "ON") {
                privmsg("debug [{$host}]: delim_end pos = \"{$i}\"");
            }
            $def = trim(strip_tags(substr($html, 0, $i)));
            $def = str_replace(array("\n", "\r"), " ", $def);
            $def = str_replace("  ", " ", $def);
            if (strlen($def) > MAX_DEF_LENGTH) {
                $def = trim(substr($def, 0, MAX_DEF_LENGTH)) . "...";
            }
        }
    }
    if ($def == "") {
        # no definition on this page: follow a redirect if it names a different term
        $location = exec_get_header($response, "location");
        if ($location == "") {
            return False;
        }
        $new_term = extract_text($location, $params["get_param"], "&", True);
        # a missing/empty parameter or an unchanged term would only repeat a failed lookup
        if ($new_term === False or $new_term == "" or $new_term == $term) {
            return False;
        }
        if ($redirects >= $max_redirects) {
            term_echo("*** DEFINE: too many redirects, giving up");
            return False;
        }
        term_echo("redirecting to \"{$location}\"");
        if ($debug == "ON") {
            privmsg("debug [{$host}]: redirecting to \"{$location}\"");
        }
        return source_define($host, $new_term, $params, $redirects + 1);
    }
    if ($params["ignore"] != "" and strpos($def, $params["ignore"]) !== False) {
        return False;
    }
    if (strpos($def, "There aren't any definitions") !== False) {
        return False;
    }
    privmsg("[" . $params["name"] . "] " . chr(3) . "03{$term}" . chr(3) . ": " . html_decode($def));
    return True;
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:69,代码来源:definitions.php

示例6: chr

# Fetch the SoylentNews "top journals" page and walk its table rows.
$host = "soylentnews.org";
$list_uri = "/journal.pl?op=top";
$port = 80;
$msg = chr(3) . "08" . "********** " . chr(3) . "03" . chr(2) . "SOYLENTNEWS JOURNAL FEED" . chr(2) . chr(3) . "08" . " **********";
output($msg);
# id used when no id file exists yet; otherwise the stored id is read from JOURNALS_ID_FILE
$last_id = 878;
if (file_exists(JOURNALS_ID_FILE) == True) {
    $last_id = file_get_contents(JOURNALS_ID_FILE);
}
$msg = "last journal = {$last_id}";
output($msg);
$response = wget($host, $list_uri, $port, ICEWEASEL_UA, "", 60);
$html = strip_headers($response);
# the journal list sits between these two template marker comments
$delim1 = "<!-- start template: ID 60, journaltop;journal;default -->";
$delim2 = "<!-- end template: ID 60, journaltop;journal;default -->";
$html = extract_text($html, $delim1, $delim2);
if ($html === False) {
    output("error: journal list not found");
    return;
}
$rows = explode("<tr>", $html);
# the first two pieces are discarded (presumably text before the first row and the header row - TODO confirm)
array_shift($rows);
array_shift($rows);
$item_count = 20;
# min(), not max(): process at most $item_count rows and never index past the end of $rows
for ($i = 0; $i < min($item_count, count($rows)); $i++) {
    $cells = explode("<td valign=\"top\">", $rows[$i]);
    if (count($cells) != 4) {
        term_echo("*** SN JOURNAL FEED: invalid number of cells for row {$i}");
        continue;
    }
    # TODO: DEBUG HERE
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:31,代码来源:journal_feed.php

示例7: mysql_real_escape_string

    $faction = mysql_real_escape_string($_POST['faction']);
    $flags = mysql_real_escape_string($_POST['flags']);
    $model = htmlspecialchars($_POST['model']);
    $sql = "UPDATE  `characters` SET  `_Name` =  '{$name}',\n`_Cash` =  '{$cash}',\n`_Model` =  '{$model}',\n`_Flags` =  '{$flags}',\n`_Faction` =  '{$faction}' WHERE  `characters`.`_Key` = {$key}";
    echo "<div class='well'>\n<h2>Admin Edit Mode</h2>\n{$sql}\n</div>\n";
} else {
    $newphysicaldesc = mysql_real_escape_string($_POST['physdesc']);
    $result = mysql_query("SELECT * FROM  `characters` WHERE `_Schema` = '" . $gamemodecode . "' AND `_Key` LIKE  '" . $key . "'");
    if (mysql_error() == "") {
    } else {
        echo '<div class="alert alert-error"> ERROR' . mysql_error() . '</div>';
    }
    while ($row = @mysql_fetch_array($result)) {
        $data = $row['_Data'];
        $name = $row['_Name'];
        $oldphysdesc = extract_text($data, '"PhysDesc":"', '","');
    }
    echo "<div class='well'><h2>Replacing</h2> <code>{$oldphysdesc}</code> <h2>with</h2> <code>{$newphysicaldesc}</code> <h2>on {$name}</h2></div>";
    $sql = "UPDATE characters set _Data= replace(_Data, \"{$oldphysdesc}\", \"{$newphysicaldesc}\") WHERE `characters`.`_Key` ={$key}";
}
mysql_query($sql);
if (mysql_error() == "") {
    echo '<div class="alert alert-success">Character Updated!</div><br>
<a href="index.php"class="btn btn-large btn-block btn-success">Return to dashboard</a>
';
} else {
    echo '<div class="alert alert-error">ERROR - ' . mysql_error() . '<br>If this keeps happening you should contact the Owner about this!</div><br>
<input type="button" class="btn btn-large btn-block btn-error" value="Go Back" onclick="goBack()">
';
}
?>
开发者ID:Magicobxershorts,项目名称:clockworkplugins,代码行数:31,代码来源:insert.php

示例8: index_url

function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $supdomain, $index_vpaths;
    global $user_agent, $tmp_urls, $delay_time, $domain_arr;
    global $db;
    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;
    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = $db->query("SELECT link FROM " . TABLE_PREFIX . "temp WHERE link=" . $db->quote($url) . " AND id=" . $db->quote($sessid));
            echo sql_errorstring(__FILE__, __LINE__);
            if ($result->fetch()) {
                $result->closeCursor();
                $db->exec("INSERT INTO " . TABLE_PREFIX . "temp (link, level, id) VALUES (" . $db->quote($url) . ", " . $db->quote($level) . ", " . $db->quote($sessid) . ")");
                echo sql_errorstring(__FILE__, __LINE__);
            }
        }
        $url_status['state'] == "redirected";
    }
    if (!$index_vpaths && $url_status['state'] == 'ok') {
        $url_parts = parse_url($url);
        $base = basename($url_parts['path']);
        if (strstr($base, '.') == false) {
            $url_status['state'] = "directory listing or default redirect";
        }
    }
    ini_set("user_agent", $user_agent);
    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        $delay_time = time();
        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                while ($buffer = @fgets($fl, 4096)) {
                    $file .= $buffer;
                }
            } else {
                $file_read_error = 1;
            }
            fclose($fl);
        }
        if ($file_read_error) {
            $contents = getFileContents($url);
            $file = $contents['file'];
        }
        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);
        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }
        printStandardReport('starting', $command_line);
        $newmd5sum = md5($file);
        if ($reindex == 0) {
            if ($md5sum == $newmd5sum) {
                printStandardReport('md5notChanged', $command_line);
                $OKtoIndex = 0;
            } else {
                if (isDuplicateMD5($newmd5sum)) {
                    $OKtoIndex = 0;
                    printStandardReport('duplicate', $command_line);
                }
            }
        }
        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            $urlparts = parse_url($url);
            $newdomain = $urlparts['host'];
            $type = 0;
            // remove link to css file
            //get all links from file
            $data = clean_file($file, $url, $url_status['content']);
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }
            $wordarray = unique_array(explode(" ", $data['content']));
            if ($data['nofollow'] != 1) {
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
                $all_links = count($links);
                $numoflinks = 0;
                //if there are any, add to the temp table, but only if there isnt such url already
                if (is_array($links)) {
                    reset($links);
                    while ($thislink = each($links)) {
                        if (!isset($tmp_urls[$thislink[1]]) || $tmp_urls[$thislink[1]] != 1) {
//.........这里部分代码省略.........
开发者ID:highestgoodlikewater,项目名称:sphider-pdo,代码行数:101,代码来源:spider.php

示例9: sn_submit


//.........这里部分代码省略.........
        $source_body = extract_meta_content($source_html, "og:description", "property");
        if ($source_body === False or $source_body == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    #strip_all_tag($html,"a");
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);
    $html = explode("<p", $html);
    $source_body = array();
    for ($i = 0; $i < count($html); $i++) {
        $parts = explode(">", $html[$i]);
        if (count($parts) >= 2) {
            array_shift($parts);
            $html[$i] = implode(">", $parts);
        }
        $html[$i] = strip_tags($html[$i]);
        $html[$i] = clean_text($html[$i]);
        $host_parts = explode(".", $host);
        for ($j = 0; $j < count($host_parts); $j++) {
            if (strlen($host_parts[$j]) > 3) {
                if (strpos(strtolower($html[$i]), strtolower($host_parts[$j])) !== False) {
                    continue 2;
                }
            }
        }
        if (filter($html[$i], "0123456789") != "") {
            continue;
        }
        if (strlen($html[$i]) > 1) {
            if ($html[$i][strlen($html[$i]) - 1] != ".") {
                continue;
            }
            while (True) {
                $j = strlen($html[$i]) - 1;
                if ($j < 0) {
                    break;
                }
                $c = $html[$i][$j];
                if ($c == ".") {
                    break;
                }
                $html[$i] = substr($html[$i], 0, $j);
            }
        }
        if (strlen($html[$i]) > 100) {
            $source_body[] = $html[$i];
        }
    }
    $source_body = implode("\n\n", $source_body);
    $source_body = html_decode($source_body);
    $source_body = html_decode($source_body);
    $host = "dev.soylentnews.org";
    $port = 443;
    $uri = "/submit.pl";
    $response = wget($host, $uri, $port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }
    sleep(25);
    $params = array();
    $params["reskey"] = $reskey;
    #$params["name"]=trim(substr($nick,0,50));
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params);
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) {
        privmsg("submission successful - https://{$host}/submit.pl?op=list");
        return True;
    } else {
        privmsg("error: something went wrong with your submission");
        return False;
    }
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:101,代码来源:sn_lib.php

示例10: extract_text

 $pid_delim2 = "\">Parent";
 $pid_test = extract_text($pid_html, $pid_delim1, $pid_delim2);
 $pid = "";
 $parent_url = "";
 if ($pid_test !== False) {
     $pid = $pid_test;
     $parent_url = "http://soylentnews.org/comments.pl?sid={$sid}&cid={$pid}";
 }
 $subject_delim1 = "<h4><a name=\"{$cid}\">";
 $subject_delim2 = "</a>";
 $subject = extract_text($parts[$j], $subject_delim1, $subject_delim2);
 $subject = trim(strip_tags($subject));
 $subject = str_replace("  ", " ", $subject);
 $subject = html_decode($subject);
 $subject = html_decode($subject);
 $comment_body = extract_text($parts[$j], "<div id=\"comment_body_{$cid}\">", "</div>");
 $comment_body = replace_ctrl_chars($comment_body, " ");
 $comment_body = str_replace("</p>", " ", $comment_body);
 $comment_body = str_replace("<p>", " ", $comment_body);
 $comment_body = str_replace("<br>", " ", $comment_body);
 $comment_body = trim(strip_tags($comment_body));
 $comment_body = str_replace("  ", " ", $comment_body);
 $comment_body = html_decode($comment_body);
 $comment_body = html_decode($comment_body);
 $record = array();
 $record["user"] = $user;
 $record["uid"] = $uid;
 $record["score"] = $score;
 $record["score_num"] = $score_num;
 $record["subject"] = $subject;
 $record["title"] = $title;
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:31,代码来源:comment_feed.php

示例11: parse_data

/**
 * Split $data into values keyed by an ordered list of field names.
 *
 * Each value is the text extract_text() finds between "<key><suffix>" and
 * "<next key><suffix>"; the last key's value is extracted with an empty end
 * delimiter and the optional-end flag set.
 *
 * @param array  $keys   field names, in the order they are expected in $data
 * @param string $data   text to parse
 * @param string $suffix separator that follows each key (default "=")
 * @return array|false map of key => value, or False when $keys is empty or
 *                     any extraction fails
 */
function parse_data($keys, $data, $suffix = "=")
{
    $last = count($keys) - 1;
    if ($last < 0) {
        return False;
    }
    $fields = array();
    $index = 0;
    # every key except the last is delimited by the key that follows it
    while ($index < $last) {
        $key = $keys[$index];
        $value = extract_text($data, $key . $suffix, $keys[$index + 1] . $suffix);
        $fields[$key] = $value;
        if ($value === False) {
            return False;
        }
        $index++;
    }
    # the last key has no following key to act as end delimiter
    $final_key = $keys[$last];
    $final_value = extract_text($data, $final_key . $suffix, "", True);
    $fields[$final_key] = $final_value;
    if ($final_value === False) {
        return False;
    }
    return $fields;
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:22,代码来源:slash_test.php

示例12: extract_meta_content

/**
 * Return the content attribute of the first <meta> tag whose $key attribute
 * equals $name (compared case-insensitively), in the document's original casing.
 *
 * Example tag:
 * <meta name="description" content="Researchers have made a breakthrough in blah blah blah." id="metasummary" />
 *
 * @param string $html document to search
 * @param string $name attribute value to look for, e.g. "description"
 * @param string $key  attribute that carries the name, e.g. "name" or "property"
 * @return string|false the content value, or False when no matching tag with
 *                      a non-empty content attribute exists
 */
function extract_meta_content($html, $name, $key = "name")
{
    # matching is done on a lower-cased copy, so the needles are lower-cased too
    $lhtml = strtolower($html);
    $lname = strtolower($name);
    $lkey = strtolower($key);
    $tag_open = "<meta ";
    $content_open = "content=\"";
    $segments = explode($tag_open, $lhtml);
    if (count($segments) < 2) {
        return False;
    }
    $result = "";
    $part = "";
    # byte offset in $lhtml (and therefore in $html) where the current segment starts
    $offset = strlen($segments[0]) + strlen($tag_open);
    for ($i = 1; $i < count($segments); $i++) {
        $n = extract_text($segments[$i], "{$lkey}=\"", "\"");
        if ($n !== False and $n === $lname) {
            $part = $segments[$i];
            $result = extract_text($part, $content_open, "\"");
            break;
        }
        $offset += strlen($segments[$i]) + strlen($tag_open);
    }
    if (is_string($result) == False or $result === "") {
        return False;
    }
    # Preferred: read the value from the matched tag itself, so identical text
    # occurring earlier in the document (e.g. in <title>) is not returned instead.
    $p = strpos($part, $content_open);
    if ($p !== False) {
        $start = $offset + $p + strlen($content_open);
        if (substr($lhtml, $start, strlen($result)) === $result) {
            return substr($html, $start, strlen($result));
        }
    }
    # Fallback: first occurrence anywhere in the document
    $i = strpos($lhtml, $result);
    if ($i === False) {
        return False;
    }
    return substr($html, $i, strlen($result));
}
开发者ID:cmn32480,项目名称:exec-irc-bot,代码行数:32,代码来源:lib_http.php

示例13: fputs

 $cpt = 0;
 fputs($fdc, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\r\n\t\t\t<?xml-stylesheet type=\"text/css\" href=\"style.css\" media=\"all\"?>\n\r\n\t\t\t<note uri=\"uri_note_" . $titre . "\">\n");
 foreach ($parties as $partie) {
     fputs($fdc, "<partie>");
     fputs($fdc, "<titrepartie>" . $partie['titre'] . "</titrepartie>");
     // echo "<pre>";
     // print_r($partie);
     // echo "</pre>";
     if ($partie['notes']) {
         foreach ($partie['notes'] as $file) {
             $fd = fopen('xml/note/' . $file . '.note', 'r');
             while (!feof($fd)) {
                 $line1 = fgets($fd);
                 if ($cpt == 0) {
                     if (match_tag('titre', $line1)) {
                         fputs($fdc, "<titre>" . extract_text('titre', $line1) . " [Compilation]</titre>\n");
                     } else {
                         if (!match_tag('note', $line1) and !match_tag('meta_note', $line1) and !match_tag('statut', $line1) and !match_tag('date_creation', $line1) and !match_tag('date_modification', $line1) and !match_tag('auteur', $line1) and !match_tag('contributeurs', $line1) and !match_tag('contributeur', $line1) and !match_tag('relecteur', $line1) and !match_tag('nom', $line1) and !match_tag('prenom', $line1) and !isComment($line1)) {
                             fputs($fdc, $line1);
                         }
                     }
                 } else {
                     if (!match_tag('note', $line1) and !match_tag('meta_note', $line1) and !match_tag('statut', $line1) and !match_tag('date_creation', $line1) and !match_tag('date_modification', $line1) and !match_tag('auteur', $line1) and !match_tag('contributeurs', $line1) and !match_tag('contributeur', $line1) and !match_tag('relecteur', $line1) and !match_tag('nom', $line1) and !match_tag('prenom', $line1) and !match_tag('titre', $line1) and !isComment($line1)) {
                         fputs($fdc, $line1);
                     }
                 }
             }
             $cpt++;
         }
     }
     fputs($fdc, '</partie>');
开发者ID:astorije,项目名称:projet-nf29-a10,代码行数:31,代码来源:pdf.php

示例14: ProcessFolder


//.........这里部分代码省略.........
                                        # Save the value
                                        print_r($path_parts);
                                        $value = $path_parts[$level-1];
                                        update_field ($r, $field, $value);
                                        echo " - Extracted metadata from path: $value" . PHP_EOL;
                                        }
                                    }
                                }
                            }
                        }

                    # update access level
                    sql_query("UPDATE resource SET access = '$accessval',archive='$staticsync_defaultstate' WHERE ref = '$r'");

                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && file_exists($altpath))
                        {
                        $adh = opendir($altpath);
                        while (($altfile = readdir($adh)) !== false)
                            {
                            $filetype = filetype($altpath . "/" . $altfile);
                            if (($filetype == "file") && (substr($file,0,1) != ".") && (strtolower($file) != "thumbs.db"))
                                {
                                # Create alternative file                               
                                # Find extension
                                $ext = explode(".", $altfile);
                                $ext = $ext[count($ext)-1];
                                
                                $description = str_replace("?", strtoupper($ext), $lang["originalfileoftype"]);
                                $file_size   = filesize_unlimited($altpath . "/" . $altfile);
                                
                                $aref = add_alternative_file($r, $altfile, $description, $altfile, $ext, $file_size);
                                $path = get_resource_path($r, true, '', true, $ext, -1, 1, false, '', $aref);
                                rename($altpath . "/" . $altfile,$path); # Move alternative file
                                }
                            }   
                        }

                    # Add to collection
                    if ($staticsync_autotheme)
                        {
                        $test = ''; 
                        $test = sql_query("SELECT * FROM collection_resource WHERE collection='$collection' AND resource='$r'");
                        if (count($test) == 0)
                            {
                            sql_query("INSERT INTO collection_resource (collection, resource, date_added) 
                                            VALUES ('$collection', '$r', NOW())");
                            }
                        }
                    }
                else
                    {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?)" . PHP_EOL;
                    }
                }
            else
                {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath,$modtimes) && ($filemod > strtotime($modtimes[$shortpath])))
                    {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("SELECT ref, has_image, file_modified, file_extension FROM resource 
                                        WHERE file_path='" . escape_check($shortpath) . "'");
                    if (count($rd) > 0)
                        {
                        $rd   = $rd[0];
                        $rref = $rd["ref"];

                        echo "Resource $rref has changed, regenerating previews: $fullpath" . PHP_EOL;
                        extract_exif_comment($rref,$rd["file_extension"]);

                        # extract text from documents (e.g. PDF, DOC).
                        global $extracted_text_field;
                        if (isset($extracted_text_field)) {
                            if (isset($unoconv_path) && in_array($extension,$unoconv_extensions)){
                                // omit, since the unoconv process will do it during preview creation below
                                }
                            else {
                            extract_text($rref,$extension);
                            }
                        }

                        # Store original filename in field, if set
                        global $filename_field;
                        if (isset($filename_field))
                            {
                            update_field($rref,$filename_field,$file);  
                            }

                        create_previews($rref, false, $rd["file_extension"], false, false, -1, false, $staticsync_ingest);
                        sql_query("UPDATE resource SET file_modified=NOW() WHERE ref='$rref'");
                        }
                    }
                }
            }   
        }   
    }
开发者ID:artsmia,项目名称:mia_resourcespace,代码行数:101,代码来源:staticsync.php

示例15: index_url

function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $entities, $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $supdomain;
    global $mysql_table_prefix, $user_agent, $tmp_urls, $delay_time, $domain_arr;
    $needsReindex = 1;
    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;
    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = mysql_query("select link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'");
            echo mysql_error();
            $rows = mysql_numrows($result);
            if ($rows == 0) {
                mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')");
                echo mysql_error();
            }
        }
        $url_status['state'] == "redirected";
    }
    /*
    		if ($indexdate <> '' && $url_status['date'] <> '') {
    			if ($indexdate > $url_status['date']) {
    				$url_status['state'] = "Date checked. Page contents not changed";
    				$needsReindex = 0;
    			}
    		}*/
    ini_set("user_agent", $user_agent);
    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        $delay_time = time();
        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                while ($buffer = @fgets($fl, 4096)) {
                    $file .= $buffer;
                }
            } else {
                $file_read_error = 1;
            }
            fclose($fl);
        }
        if ($file_read_error) {
            $contents = getFileContents($url);
            $file = $contents['file'];
        }
        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);
        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }
        printStandardReport('starting', $command_line);
        $newmd5sum = md5($file);
        if ($md5sum == $newmd5sum) {
            printStandardReport('md5notChanged', $command_line);
            $OKtoIndex = 0;
        } else {
            if (isDuplicateMD5($newmd5sum)) {
                $OKtoIndex = 0;
                printStandardReport('duplicate', $command_line);
            }
        }
        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            $urlparts = parse_url($url);
            $newdomain = $urlparts['host'];
            $type = 0;
            /*		if ($newdomain <> $domain)
            					$domainChanged = 1;
            
            				if ($domaincb==1) {
            					$start = strlen($newdomain) - strlen($supdomain);
            					if (substr($newdomain, $start) == $supdomain) {
            						$domainChanged = 0;
            					}
            				}*/
            // remove link to css file
            //get all links from file
            $data = clean_file($file, $url, $url_status['content']);
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }
            $wordarray = unique_array(explode(" ", $data['content']));
            if ($data['nofollow'] != 1) {
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
//.........这里部分代码省略.........
开发者ID:hoelzro,项目名称:Bifrost,代码行数:101,代码来源:spider.php


注:本文中的extract_text函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。