当前位置: 首页>>代码示例>>C++>>正文


C++ QUICKPOLL函数代码示例

本文整理汇总了C++中QUICKPOLL函数的典型用法代码示例。如果您正苦于以下问题:C++ QUICKPOLL函数的具体用法?C++ QUICKPOLL怎么用?C++ QUICKPOLL使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了QUICKPOLL函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: countWords

// a quickie
// this url gives a m_preCount that is too low. why?
// http://go.tfol.com/163/speed.asp
long countWords ( char *p , long plen , long niceness ) {
	char *pend  = p + plen;
	long  count = 1;
 loop:

	// sequence of punct
	for  ( ; p < pend && ! is_alnum_utf8 (p) ; p += getUtf8CharSize(p) ) {
		// breathe
		QUICKPOLL ( niceness );
		// in case being set from xml tags, count as words now
		if ( *p=='<') count++; 
	}
	count++;

	// sequence of alnum
	for  ( ; p < pend && is_alnum_utf8 (p) ; p += getUtf8CharSize(p) )
		// breathe
		QUICKPOLL ( niceness );

	count++;

	if ( p < pend ) goto loop;
	// some extra for good meaure
	return count+10;
}
开发者ID:BillWangCS,项目名称:open-source-search-engine,代码行数:28,代码来源:Words.cpp

示例2: makeKey

// . ***** META LIST DELETE LOOP *****
// . scan for meta lists to remove from syncdb
// . check every D KEY
// . must NOT have any "need to send request" keys (a bit set)
// . must NOT have any "need to recv request" keys (b bit set)
// . must NOT have our "need to add" key (c bit set)
void Syncdb::loop3 ( ) {
	// . loop over the meta lists we need to delete
	// . these are "d" keys
	// . use a "tid" of 0
	key128_t sk = makeKey ( 0,0,0,1,0,0,0,0 );
	key128_t ek = makeKey ( 0,0,0,1,0,0xffffffff,0xffffffffffffffffLL,1 );
	// get the first node in sequence, if any
	long nn = m_qt.getNextNode ( 0 , (char *)&sk );
	// do the loop
	for ( ; nn >= 0 ; nn = m_qt.getNextNode ( nn ) ) {
		// breathe
		QUICKPOLL ( MAX_NICENESS );
		// get key
		key128_t k = *(key128_t *)m_qt.getKey ( nn );
		// stop when we hit the end
		if ( k > ek ) break;
		// get zid
		uint64_t zid = getZid ( &k );
		// get sid
		uint32_t sid = getSid ( &k );
		// have we sent/recvd all checkoff requests required? have
		// we added the meta list? if so, we can nuke it from syncdb
		if ( ! canDeleteMetaList ( sid, zid ) ) {
			// no use banging away at this sid any more since we
			// are missing another action for this one
			sid++;
			// find the key of the FIRST meta list we need to add
			// for this new senderId, "sid"
			key128_t nk = makeKey ( 0,0,0,1,0,sid,0,0 );
			// undo the m_qt.getNextNode(nn) we call in for loop
			nn = m_qt.getPrevNode ( 0 , (char *)&nk );
			// sanity check
			if ( nn < 0 ) { char *xx=NULL;*xx=0; }
			// get next key from this new sid
			continue;
		}
		// . make the negative key for syncdb
		// . it just uses a negative "c" key, with a tid of 0
		key128_t dk = makeKey ( 0,0,1,0,0,sid,zid,0);
		// . add it to syncdb to signifiy a delete
		// . this returns false and sets g_errno on error
		if(!m_rdb.addRecord((collnum_t)0,(char *)&dk,NULL,0,
				    MAX_NICENESS)) return;
		// delete it from quick tree now that we added the negative
		// key successfully to syncdb
		long dn = m_qt.getNode ( 0, (char *)&k );
		// must be there!
		if ( ! dn ) { char *xx=NULL;*xx=0; }
		// nuke it
		m_qt.deleteNode ( dn , true );
	}
	// . success
	// . do not recall until big loop completes a round
	m_calledLoop3 = true;
}
开发者ID:FlavioFalcao,项目名称:open-source-search-engine,代码行数:61,代码来源:Syncdb.cpp

示例3: logTrace

// . returns false and sets g_errno on error
// . we are responsible for freeing reply/replySize
void Msg0::gotReply ( char *reply , int32_t replySize , int32_t replyMaxSize ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN" );

	// timing debug
	if ( g_conf.m_logTimingNet && m_rdbId==RDB_POSDB && m_startTime > 0 )
		log(LOG_TIMING,"net: msg0: Got termlist, termId=%" PRIu64". "
		    "Took %" PRId64" ms, replySize=%" PRId32" (niceness=%" PRId32").",
		    g_posdb.getTermId ( m_startKey ) ,
		    gettimeofdayInMilliseconds()-m_startTime,
		    replySize,m_niceness);
	// TODO: insert some seals for security, may have to alloc
	//       separate space for the list then
	// set the list w/ the remaining data
	QUICKPOLL(m_niceness);

	m_list->set ( reply                , 
		      replySize            , 
		      reply                , // alloc buf begins here, too
		      replyMaxSize         ,
		      m_startKey           , 
		      m_endKey             , 
		      m_fixedDataSize      ,
		      true                 , // ownData?
		      m_useHalfKeys        ,
		      m_ks                 );

	// return now if we don't add to cache
	//if ( ! m_addToCache ) return;
	//
	// add posdb list to termlist cache
	//
	//if ( m_rdbId != RDB_POSDB ) return;
	// add to LOCAL termlist cache
	//addToTermListCache(m_coll,m_startKey,m_endKey,m_list);
	// ignore any error adding to cache
	//g_errno = 0;

	// . NO! no more network caching, we got gigabit... save space
	//   for our disk, no replication, man, mem is expensive

	// . throw the just the list into the net cache
	// . addToNetCache() will copy it for it's own
	// . our current copy should be freed by the user's callback somewhere
	// . grab our corresponding rdb's local cache
	// . we'll use it to store this list since there's no collision chance
	//RdbCache *cache = m_rdb->getCache ();
	// . add the list to this cache
	// . returns false and sets g_errno on error
	// . will not be added if cannot copy the data
	//cache->addList ( m_startKey , m_list ) ;
	// reset g_errno -- we don't care if cache coulnd't add it
	//g_errno = 0;
	logTrace( g_conf.m_logTraceMsg0, "END" );
}
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:56,代码来源:Msg0.cpp

示例4: while

// after you read/write from/to disk, copy into the page cache
void DiskPageCache::addPages ( long vfd,
			       char *buf,
			       long numBytes,
			       long long offset ,
			       long niceness ){
	// check for override function
	//if ( m_isOverriden ) {
	//	m_addPages2 ( this,
	//		      vfd,
	//		      buf,
	//		      numBytes,
	//		      offset );
	//	return;
	//}
	// if vfd is -1, then we were not able to add a map for this file
	if ( vfd < 0 ) return;
	// no NULL ptrs
	if ( ! buf ) return;
	// return if no pages allowed in page cache
	if ( m_maxMemOff == 0 ) return;
	// or disabled
	if ( ! m_enabled ) return;
	// disabled at the master controls?
	if ( m_switch && ! *m_switch ) return;
	// sometimes the file got unlinked on us
	if ( ! m_memOff[vfd] ) return;
	// what is the page range?
	long long sp = offset / m_pageSize ;
	// point to it
	char *bufPtr = buf;
	char *bufEnd = buf + numBytes;
	// . do not add first page unless right on the boundary
	// . how much did we exceed the boundary by?
	oldshort skip = offset - sp * m_pageSize ;
	long  size = m_pageSize - skip;
	// now add the remaining pages
	while ( bufPtr < bufEnd ) {
		// breathe
		QUICKPOLL(niceness);
		// ensure "size" is not too big
		if ( bufPtr + size > bufEnd ) size = bufEnd - bufPtr;
		// add the page to memory
		addPage ( vfd , sp , bufPtr , size , skip );
		// advance
		bufPtr += size;
		sp++;
		size    = m_pageSize;
		skip    = 0;
	}
}
开发者ID:FlavioFalcao,项目名称:open-source-search-engine,代码行数:51,代码来源:DiskPageCache.cpp

示例5: while

// . returns true if document is adult, false otherwise
bool AdultBit::getBit ( char *s , int32_t niceness) {

	// rudimentary adult detection algorithm
	int32_t  i   = 0;
	int32_t  dirties = 0;
	int32_t  j;
	int32_t  slen;
 loop:

	// skip until we hit an alpha
	while ( s[i] && ! is_alpha_a(s[i]) ) i++;
	// return if done
	if ( ! s[i] ) return false;
	// . point to char after this alpha
	// . return if none
	j = i + 1;
	// find end of the alpha char sequence
	while ( s[j] && is_alpha_a(s[j]) ) j++;
	// skip over 1 or 2 letter words
	slen = j - i; 
	if ( slen <= 2 ) { i = j; goto loop; }
	// it's adult content if it has just 1 obscene word
	if ( isObscene ( (char *) s+i , slen ) ) return true;

	// W = non-dirty word
	// D = dirty word
	// . = sequence of punctuation/num and/or 1 to 2 letter words
	// dirty sequences: 
	// . D . D . D .     (dirties=6)
	// . D . W . D . D . (dirties=5)
	// . basically, if 3 out of 4 words in a subsequence are
	//   "dirty" then the whole document is "adult" content
	if ( isDirty ( (char *) s+i , slen ) ) {
		dirties += 2;
		if ( dirties >= 5 ) return true;
		i = j;
		goto loop;
	}

	dirties--;
	if ( dirties < 0 ) dirties = 0;

	QUICKPOLL((niceness));
	i = j;
	goto loop;
}
开发者ID:BlaBlaNet,项目名称:open-source-search-engine,代码行数:47,代码来源:AdultBit.cpp

示例6: getStatState

// . return false with g_errno set on error, true otherwise
// . looking at the number of points per second
// . average query latency for last 20 queries
// . average disk bytes read for last 20 accesses
// . val is the State::m_value measurement, a float
// . also each point may represent a number of bytes transferred in which
//   case we use that number rather than "1", which is the default
bool Statsdb::addPointsFromList ( Label *label ) {

	StatState *ss  = getStatState ( label->m_graphHash );
	// return false with g_errno set
	if ( ! ss ) return false;

	m_list.resetListPtr();
	// scan the list for our junk
	for ( ; ! m_list.isExhausted() ; m_list.skipCurrentRecord() ) {
		// breathe
		QUICKPOLL(m_niceness);
		// get that
		StatKey *sk = (StatKey *)m_list.getCurrentRec();
		// and data
		StatData *sd = (StatData *)m_list.getCurrentData();
		// must be a "query" stat
		if ( sk->m_labelHash != label->m_labelHash ) continue;
		// add that
		addPoint ( sk , sd , ss , label );
	}
	return true;
}
开发者ID:automatedtendencies,项目名称:open-source-search-engine,代码行数:29,代码来源:Statsdb.cpp

示例7: QUICKPOLL

//
// . add EventPoints to m_sb3/m_ht3
// . these basically represent binary events or parm state changes
// . i.e. "a merge operation"
// . i.e. "changing a parm value"
//
bool Statsdb::addEventPointsFromList ( ) {

	m_list.resetListPtr();
	// scan the list for our junk
	for ( ; ! m_list.isExhausted() ; m_list.skipCurrentRecord() ) {
		// breathe
		QUICKPOLL(m_niceness);
		// get that
		StatKey *sk = (StatKey *)m_list.getCurrentRec();
		// and data
		StatData *sd = (StatData *)m_list.getCurrentData();
		// must be an "event" stat... i.e. a status change
		if ( ! sd->isEvent() ) continue;
		// make sure to stack lines so they do not touch
		// each other...
		if ( ! addEventPoint ( sk->m_time1       ,
				       sk->m_labelHash   , // parmHash
				       sd->getOldVal  () ,
				       sd->getNewVal  () ,
				       10                )) // thickness
			return false;
	}
	return true;
}
开发者ID:automatedtendencies,项目名称:open-source-search-engine,代码行数:30,代码来源:Statsdb.cpp

示例8: ask


//.........这里部分代码省略.........
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// if diffbot.cpp is reading spiderdb from each shard we have to
	// get groupid from hostid here lest we core in getGroupId() below.
	// it does that for dumping spiderdb to the client browser. they
	// can download the whole enchilada.
	if ( hostId >= 0 && m_rdbId == RDB_SPIDERDB )
		m_shardNum = 0;
	// did they force it? core until i figure out what this is
	else if ( forceParitySplit >= 0 ) 
		//m_groupId =  g_hostdb.getGroupId ( forceParitySplit );
		m_shardNum = forceParitySplit;
	else
		//m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
		m_shardNum = getShardNum ( m_rdbId , startKey );

	// if we are looking up a termlist in posdb that is split by termid and
	// not the usual docid then we have to set this posdb key bit that tells
	// us that ...
	if ( noSplit && m_rdbId == RDB_POSDB )
		m_shardNum = g_hostdb.getShardNumByTermId ( startKey );

	// how is this used?
	if ( forceLocalIndexdb ) m_shardNum = getMyShardNum();


//	if( g_conf.m_logTraceMsg0 ) log("%s:%s:%d: shardNum [%" PRId32"]", __FILE__,__func__, __LINE__, m_shardNum);


	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree)) {
		log( LOG_LOGIC, "net: msg0: Weird. check but don't add... rdbid=%" PRId32".", ( int32_t ) m_rdbId );
	}

	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 ) 
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	preferLocalReads = true;

	// it it stored locally?
	bool isLocal = ( m_hostId == -1 && //g_hostdb.m_groupId == m_groupId );
			 m_shardNum == getMyShardNum() );
	// only do local lookups if this is true
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:67,代码来源:Msg0.cpp

示例9: gotListWrapper

// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;
	// sanity check -- ensure they match
	//if ( niceness != st0->m_niceness )
	//	log("Msg0: niceness mismatch");
	// debug msg
	//if ( niceness != 0 ) 
	//	log("HEY! niceness is not 0");
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %"UINT64"",gettimeofdayInMilliseconds());
		int32_t size = -1;
		if ( list ) size     = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%"UINT64" size=%"INT32""
		    " transId=%"INT32" ip=%s port=%i took=%"INT64" "
		    "(niceness=%"INT32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// debug
	//if ( ! msg5->m_includeTree )
	//	log("hotit\n");
	// on error nuke the list and it's data
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 ) 
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%"INT32" "
		    "minRecSizes=%"INT32".",dataSize,oldSize);
	/*
	// always compress these lists
	if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) {

		// get sh48, the sitehash
		key128_t *startKey = (key128_t *)msg5->m_startKey ;
		int64_t sh48 = g_datedb.getTermId(startKey);

		// debug
		//log("msg0: got sectiondblist from disk listsize=%"INT32"",
		//    list->getListSize());

		if ( dataSize > 50000 )
			log("msg0: sending back list rdb=%"INT32" "
			    "listsize=%"INT32" sh48=0x%"XINT64"",
			    (int32_t)st0->m_rdbId,
			    dataSize,
			    sh48);

		// save it
		int32_t origDataSize = dataSize;
		// store compressed list on itself
		char *dst = list->m_list;
		// warn if niceness is 0!
		if ( st0->m_niceness == 0 )
			log("msg0: compressing sectiondb list at niceness 0!");
		// compress the list
		uint32_t lastVoteHash32 = 0LL;
		SectionVote *lastVote = NULL;
//.........这里部分代码省略.........
开发者ID:BlaBlaNet,项目名称:open-source-search-engine,代码行数:101,代码来源:Msg0.cpp

示例10: removeExpiredLocks

// hostId is the remote hostid sending us the lock request
void removeExpiredLocks ( int32_t hostId ) {
	// when we last cleaned them out
	static time_t s_lastTime = 0;

	int32_t nowGlobal = getTimeGlobalNoCore();

	// only do this once per second at the most
	if ( nowGlobal <= s_lastTime ) return;

	// shortcut
	HashTableX *ht = &g_spiderLoop.m_lockTable;

 restart:

	// scan the slots
	int32_t ns = ht->m_numSlots;
	// . clean out expired locks...
	// . if lock was there and m_expired is up, then nuke it!
	// . when Rdb.cpp receives the "fake" title rec it removes the
	//   lock, only it just sets the m_expired to a few seconds in the
	//   future to give the negative doledb key time to be absorbed.
	//   that way we don't repeat the same url we just got done spidering.
	// . this happens when we launch our lock request on a url that we
	//   or a twin is spidering or has just finished spidering, and
	//   we get the lock, but we avoided the negative doledb key.
	for ( int32_t i = 0 ; i < ns ; i++ ) {
		// breathe
		QUICKPOLL(MAX_NICENESS);
		// skip if empty
		if ( ! ht->m_flags[i] ) continue;
		// cast lock
		UrlLock *lock = (UrlLock *)ht->getValueFromSlot(i);
		int64_t lockKey = *(int64_t *)ht->getKeyFromSlot(i);
		// if collnum got deleted or reset
		collnum_t collnum = lock->m_collnum;
		if ( collnum >= g_collectiondb.m_numRecs ||
		     ! g_collectiondb.m_recs[collnum] ) {
			log("spider: removing lock from missing collnum "
			    "%" PRId32,(int32_t)collnum);
			goto nuke;
		}
		// skip if not yet expired
		if ( lock->m_expires == 0 ) continue;
		if ( lock->m_expires >= nowGlobal ) continue;
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: removing lock after waiting. elapsed=%" PRId32"."
			    " lockKey=%" PRIu64" hid=%" PRId32" expires=%" PRIu32" "
			    "nowGlobal=%" PRIu32,
			    (nowGlobal - lock->m_timestamp),
			    lockKey,hostId,
			    (uint32_t)lock->m_expires,
			    (uint32_t)nowGlobal);
	nuke:
		// nuke the slot and possibly re-chain
		ht->removeSlot ( i );
		// gotta restart from the top since table may have shrunk
		goto restart;
	}
	// store it
	s_lastTime = nowGlobal;
}		
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:63,代码来源:Msg12.cpp

示例11: log

// . now come here when we got the necessary index lists
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool Msg39::intersectLists ( ) { // bool updateReadInfo ) {
	// bail on error
	if ( g_errno ) { 
	hadError:
		log("msg39: Had error getting termlists: %s.",
		    mstrerror(g_errno));
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		//sendReply (m_slot,this,NULL,0,0,true);
		return true; 
	}
	// timestamp log
	if ( m_debug ) {
		log(LOG_DEBUG,"query: msg39: [%"PTRFMT"] "
		    "Got %"INT32" lists in %"INT64" ms"
		    , (PTRTYPE)this,m_tmpq.getNumTerms(),
		     gettimeofdayInMilliseconds() - m_startTime);
		m_startTime = gettimeofdayInMilliseconds();
	}

	// breathe
	QUICKPOLL ( m_r->m_niceness );

	// ensure collection not deleted from under us
	CollectionRec *cr = g_collectiondb.getRec ( m_r->m_collnum );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		goto hadError;
	}

	// . set the IndexTable so it can set it's score weights from the
	//   termFreqs of each termId in the query
	// . this now takes into account the special termIds used for sorting
	//   by date (0xdadadada and 0xdadadad2 & TERMID_MASK)
	// . it should weight them so much so that the summation of scores
	//   from other query terms cannot make up for a lower date score
	// . this will actually calculate the top
	// . this might also change m_tmpq.m_termSigns 
	// . this won't do anything if it was already called
	m_posdbTable.init ( &m_tmpq                ,
			    m_debug              ,
			    this                   ,
			    &m_tt                  ,
			    m_r->m_collnum,//ptr_coll          , 
			    &m_msg2 , // m_lists                ,
			    //m_tmpq.m_numTerms      , // m_numLists
			    m_r                              );

	// breathe
	QUICKPOLL ( m_r->m_niceness );

	// . we have to do this here now too
	// . but if we are getting weights, we don't need m_tt!
	// . actually we were using it before for rat=0/bool queries but
	//   i got rid of NO_RAT_SLOTS
	if ( ! m_allocedTree && ! m_posdbTable.allocTopTree() ) {
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		//sendReply ( m_slot , this , NULL , 0 , 0 , true);
		return true;
	}

	// if msg2 had ALL empty lists we can cut it int16_t
	if ( m_posdbTable.m_topTree->m_numNodes == 0 ) {
		//estimateHitsAndSendReply ( );
		return true;
	}
		

	// we have to allocate this with each call because each call can
	// be a different docid range from doDocIdSplitLoop.
	if ( ! m_posdbTable.allocWhiteListTable() ) {
		log("msg39: Had error allocating white list table: %s.",
		    mstrerror(g_errno));
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		//sendReply (m_slot,this,NULL,0,0,true);
		return true; 
	}


	// do not re do it if doing docid range splitting
	m_allocedTree = true;


	// . now we must call this separately here, not in allocTopTree()
	// . we have to re-set the QueryTermInfos with each docid range split
	//   since it will set the list ptrs from the msg2 lists
	if ( ! m_posdbTable.setQueryTermInfo () ) return true;

	// print query term bit numbers here
	for ( int32_t i = 0 ; m_debug && i < m_tmpq.getNumTerms() ; i++ ) {
		QueryTerm *qt = &m_tmpq.m_qterms[i];
		//utf16ToUtf8(bb, 256, qt->m_term, qt->m_termLen);
		char *tpc = qt->m_term + qt->m_termLen;
		char  tmp = *tpc;
		*tpc = '\0';
		SafeBuf sb;
		sb.safePrintf("query: msg39: BITNUM query term #%"INT32" \"%s\" "
			      "bitnum=%"INT32" ", i , qt->m_term, qt->m_bitNum );
//.........这里部分代码省略.........
开发者ID:DeadNumbers,项目名称:open-source-search-engine,代码行数:101,代码来源:Msg39.cpp

示例12: reset

// returns false and sets g_errno on error
bool Title::setTitle ( Xml *xml, Words *words, int32_t maxTitleLen, Query *query,
                       LinkInfo *linkInfo, Url *firstUrl, const char *filteredRootTitleBuf, int32_t filteredRootTitleBufSize,
                       uint8_t contentType, uint8_t langId, int32_t niceness ) {
	// make Msg20.cpp faster if it is just has
	// Msg20Request::m_setForLinkInfo set to true, no need to extricate a title.
	if ( maxTitleLen <= 0 ) {
		return true;
	}

	m_niceness = niceness;
	m_maxTitleLen = maxTitleLen;

	// if this is too big the "first line" algo can be huge!!!
	// and really slow everything way down with a huge title candidate
	int32_t maxTitleWords = 128;

	// assume no title
	reset();

	int32_t NW = words->getNumWords();

	//
	// now get all the candidates
	//

	// . allow up to 100 title CANDIDATES
	// . "as" is the word # of the first word in the candidate
	// . "bs" is the word # of the last word IN the candidate PLUS ONE
	int32_t n = 0;
	int32_t as[MAX_TIT_CANDIDATES];
	int32_t bs[MAX_TIT_CANDIDATES];
	float scores[MAX_TIT_CANDIDATES];
	Words *cptrs[MAX_TIT_CANDIDATES];
	int32_t types[MAX_TIT_CANDIDATES];
	int32_t parent[MAX_TIT_CANDIDATES];

	// record the scoring algos effects
	float  baseScore        [MAX_TIT_CANDIDATES];
	float  noCapsBoost      [MAX_TIT_CANDIDATES];
	float  qtermsBoost      [MAX_TIT_CANDIDATES];
	float  inCommonCandBoost[MAX_TIT_CANDIDATES];

	// reset these
	for ( int32_t i = 0 ; i < MAX_TIT_CANDIDATES ; i++ ) {
		// assume no parent
		parent[i] = -1;
	}

	// xml and words class for each link info, rss item
	Xml   tx[MAX_TIT_CANDIDATES];
	Words tw[MAX_TIT_CANDIDATES];
	int32_t  ti = 0;

	// restrict how many link texts and rss blobs we check for titles
	// because title recs like www.google.com have hundreds and can
	// really slow things down to like 50ms for title generation
	int32_t kcount = 0;
	int32_t rcount = 0;

	//int64_t x = gettimeofdayInMilliseconds();

	// . get every link text
	// . TODO: repeat for linkInfo2, the imported link text
	for ( Inlink *k = NULL; linkInfo && (k = linkInfo->getNextInlink(k)) ; ) {
		// breathe
		QUICKPOLL(m_niceness);
		// fast skip check for link text
		if ( k->size_linkText >= 3 && ++kcount >= 20 ) continue;
		// fast skip check for rss item
		if ( k->size_rssItem > 10 && ++rcount >= 20 ) continue;

		// set Url
		Url u;
		u.set( k->getUrl(), k->size_urlBuf );

		// is it the same host as us?
		bool sh = true;

		// skip if not from same host and should be
		if ( firstUrl->getHostLen() != u.getHostLen() ) {
			sh = false;
		}

		// skip if not from same host and should be
		if ( strncmp( firstUrl->getHost(), u.getHost(), u.getHostLen() ) ) {
			sh = false;
		}

		// get the link text
		if ( k->size_linkText >= 3 ) {
			char *p    = k->getLinkText();
			int32_t  plen = k->size_linkText - 1;
			if ( ! verifyUtf8 ( p , plen ) ) {
				log("title: set4 bad link text from url=%s", k->getUrl());
				continue;
			}

			// now the words.
			if ( !tw[ti].set( k->getLinkText(), k->size_linkText - 1, true, 0 ) ) {
//.........这里部分代码省略.........
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:101,代码来源:Title.cpp

示例13: is_digit

// . add the phrase that starts with the ith word
// . "read Of Mice and Men" should make 3 phrases:
// . read.ofmice
// . ofmice
// . mice.andmen
void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
	// . if the ith word cannot start a phrase then we have no phrase
	// . we indicate NULL phrasesIds with a spam of PSKIP
	// . we now index all regardless! we want to be able to search
	//   for "a thing" or something. so do it!
	//if ( ! m_bits->canStartPhrase ( i ) ) {
	//	m_phraseSpam[i] = PSKIP; 
	//	m_phraseIds [i] = 0LL;
	//	return;
	//}

	// MDW: now Weights.cpp should encompass all this logic
	// or if score <= 0, set in Scores.cpp
	//if ( m_wordScores && m_wordScores[i] <= 0 ) {
	//	m_phraseSpam[i] = PSKIP; 
	//	m_phraseIds [i] = 0LL;
	//	return;
	//}

	// hash of the phrase
	int64_t h   = 0LL; 
	// the hash of the two-word phrase (now we do 3,4 and 5 word phrases)
	int64_t h2  = 0LL; 
	int64_t h3  = 0LL; 
	//int64_t h4  = 0LL; 
	//int64_t h5  = 0LL; 
	// reset
	unsigned char pos = 0;
	// now look for other tokens that should follow the ith token
	int32_t          nw               = m_words->getNumWords();
	int32_t          numWordsInPhrase = 1;
	// use the min spam from all words in the phrase as the spam for phrase
	char minSpam = -1;
	// we need to hash "1 / 8" differently from "1.8" from "1,000" etc.
	char isNum = is_digit(m_wptrs[i][0]);
	// min score
	//int32_t minScore ;
	//if ( m_wordScores ) minScore = m_wordScores[i];
	// if i is not a stop word, it can set the min spam initially
	//if ( ! m_bits->isStopWord(i) &&m_spam ) minSpam = m_spam->getSpam(i);
	// do not include punct/tag words in the m_numWordsTotal[j] count
	// of the total words in the phrase. these are just usesless tails.
	int32_t lastWordj = -1;
	// loop over following words
	int32_t j;
	bool hasHyphen ;
	bool hasStopWord2 ;

	// . NOTE: a token can start a phrase but NOT be in it. 
	// . like a large number for example.
	// . wordId is the lower ascii hash of the ith word
	// . NO... this is allowing the query operator PiiPe to start
	//   a phrase but not be in it, then the phrase id ends up just
	//   being the following word's id. causing the synonyms code to
	//   give a synonym which it should not un Synonyms::set()
	if ( ! m_bits->canBeInPhrase(i) )
		// so indeed, skip it then
		goto nophrase;

	//h = hash64 ( h, m_words->getWordId(i));
	h = m_wids[i];
	// set position
	pos = (unsigned char)m_wlens[i];
	//if (m_words->getStripWordId(i)) 
	//	h2 = hash64 ( h2, m_words->getStripWordId(i));
	//else h2 = h;

	hasHyphen = false;
	hasStopWord2 = m_bits->isStopWord(i);
	// this makes it true now too
	//if ( m_wlens[i] <= 2 ) hasStopWord = true;

	for ( j = i + 1 ; j < nw ; j++ ) {
		QUICKPOLL(niceness);

		// . do not allow more than 32 alnum/punct "words" in a phrase
		// . this prevents phrases with 100,000 words from slowing
		//   us down. would put us in a huge double-nested for loop
		if ( j > i + 32 ) goto nophrase;
		// deal with punct words
		if ( ! m_wids[j] ) {
			// if we cannot pair across word j then break
			if ( ! m_bits->canPairAcross (j) ) break;
			// does it have a hyphen?
			if (j==i+1 && m_words->hasChar(j,'-')) hasHyphen=true;
			/*
			// "D & B" --> dandb
			if (j==i+1 && m_words->hasChar(j,'&')) {
				// set this
				hasStopWord = true;
				// insert "and"
				int32_t conti=pos;
				h = hash64Lower_utf8_cont("and",3,h,&conti);
				pos=conti;
				// the two-word phrase, set it if we need to
//.........这里部分代码省略.........
开发者ID:BlaBlaNet,项目名称:open-source-search-engine,代码行数:101,代码来源:Phrases.cpp

示例14: log

// . when a dump completes we free the primary mem space and make
//   the secondary mem space the new primary mem space
void RdbMem::freeDumpedMem( RdbTree *tree ) {
	// bail if we have no mem
	if ( m_memSize == 0 ) return;
		
    log("rdbmem: start freeing dumped mem");

    //char *memEnd = m_mem + m_memSize;

    // this should still be true so allocData() returns m_ptr2 ptrs
    if ( ! m_rdb->m_inDumpLoop ) { g_process.shutdownAbort(true); }

    // count how many data nodes we had to move to avoid corruption
    int32_t count = 0;
    int32_t scanned = 0;
    for ( int32_t i = 0 ; i < tree->m_minUnusedNode ; i++ ) {
		// give up control to handle search query stuff of niceness 0
		QUICKPOLL ( MAX_NICENESS );
		// skip node if parents is -2 (unoccupied)
		if ( tree->m_parents[i] == -2 ) continue;
		scanned++;
		// get the ptr
		char *data = tree->m_data[i];
		if ( ! data ) continue;
		// how could it's data not be stored in here?
		// if ( data < m_mem ) {
		//      log("rdbmem: bad data1");
		//      continue;
		// }
		// if ( data >= memEnd ) {
		//      log("rdbmem: bad data2");
		//      continue;
		// }
		// is it in primary mem? m_ptr1 mem was just dump
		// if growing upward
		bool needsMove = false;
		// if the primary mem (that was dumped) is
		// growing upwards
		if ( m_ptr1 < m_ptr2 ) {
			// and the node data is in it...
			if ( data < m_ptr1 )
				needsMove = true;
		}
		// growing downward otherwise
		else if ( data >= m_ptr1 ) {
			needsMove = true;
		}
		if ( ! needsMove ) continue;
		// move it. m_inDumpLoop should still
		// be true so we will get added to
		// m_ptr2
		int32_t size;
		if ( tree->m_sizes ) size = tree->m_sizes[i];
		else size = tree->m_fixedDataSize;
			
		if ( size < 0 ) { g_process.shutdownAbort(true); }
		if ( size == 0 ) continue;
			
		// m_inDumpLoop is still true at this point so
		// so allocData should return m_ptr2 guys
		char *newData = (char *)allocData(NULL,size,0);
		if ( ! newData ) {
			log("rdbmem: failed to alloc %i "
				"bytes node %i",(int)size,(int)i);
			continue;
		}
		// debug test
		bool stillNeedsMove = false;
		if ( m_ptr1 < m_ptr2 ) {
			// and the node data is in it...
			if ( newData < m_ptr1 )
				stillNeedsMove = true;
		}
		// growing downward otherwise
		else if ( newData >= m_ptr1 ) {
			stillNeedsMove = true;
		}
		if ( stillNeedsMove ) {// this should never happen!!
			log("rdbmem: olddata=0x%" PTRFMT" newdata=0x%" PTRFMT,
				(PTRTYPE)data, (PTRTYPE)newData);
			log("rdbmem: still needs move!");
		}
		count++;
		gbmemcpy(newData,data,size);
		tree->m_data[i] = newData;
    }
    if ( count > 0 )
		log("rdbmem: moved %i tree nodes for %s",(int)count,
			m_rdb->m_dbname);
		
	log("rdbmem: stop freeing dumped mem. scanned %i nodes.",(int)scanned);		

	// save primary ptr
	char *tmp = m_ptr1;
	// debug
	//logf(LOG_DEBUG,
	//     "db: freeing dumped mem ptr1=%" PRIx32" ptr2=%" PRIx32".",m_ptr1,m_ptr2);
	// primary pointer, m_ptr1, becomes m_ptr2
	m_ptr1 = m_ptr2;
//.........这里部分代码省略.........
开发者ID:exename,项目名称:open-source-search-engine,代码行数:101,代码来源:RdbMem.cpp

示例15: memset

// returns -1 and sets g_errno on error, because 0 means langUnknown
long Words::getLanguage( Sections *sections ,
			 long maxSamples,
			 long niceness,
			 long *langScore) {
	// calculate scores if not given
	//Scores calcdScores;
	//if ( ! scores ) {
	//	if ( ! calcdScores.set( this,m_version,false ) )
	//		return -1;
	//	scores = &calcdScores;
	//}

	// . take a random sample of words and look them up in the
	//   language dictionary
	//HashTableT<long long, char> ht;
	HashTableX ht;
	long long langCount[MAX_LANGUAGES];
	long long langWorkArea[MAX_LANGUAGES];
	long numWords = m_numWords;
	//long skip = numWords/maxSamples;
	//if ( skip == 0 ) skip = 1;
	// reset the language count
	memset(langCount, 0, sizeof(long long)*MAX_LANGUAGES);
	// sample the words
	//long wordBase  = 0;
	long wordi     = 0;
	//if ( ! ht.set(maxSamples*1.5) ) return -1;
	if ( ! ht.set(8,1,(long)(maxSamples*8.0),NULL,0,false,
		      niceness,"wordslang")) 
		return -1;
 
	// . avoid words in these bad sections
	// . google seems to index SEC_MARQUEE so i took that out of badFlags
	long badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT;
	// shortcuts
	long long *wids  = m_wordIds;
	long      *wlens = m_wordLens;
	char     **wptrs = m_words;

	//long langTotal = 0;
// 	log ( LOG_WARN, "xmldoc: Picking language from %li words with %li skip",
// 			numWords, skip );
	char numOne = 1;
	Section **sp = NULL;
	if ( sections ) sp = sections->m_sectionPtrs;
	// this means null too
	if ( sections && sections->m_numSections == 0 ) sp = NULL;

	long maxCount = 1000;

	while ( wordi < numWords ) {
		// breathe
		QUICKPOLL( niceness );
		// move to the next valid word
		if ( ! wids [wordi]     ) { wordi++; continue; }
		if (   wlens[wordi] < 2 ) { wordi++; continue; }
		// skip if in a bad section
		//long flags = sections->m_sectionPtrs[i]->m_flags;
		// meaning script section ,etc
		if ( sp && ( sp[wordi]->m_flags & badFlags ) ) {
			wordi++; continue; }
		// check the language
		//unsigned char lang = 0;

		// Skip if word is capitalized and not preceded by a tag
		//if(s_isWordCap(getWord(wordi), getWordLen(wordi)) &&
		//   wordi > 0 && !getTagId(wordi - 1)) {
		//	wordi++;
		//	continue;
		//}

		// Skip word if bounded by '/' or '?' might be in a URL
		if(isBounded(wordi)) {
			wordi++;
			continue;
		}

		// is it arabic? sometimes they are spammy pages and repeat
		// a few arabic words over and over again, so don't do deduping
		// with "ht" before checking this.
		char cl = getCharacterLanguage ( wptrs[wordi] );
		if ( cl ) {
		        langCount[(unsigned char)cl]++;
			wordi++;
			continue;
		}

		//if(ht.getSlot(m_wordIds[wordi]) !=-1) {
		if(!ht.isEmpty(&m_wordIds[wordi]) ) {
			wordi++;
			continue;
		}

		// If we can't add the word, it's not that bad.
		// Just gripe about it in the log.
		if(!ht.addKey(&m_wordIds[wordi], &numOne)) {
			log(LOG_WARN, "build: Could not add word to temporary "
			    "table, memory error?\n");
			g_errno = ENOMEM;
//.........这里部分代码省略.........
开发者ID:BillWangCS,项目名称:open-source-search-engine,代码行数:101,代码来源:Words.cpp


注:本文中的QUICKPOLL函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。