本文整理汇总了Python中dbclient.DbClient.getCollection方法的典型用法代码示例。如果您正苦于以下问题：Python DbClient.getCollection方法的具体用法？Python DbClient.getCollection怎么用？Python DbClient.getCollection使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dbclient.DbClient的用法示例。
在下文中一共展示了DbClient.getCollection方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Copy a daily job collection into the ``job1000`` collection.

    Opens the ``jobaly`` (target) and ``jobaly_daily`` (source) databases on
    localhost:27017, looks up one collection in each, and passes both to
    ``copyCollection`` together with a size of 1000.

    NOTE(review): ``size`` is presumably the number of documents to copy --
    confirm against ``copyCollection``.
    """
    host, port = 'localhost', 27017

    # The target client is created first, matching the original call order.
    destination_client = DbClient(host, port, "jobaly")
    source_client = DbClient(host, port, "jobaly_daily")

    source_coll = source_client.getCollection("daily_job_info_2014-06-16")
    destination_coll = destination_client.getCollection("job1000")

    copyCollection(source_coll, destination_coll, 1000)
示例2: getByCities
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def getByCities():
    """Run the "software engineer" query once for every city in a fixed list.

    For each city, ``ApiClient.processQuery`` is called with the
    ``job_se_10city`` collection of the ``jobaly`` database, the parameter
    name ``"l"`` and the city string.

    NOTE(review): several city strings carry a leading space; they are
    passed through unchanged -- confirm whether the API tolerates that.
    """
    locations = [
        "Austin, TX",
        "San Jose, CA",
        "Portland, OR",
        " New York, NY",
        "Houston, TX",
        "Boston, MA",
        "Davis, CA",
        "Palo Alto, CA",
        " Irvine, CA",
        "Olathe, KS",
        "Columbia, MD",
        " Atlanta, GA",
    ]
    query_params = {"q": "software engineer", "fromage": "30"}
    indeedClient = ApiClient(query_params)

    collection = DbClient("localhost", 27017, "jobaly").getCollection("job_se_10city")

    for location in locations:
        print("-----prcoss city %s -------" % location)
        indeedClient.processQuery(collection, "l", location)
示例3: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Feed pages of ``job_lang_top_corps`` to a pool of ``JobGetter`` threads.

    Starts 20 daemon ``JobGetter`` workers that share one queue and the
    ``jobinfo_lang_top_corps`` collection. Pages of 20 documents are then
    read with ``DbClient.getPage`` and queued as ``(page, pageNo)`` tuples.
    Paging stops after a short page (fewer than ``pageSize`` documents) or
    after ``pageNum`` pages. Finally the function blocks on ``queue.join()``.
    """
    # The original also assigned "job_se_10city" / "jobinfo_se_10city" to
    # these names and overwrote them on the next lines; those dead stores
    # have been removed.
    collectionName = "job_lang_top_corps"
    infoCollectionName = "jobinfo_lang_top_corps"

    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    infoCollection = dbClient.getCollection(infoCollectionName)

    pageSize = 20
    pageNo = 1
    has_more = True
    pageNum = 10000
    find_sort = None
    find_spec = None
    threadNum = 20

    queue = Queue.Queue()
    for _ in range(threadNum):
        worker = JobGetter(queue, infoCollection)
        # Daemon threads do not keep the process alive once main() returns.
        worker.setDaemon(True)
        worker.start()

    while has_more and pageNo <= pageNum:
        page = dbClient.getPage(collection, find_spec, find_sort, pageSize, pageNo)
        queue.put((page, pageNo))
        pageNo += 1
        # A page shorter than pageSize is treated as the last one.
        count = page.count(with_limit_and_skip=True)
        if count < pageSize:
            has_more = False

    queue.join()
示例4: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Process pages of ``job_lang_top_corps`` sequentially, starting at page 149.

    Each page of 10 documents is read with ``DbClient.getPage`` and passed,
    with its page number, to ``IndeedPageGetter.processPage``. The getter is
    constructed with the ``jobinfo_lang_top_corps`` collection. The loop ends
    after a page with fewer than 10 documents or once the page number
    exceeds 10000.
    """
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection("job_lang_top_corps")
    infoCollection = dbClient.getCollection("jobinfo_lang_top_corps")
    getter = IndeedPageGetter(infoCollection)

    page_size = 10
    last_page = 10000
    spec = None
    sort = None

    current = 149
    while current <= last_page:
        page = dbClient.getPage(collection, spec, sort, page_size, current)
        getter.processPage(page, current)
        current += 1
        # A short page means there is nothing left to fetch.
        if page.count(with_limit_and_skip=True) < page_size:
            break
示例5: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Fetch "python" question pages 13..9999 and save them to ``question_test``.

    Every page is requested through ``QuestionGetter.getPage``. The loop
    stops at the first page that returns the ``"NO_ITEMS"`` sentinel.
    Otherwise the items are saved with ``QuestionGetter.savePage`` and the
    function sleeps 10 seconds before requesting the next page.
    """
    page_size = 100
    first_page, stop_page = 13, 10000

    collection = DbClient('localhost', 27017, "SimilarQuestion").getCollection("question_test")
    questionGetter = QuestionGetter(page_size, "python")

    for page_no in range(first_page, stop_page):
        print("--get page at : %d -----" % page_no)
        items = questionGetter.getPage(page_no)
        if items == "NO_ITEMS":
            break
        print("--page at : %d have %d questions--" % (page_no, len(items)))
        questionGetter.savePage(collection, items)
        # Pause between requests.
        time.sleep(10)
示例6: getByCorps
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def getByCorps():
    """Run a "software engineer" query for every company listed in ``topcorps.txt``.

    Non-blank lines of ``topcorps.txt`` are taken, stripped, as company
    names. For each one a query is built with ``ApiClient.buildQuery`` and
    handed to ``ApiClient.processQuery`` with the ``job_se_top_corps``
    collection and the parameter name ``"q"``.
    """
    print(" --- get job by companies---")

    indeedClient = ApiClient({"q": "software engineer", "fromage": "30"})
    collection = DbClient("localhost", 27017, "jobaly").getCollection("job_se_top_corps")

    with open("topcorps.txt", "r") as the_file:
        stripped_lines = (raw.strip() for raw in the_file)
        # Skip lines that are empty after stripping.
        companies = [name for name in stripped_lines if name]

    for company in companies:
        query = indeedClient.buildQuery("software engineer", {"company": company})
        print("-----prcoss corp %s -------" % company)
        indeedClient.processQuery(collection, "q", query)
示例7: getByLang
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def getByLang():
    """Run one query per (company, language) pair into ``job_lang_top_corps``.

    Languages are loaded from ``pro_langs.txt`` and companies from
    ``topcorps.txt`` via ``utils.loadArrayFromFile``. Companies form the
    outer loop and languages the inner loop. Each query is built with
    ``ApiClient.buildQuery`` and passed to ``ApiClient.processQuery`` with
    the parameter name ``"q"``.
    """
    print(" --- get job by language and companies---")

    base_params = {"q": "software engineer", "fromage": "30"}
    languages = utils.loadArrayFromFile("pro_langs.txt")
    companies = utils.loadArrayFromFile("topcorps.txt")

    indeedClient = ApiClient(base_params)
    collection = DbClient('localhost', 27017, "jobaly").getCollection("job_lang_top_corps")

    for company in companies:
        company_filter = {"company": company}
        for language in languages:
            query = indeedClient.buildQuery(language, company_filter)
            print("-----prcoss corp %s with language %s -------" % (company, language))
            indeedClient.processQuery(collection, "q", query)
示例8: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Fetch question pages 1..9999 and save them to ``english_questions``.

    ``QuestionGetter.getPage`` is compared against two string sentinels:

    * ``"NO_MORE"``  -- stop the whole run.
    * ``"NO_ITEMS"`` -- this page is empty; move on to the next one.

    Any other value is treated as a sized collection of questions and is
    saved with ``QuestionGetter.savePage``, whose return value is logged as
    the number of questions saved.
    """
    pageSize = 100
    startPageNo = 1
    endPageNo = 10000

    dbClient = DbClient('localhost', 27017, "SimilarQuestion")
    collection = dbClient.getCollection("english_questions")
    questionGetter = QuestionGetter(pageSize, "")

    for pg in range(startPageNo, endPageNo):
        print("--- get page %d ---" % pg)
        items = questionGetter.getPage(pg)

        if items == "NO_MORE":
            print("have no more questions, quit program !!")
            break

        if items == "NO_ITEMS":
            # The original took len() of the sentinel string here and so
            # logged 8 questions for an empty page. Report 0 instead.
            print("--- page %d has %d questions ---" % (pg, 0))
            continue

        print("--- page %d has %d questions ---" % (pg, len(items)))
        saved = questionGetter.savePage(collection, items)
        print("--- page %d has save %d question " % (pg, saved))
示例9: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Write a job-title frequency table for ``job_lang_top_corps``.

    Counts how often each ``jobtitle`` value occurs across all documents of
    the collection, then writes one ``"<title> : <count> "`` line per title
    to ``jobtitle_stat.txt``, most frequent first. The number of lines
    written is printed at the end.
    """
    collection = DbClient('localhost', 27017, "jobaly").getCollection("job_lang_top_corps")

    occurrences = {}
    for job in collection.find():
        title = job["jobtitle"]
        occurrences[title] = occurrences.get(title, 0) + 1

    stat_file_name = "jobtitle_stat.txt"
    with open(stat_file_name, "w") as text_file:
        ranked = sorted(occurrences.iteritems(), key=operator.itemgetter(1), reverse=True)
        for title, total in ranked:
            # Titles are encoded to UTF-8 bytes before being written.
            text_file.write("%s : %s \n" % (title.encode('utf8'), total))

    # Two spaces after the count reproduce the original comma-separated print.
    print("%s  lines had been writen into file: %s" % (len(ranked), stat_file_name))
示例10: main
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
def main():
    """Run ``ApiClient.processCity`` for each city in a fixed list.

    The client is built from the query "software engineer", a page size of
    25, ``fromage`` 30, location 94040 and radius 25. Each city is processed
    against the ``job_se_10city`` collection of the ``jobaly`` database.

    NOTE(review): several city strings carry a leading space; they are
    passed through unchanged -- confirm whether the API tolerates that.
    """
    # The original first assigned a longer list (additionally containing
    # 'MoutainView, CA', 'Seattle, WA', 'San Diego, CA' and
    # 'San Francisco, CA') and overwrote it on the next statement. Only the
    # list that was actually used is kept.
    cities = [
        'Austin, TX',
        'San Jose, CA', 'Portland, OR', ' New York, NY', 'Houston, TX', 'Boston, MA',
        'Davis, CA', 'Palo Alto, CA', ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA',
    ]

    _pageSize = 25
    _fromage = 30
    _location = 94040
    _radius = 25
    _query = "software engineer"
    collectionName = "job_se_10city"

    indeedClient = ApiClient(_query, _pageSize, _fromage, _location, _radius)

    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)

    for city in cities:
        print("-----prcoss city %s -------" % city)
        indeedClient.processCity(collection, city)
示例11: __init__
# 需要导入模块: from dbclient import DbClient [as 别名]
# 或者: from dbclient.DbClient import getCollection [as 别名]
class DataProcessor:
    """Dump question documents from the ``SimilarQuestion`` database to files.

    The ``process*`` static methods turn one stored document into a plain
    dict. ``dumpDataToFile`` pages through a collection and writes one JSON
    object per line. The ``dump*`` methods bind a collection, a query and an
    output path to that machinery.
    """

    def __init__(self):
        """Open a ``DbClient`` for ``SimilarQuestion`` on localhost:27017."""
        self.dbClient = DbClient("localhost", 27017, "SimilarQuestion")

    @staticmethod
    def processQuestion(question):
        """Return ``{"qid", "title"}`` taken from ``_id`` and ``title``."""
        a = {}
        a["qid"] = question["_id"]
        a["title"] = question["title"]
        return a

    @staticmethod
    def processLinkedQuestion(question):
        """Return the question plus a ``"linked"`` list of ``{"qid", "title"}``.

        Each entry of ``question["items"]`` contributes its ``question_id``
        and ``title``. Every linked entry is also printed to stdout.
        """
        a = {}
        a["qid"] = question["_id"]
        a["title"] = question["title"]
        a["linked"] = []
        for item in question["items"]:
            b = {}
            b["qid"] = item["question_id"]
            b["title"] = item["title"]
            print(b)
            a["linked"].append(b)
        return a

    @staticmethod
    def processLinkedQuestion2(question):
        """Return ``{"qid", "linked"}`` where ``linked`` holds only question ids."""
        a = {}
        a["qid"] = question["_id"]
        a["linked"] = [item["question_id"] for item in question["items"]]
        return a

    @staticmethod
    def processRelatedQuestion(question):
        """Return the question plus a ``"related"`` list of ``{"qid", "title"}``."""
        a = {}
        a["qid"] = question["_id"]
        a["title"] = question["title"]
        a["related"] = []
        for item in question["items"]:
            b = {}
            b["qid"] = item["question_id"]
            b["title"] = item["title"]
            a["related"].append(b)
        return a

    def dumpDataToFile(self, queFun, collection, find_spec, find_sort, fileName, pageNum):
        """Write ``queFun(document)`` as one JSON line per document.

        Args:
            queFun: callable mapping one document to a JSON-serialisable object.
            collection: collection handle passed through to ``DbClient.getPage``.
            find_spec: query filter passed through to ``getPage`` (may be None).
            find_sort: sort specification passed through to ``getPage`` (may be None).
            fileName: output path; the file is opened in ``"w"`` mode.
            pageNum: maximum number of pages (of 1000 documents) to read.

        Paging stops early after the first page holding fewer than 1000
        documents.
        """
        pageSize = 1000
        pageNo = 1
        has_more = True
        with open(fileName, "w") as the_file:
            while has_more and pageNo <= pageNum:
                page = self.dbClient.getPage(collection, find_spec, find_sort, pageSize, pageNo)
                pageNo += 1
                count = page.count(with_limit_and_skip=True)
                print("count= %s" % count)
                if count < pageSize:
                    # Short page: this is the last one.
                    has_more = False
                for item in page:
                    the_file.write(json.dumps(queFun(item)) + "\n")
                # pageNo was already advanced, hence the "- 1".
                print(" page %d saved %d lines in file" % (pageNo - 1, count))

    def dumpPythonQuestions(self, pageNum):
        """Dump up to ``pageNum`` pages of ``question_test``, unfiltered and unsorted."""
        question_coll = self.dbClient.getCollection("question_test")
        # Raw string: same characters as before, without invalid escapes.
        fileName = r"..\..\data\pyton_questions.txt"
        # The original call omitted find_spec and find_sort, so it raised
        # TypeError (4 arguments for a 6-parameter method). None is passed
        # for both, matching how dumpRelatedQuestions passes find_sort=None.
        self.dumpDataToFile(DataProcessor.processQuestion, question_coll, None, None, fileName, pageNum)

    def dumpLinkedQuestions(self, pageNum):
        """Dump ``question_link_python`` documents having more than 5 items."""
        question_coll = self.dbClient.getCollection("question_link_python")
        fileName = r"..\..\data\question_link_python.txt"
        find_spec = {"items": {"$exists": True}, "$where": "this.items.length > 5"}
        # NOTE(review): this does not look like a standard sort specification;
        # confirm what DbClient.getPage does with it.
        find_sort = {"items": {"$size": -1}}
        self.dumpDataToFile(DataProcessor.processLinkedQuestion, question_coll, find_spec, find_sort, fileName, pageNum)

    def dumpLinkedQuestions2(self, pageNum):
        """Dump id-only link lists for documents having more than 1 item."""
        question_coll = self.dbClient.getCollection("question_link_python")
        fileName = r"..\..\data\python_linked.txt"
        find_spec = {"items": {"$exists": True}, "$where": "this.items.length > 1"}
        # NOTE(review): this does not look like a standard sort specification;
        # confirm what DbClient.getPage does with it.
        find_sort = {"items": {"$size": -1}}
        self.dumpDataToFile(
            DataProcessor.processLinkedQuestion2, question_coll, find_spec, find_sort, fileName, pageNum
        )

    def dumpRelatedQuestions(self, pageNum):
        """Dump ``related_python`` documents having more than 5 items, unsorted."""
        question_coll = self.dbClient.getCollection("related_python")
        fileName = r"..\..\data\question_related_python.txt"
        find_spec = {"items": {"$exists": True}, "$where": "this.items.length > 5"}
        find_sort = None
        self.dumpDataToFile(
            DataProcessor.processRelatedQuestion, question_coll, find_spec, find_sort, fileName, pageNum
        )