本文整理汇总了Python中solr.SolrConnection.query方法的典型用法代码示例。如果您正苦于以下问题:Python SolrConnection.query方法的具体用法?Python SolrConnection.query怎么用?Python SolrConnection.query使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类solr.SolrConnection的用法示例。
在下文中一共展示了SolrConnection.query方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: word_matches_for_page
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def word_matches_for_page(page_id, words):
    """
    Collect the highlighted word forms Solr reports for ``words`` on one page.

    The query is limited to the document whose id is ``page_id`` and matches
    ``words`` against the ``ocr`` field plus one ``ocr_<lang>`` field per
    entry in ``settings.SOLR_LANGUAGES``. So if you pass in 'manufacturer'
    you can get back a list like
    ['Manufacturer', 'manufacturers', 'MANUFACTURER'] etc ...

    Returns a list of distinct words (built from a set, so in no particular
    order), or an empty list when Solr returned no highlighting entry for
    the page.
    """
    connection = SolrConnection(settings.SOLR)

    # Make sure page_id is of type str, else the string formatting below
    # may result in a UnicodeDecodeError. For example, see ticket #493.
    if not isinstance(page_id, str):
        page_id = str(page_id)

    ocr_fields = ['ocr'] + ['ocr_%s' % lang for lang in settings.SOLR_LANGUAGES]
    field_clauses = [query_join(words, field) for field in ocr_fields]
    q = 'id:%s AND (%s)' % (page_id, ' OR '.join(field_clauses))

    highlight_options = {
        "hl.snippets": 100,
        "hl.requireFieldMatch": 'true',
        "hl.maxAnalyzedChars": '102400',
    }
    response = connection.query(q, fields=['id'], highlight=ocr_fields,
                                **highlight_options)

    if page_id not in response.highlighting:
        return []

    page_highlights = response.highlighting[page_id]
    matched = set()
    for field in ocr_fields:
        if field not in page_highlights:
            continue
        for snippet in page_highlights[field]:
            matched.update(find_words(snippet))
    return list(matched)
示例2: _get_count
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def _get_count(self):
"Returns the total number of objects, across all pages."
if self._count is None:
solr = SolrConnection(settings.SOLR) # TODO: maybe keep connection around?
solr_response = solr.query(self._q, fields=['id'])
self._count = int(solr_response.results.numFound)
return self._count
示例3: oralookup
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def oralookup(pid=None, uuid=None,
              fields_to_return="f_name, f_subject, f_keyphrase, faculty, f_institution, thesis_type, content_type, collection",
              endpoint="http://ora.ouls.ox.ac.uk:8080/solr/select"):
    """Fetch the single Solr document identified by ``pid`` or ``uuid``.

    ``pid`` takes precedence over ``uuid``. The query is attempted up to
    three times, sleeping half a second after each ``BadStatusLine``.

    Parameters:
        pid: identifier whose colons are backslash-escaped for the query.
        uuid: identifier looked up as ``id:uuid\\:<uuid>`` when no pid is given.
        fields_to_return: passed to Solr as the ``fields`` argument.
        endpoint: URL handed to ``SolrConnection``.

    Returns:
        The matching result document, or an empty dict when neither
        identifier is given, every attempt failed, the response could not
        be parsed, or the query did not match exactly one document.
    """
    if pid:
        # Colons must be escaped inside the Solr query string.
        query = "id:%s" % pid.replace(":", "\\:")
    elif uuid:
        query = "id:uuid\\:%s" % uuid
    else:
        # Nothing to look up, so no connection is created.
        return {}

    s = SolrConnection(endpoint)

    # Running actual query (3 tries, failover)
    response = None
    for _attempt in range(3):
        try:
            response = s.query(q=query, fields=fields_to_return)
        except BadStatusLine:
            sleep(0.5)
            continue
        logger.debug("Solr response: %s", response.header)
        break

    if response is None:
        # Every attempt raised BadStatusLine; previously this path crashed
        # on an unbound local instead of returning the empty result.
        logger.warning("No valid response from Solr endpoint %s after 3 tries",
                       endpoint)
        return {}

    try:
        matches = response.results
        match_count = len(matches)
    except ValueError:
        logger.warning("Couldn't parse json response from Solr endpoint: %s",
                       response)
        return {}

    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if match_count != 1:
        logger.warning("Couldn't assert that only a single result was fetched: %s",
                       matches)
        return {}
    return matches[0]
示例4: get_page_text
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def get_page_text(page):
    """Return the ``ocr`` value Solr stores for ``page``.

    The document is looked up by ``page.url`` as its id. When the response
    has no non-empty ``results`` list, or the first result has no ``ocr``
    key, the placeholder ``["Text not available"]`` is returned instead.
    """
    fallback = ["Text not available"]
    connection = SolrConnection(settings.SOLR)
    response = connection.query('id:"%s"' % page.url)

    docs = getattr(response, 'results', None)
    if not isinstance(docs, list) or len(docs) == 0:
        return fallback
    return docs[0].get('ocr', fallback)
示例5: similar_pages
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def similar_pages(page):
    """Return pages issued on the same date as ``page`` under a different lccn.

    The Solr query selects ``type:page`` documents whose ``date`` equals the
    issue date of ``page`` (formatted as zero-padded year, month and day run
    together), combined with a ``city`` clause built by ``query_join`` from
    the cities of the title's places, and excluding the title's own lccn.
    At most 25 rows are requested. Each result id is resolved as a URL and
    the resolved keyword arguments are passed to ``utils.get_page``.
    """
    connection = SolrConnection(settings.SOLR)

    issued = page.issue.date_issued
    date = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month, issued.day)

    cities = [place.city for place in page.issue.title.places.all()]
    city_clause = query_join(cities, 'city')
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date, city_clause, page.issue.title.lccn)

    response = connection.query(query, rows=25)

    # Resolve every id first, then load the pages, in result order.
    resolved = [urlresolvers.resolve(doc['id']).kwargs for doc in response.results]
    return [utils.get_page(**kwargs) for kwargs in resolved]
示例6: execute_solr_query
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def execute_solr_query(query, fields, sort, sort_order, rows, start):
    """Run ``query`` against the Solr instance at ``settings.SOLR``.

    ``rows``, ``sort``, ``sort_order`` and ``start`` are forwarded to Solr
    unchanged and the raw Solr response is returned.

    NOTE(review): ``fields`` is accepted but not used; a fixed list of
    title fields is always requested. Confirm with callers before changing.
    """
    title_fields = [
        'lccn', 'title',
        'edition',
        'place_of_publication',
        'start_year', 'end_year',
        'language',
    ]
    connection = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    return connection.query(query,
                            fields=title_fields,
                            rows=rows,
                            sort=sort,
                            sort_order=sort_order,
                            start=start)
示例7: execute_solr_query
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def execute_solr_query(query, fields, sort, sort_order, rows, start):
    """Query the Solr instance at ``settings.SOLR`` and return its response.

    NOTE(review): the ``fields`` argument is ignored; the requested fields
    are hard-coded below. Confirm with callers before changing.
    """
    # default arg_separator - underscore wont work if fields to facet on
    # themselves have underscore in them
    solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    query_options = dict(
        fields=['lccn', 'title',
                'edition',
                'place_of_publication',
                'start_year', 'end_year',
                'language'],
        rows=rows,
        sort=sort,
        sort_order=sort_order,
        start=start,
    )
    return solr.query(query, **query_options)
示例8: __init__
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
class QuoteResource:
    """CherryPy resource that returns quotes from Solr as timeline JSON."""

    def __init__(self):
        self.solr = SolrConnection('http://localhost:8983/solr')

    @cherrypy.expose
    def index(self, callback=None, person=None, topic=None):
        """Return up to 100 quotes, optionally filtered by person and topic.

        The response body is UTF-8 encoded JSON holding a ``timeline``
        object whose ``date`` list has one entry per Solr result.

        NOTE(review): ``callback`` is accepted but not used in this method.
        NOTE(review): ``person`` and ``topic`` are interpolated into the
        Solr query as-is, and result values are concatenated into HTML
        without escaping - confirm whether either needs sanitising.
        """
        clauses = ['type:quote']
        if person:
            clauses.append('person_t:%s' % person)
        if topic:
            clauses.append('quote_t:%s' % topic)
        results = self.solr.query(q=' AND '.join(clauses), rows=100)

        entries = []
        timeline = {
            'timeline': {
                'headline': 'OnTheRecord',
                'type': 'default',
                'startDate': '2012,1,1',
                'text': 'We help you track quotations from politicians over time',
            }
        }
        for result in results:
            start_date = result['date'].strftime('%Y,%m,%d')
            headline = result['person']
            link = ''.join(['<a href="', result['url'], '">', result['title'], '</a>'])
            media = ''.join(['<blockquote>"', result['quote'], '"</blockquote>'])
            entries.append({
                "startDate": start_date,
                "headline": headline,
                "text": link,
                "asset": {
                    "media": media,
                    "credit": "",
                    "caption": "",
                },
            })
        timeline['timeline']['date'] = entries

        cherrypy.response.headers['Content-Type'] = 'application/json; charset=utf-8'
        body = json.dumps(timeline, ensure_ascii=False, indent=4)
        return body.encode('utf-8')
示例9: page
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def page(self, number):
    """
    Override the page method in Paginator since Solr has already
    paginated stuff for us.

    Runs the stored query ``self._q`` for the requested page, looks up a
    ``models.Page`` for every result id (results without one are skipped),
    and attaches to each page the sorted highlighted words and a highlight
    URL before wrapping them in a ``Page``.
    """
    number = self.validate_number(number)

    # figure out the solr query and execute it
    connection = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    offset = self.per_page * (number - 1)
    highlight_options = {
        "hl.snippets": 100,  # TODO: make this unlimited
        "hl.requireFieldMatch": 'true',  # limits highlighting slop
        "hl.maxAnalyzedChars": '102400',  # increased from default 51200
    }
    sort_field, sort_order = _get_sort(self.query.get('sort'), in_pages=True)
    response = connection.query(
        self._q,
        fields=['id', 'title', 'date', 'sequence',
                'edition_label', 'section_label'],
        highlight=self._ocr_list,
        rows=self.per_page,
        sort=sort_field,
        sort_order=sort_order,
        start=offset,
        **highlight_options)

    found = []
    for doc in response.results:
        doc_id = doc['id']
        match = models.Page.lookup(doc_id)
        if not match:
            continue

        snippets_by_field = response.highlighting[doc_id]
        matched_words = set()
        for field in self._ocr_list:
            for snippet in snippets_by_field.get(field) or []:
                matched_words.update(find_words(snippet))

        # Case-insensitive ordering of the highlighted words.
        match.words = sorted(matched_words, key=lambda w: w.lower())
        # len(found) is this page's index within the pages kept so far.
        match.highlight_url = self.highlight_url(match.url,
                                                 match.words,
                                                 number, len(found))
        found.append(match)

    return Page(found, number, self)
示例10: __init__
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
class PersonResource:
    """CherryPy resource that queries Solr for the ``person`` facet."""

    def __init__(self):
        self.solr = SolrConnection('http://localhost:8983/solr')

    @cherrypy.expose
    def index(self):
        """Print each ``person`` facet entry and return a fixed JSON body.

        The facet values are only written to standard output; the returned
        UTF-8 encoded JSON document is a constant placeholder.
        """
        response = self.solr.query('*:*', facet='true', facet_field='person')
        person_facets = response.facet_counts[u'facet_fields'][u'person']
        for person in person_facets:
            print(person)

        cherrypy.response.headers['Content-Type'] = 'application/json; charset=utf-8'
        payload = {
            'test': 'test',
            'data': 'data'
        }
        return json.dumps(payload, ensure_ascii=False, indent=4).encode('utf-8')
示例11: __init__
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def __init__(self, query):
    """Run a title search in Solr and load the matching Title models.

    ``query`` is copied onto ``self.query``. Its ``page`` and ``rows``
    entries select the slice of results to request (defaulting to page 1
    and 50 rows when missing or not an integer) and its ``sort`` entry is
    translated by ``_get_sort``. Each lccn returned by Solr is loaded from
    the database; lccns without a Title row are skipped.

    NOTE(review): within the visible block ``results`` is built but not
    yet consumed.
    """
    self.query = query.copy()

    # figure out the solr query
    q = title_search(self.query)

    # int(None) raises TypeError and int('abc') raises ValueError; only
    # those mean "use the default", so nothing else is swallowed.
    try:
        page = int(self.query.get('page'))
    except (TypeError, ValueError):
        page = 1

    try:
        rows = int(self.query.get('rows'))
    except (TypeError, ValueError):
        rows = 50
    start = rows * (page - 1)

    # determine sort order
    sort_field, sort_order = _get_sort(self.query.get('sort'))

    # execute query
    solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    solr_response = solr.query(q,
                               fields=['lccn', 'title',
                                       'edition',
                                       'place_of_publication',
                                       'start_year', 'end_year',
                                       'language'],
                               rows=rows,
                               sort=sort_field,
                               sort_order=sort_order,
                               start=start)

    # convert the solr documents to Title models
    # could use solr doc instead of going to db, if performance requires it
    lccns = [d['lccn'] for d in solr_response.results]
    results = []
    for lccn in lccns:
        try:
            title = models.Title.objects.get(lccn=lccn)
        except models.Title.DoesNotExist:
            continue  # TODO: log exception
        results.append(title)
示例12: setUpClass
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
def setUpClass(cls):
    """Load the MEI fixture into Solr before the tests in this class run.

    A folio document is posted with curl, the MEI fixture is converted to
    Solr documents, and after checking that no matching notation documents
    exist yet the converted documents are added and committed.
    """
    # First, add a folio to Solr so that the image_uri can be retrieved during the MEI conversion
    # Using curl here because it turned out to be easier than solrconn.add and gives better error messages
    add_folio_command = (
        "curl {0}/update/?commit=true -H 'Content-Type: text/xml' -d '<add><doc>"
        '<field name="id">testid</field>'
        '<field name="type">cantusdata_folio</field>'
        '<field name="manuscript_id">{1}</field>'
        '<field name="number">{2}</field>'
        '<field name="image_uri">{3}</field>'
        "</doc></add>'"
    ).format(settings.SOLR_SERVER, MEI_FIXTURE_ID, MEI_FIXTURE_FOLIO, MEI_FIXTURE_URI)
    os.system(add_folio_command)

    docs = list(MEIConverter.process_file(MEI_FIXTURE, MEI_FIXTURE_SIGLUM, MEI_FIXTURE_ID))

    # Sanity check
    solrconn = SolrConnection(settings.SOLR_SERVER)
    sanity_query = 'type:cantusdata_music_notation AND manuscript:' + MEI_FIXTURE_SIGLUM
    prequery = solrconn.query(sanity_query)
    assert prequery.numFound == 0, 'MEI was already in the database when loading the test fixture'

    solrconn.add_many(docs)
    solrconn.commit()
示例13: Exception
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
# Build the Solr query from the optional wiki host and free-form query.
# At least one of the two must be supplied.
if not (options.wiki or options.query):
    raise Exception('A wiki is required, passed as host name')

# Collect the parts first: previously a query given without a wiki hit
# ``query +=`` on a name that had never been assigned.
query_parts = []
if options.wiki:
    query_parts.append('host:%s' % (options.wiki))
if options.query:
    query_parts.append(options.query)
query = ' '.join(query_parts)

# Output directory name: the wiki host, or this process id without one.
specifier = options.wiki if options.wiki else str(os.getpid())

conn = SolrConnection('http://search-s10.prod.wikia.net:8983/solr')
print(query)
response = conn.query(query, fields=['html_en', 'nolang_txt', 'html', 'title', 'title_en', 'id'], rows=100)
paginator = SolrPaginator(response)
def initialize_dir(page):
    """Create ``<options.dest>/<specifier>/<page>/`` and return that path.

    Each level is created in turn if it does not exist yet. The returned
    path ends with a trailing slash.
    """
    fullpath = ''
    for component in (options.dest, specifier, str(page)):
        fullpath = fullpath + component + '/'
        if os.path.exists(fullpath):
            continue
        os.mkdir(fullpath)
    return fullpath
for page in paginator.page_range:
pagedir = initialize_dir(page)
lockfilepath = pagedir+'/LOCK'
with open(lockfilepath, 'w') as lockfile:
示例14: Exception
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
# Compute sentiment polarity/subjectivity for every document of one wiki.
if not options.wiki:
    raise Exception('A wiki is required, passed as host name')

conn = SolrConnection('http://search-s10.prod.wikia.net:8983/solr')

# Query clauses, joined with AND below.
query = ["host:'%s'" % (options.wiki)]
query += ['ns:%d ' % (int(options.namespace))]
if options.start_date or options.end_date:
    # A missing bound becomes an open-ended range ('*').
    start = options.start_date + 'T00:00:00.000Z' if options.start_date else '*'
    end = options.end_date + 'T00:00:00.000Z' if options.end_date else '*'
    query += ['created:[%s TO %s]' % (start, end)]

response = conn.query(' AND '.join(query), fields=['html_en', 'nolang_txt', 'html'])
paginator = SolrPaginator(response)
print('%s results to chomp through...' % (paginator.count,))

polarities, subjectivities = [], []

# Report progress about every tenth of the pages. The step is clamped to 1
# because with fewer than 10 pages the integer division yields 0 and the
# modulo below would raise ZeroDivisionError.
progress_step = max(1, paginator.num_pages // 10)

for page in paginator.page_range:
    for doc in paginator.page(page).object_list:
        # Prefer the English HTML, then the language-neutral text, then raw HTML.
        sent = sentiment.sentiment(doc.get('html_en', doc.get('nolang_txt', doc.get('html'))))
        if sent == (0, 0):
            continue
        polarities.append(sent[0])
        subjectivities.append(sent[1])
    if page % progress_step == 0:
        print('======== On page %s of %s =======' % (page, paginator.num_pages))
示例15: Exception
# 需要导入模块: from solr import SolrConnection [as 别名]
# 或者: from solr.SolrConnection import query [as 别名]
"-w", "--wiki", dest="wiki", action="store", default=None, help="Specifies the wiki to perform calculations against"
)
# type="int" makes a value given on the command line an integer like the
# default; without it optparse stores the argument as a string.
parser.add_option(
    "-n", "--sents", dest="num_sents", action="store", type="int", default=5,
    help="Specifies the number of sentences to write"
)
(options, args) = parser.parse_args()

# A document id takes precedence over a wiki host.
if options.id:
    query = "id:%s" % (options.id)
elif options.wiki:
    query = "host:'%s' AND ns:0" % (options.wiki)
else:
    raise Exception("A wiki or ID is required, passed as host name")

conn = SolrConnection("http://search-s10.prod.wikia.net:8983/solr")
response = conn.query(query, fields=["html_en", "nolang_txt", "html", "title", "title_en", "id"])
paginator = SolrPaginator(response)
summarizer = SimpleSummarizer()

for page in paginator.page_range:
    for doc in paginator.page(page).object_list:
        # Prefer the English HTML, then the language-neutral text, then raw HTML.
        text = doc.get("html_en", doc.get("nolang_txt", doc.get("html")))
        # Fall back from the English title to the plain title to the id.
        title = doc.get("title_en", doc.get("title", doc["id"]))
        summed = summarizer.get_summarized(text, options.num_sents)
        print("\t\t======= %s =======" % (title,))
        print("\t" + "\n\t".join([sent for sent in summed if not sent.startswith("Contents")]))
        print("\t\t=====================================")