This article collects typical usage examples of the purl.URL.domain method in Python. If you are unsure what URL.domain does or how to call it, the curated code samples below may help. You can also explore further usage examples of the purl.URL class that this method belongs to.
The following shows 3 code examples of the URL.domain method, sorted by popularity by default.
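Before diving into the examples, here is a minimal sketch of the purl calls the samples below rely on: URL.domain() (the host part of a URL), path(), subdomains(), and the keyword-argument constructor combined with as_string(). The example URL and the output comments are illustrative only, not taken from the examples:

from purl import URL

u = URL('https://www.quora.com/What-is-purl?share=1')
u.domain()       # 'www.quora.com' (domain() returns the host part)
u.path()         # '/What-is-purl'
u.subdomains()   # ['www', 'quora', 'com']

# URL also works as a builder, which the examples below use
# to reassemble canonical URLs:
URL(scheme='https', host='www.quora.com', path='/What-is-purl',
    query='share=1').as_string()   # 'https://www.quora.com/What-is-purl?share=1'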
Example 1: get_data
# Required import: from purl import URL [as alias]
# Or: from purl.URL import domain [as alias]
import re

import requests
from bs4 import BeautifulSoup
from flask import jsonify  # jsonify is assumed to be Flask's, as in the original project
from purl import URL


def get_data(q_link):
    url = URL(q_link)
    if url.domain() not in ['quora.com', 'www.quora.com']:
        return 'error, not quora'
    # Rebuild a canonical https URL for the question page.
    url = URL(
        scheme='https',
        host='www.quora.com',
        path=url.path(),
        query='share=1').as_string()
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    question = {}
    question['url'] = url
    question['title'] = soup.find("div", {"class": "question_text_edit"}).text
    question['topics'] = [topic.text for topic in soup.find_all("div", {"class": "topic_list_item"})]
    question['details'] = soup.find("div", {"class": "question_details_text"}).text
    answers = []
    divs = soup.find_all("div", {"class": "pagedlist_item"})
    try:
        ans_count = soup.find("div", {"class": "answer_header_text"}).text.strip()
        count = int(re.match(r'(\d+) Answers', ans_count).groups()[0])
    except Exception:
        # No parsable answer count: return the question with an empty answer list.
        return jsonify(question=question, answers=answers)
    question['answer_count'] = count
    # Parse at most six answers (fewer if the page lists fewer).
    count = len(divs) - 1 if count < 6 else 6
    for i in range(count):
        one_answer = {
            'votes': '-1',
            'rank': 0,
            'answer': ''
        }
        author = {}
        try:
            author['name'] = divs[i].find("div", {"class": "answer_user"}).find("span", {"class": "answer_user_wrapper"}).find("a", {"class": "user"}).string
            author['bio'] = divs[i].find("div", {"class": "answer_user"}).find("span", {"class": "answer_user_wrapper"}).find_all("span", {"class": "rep"})[1].find("span", {"class": "hidden"}).text
        except Exception:
            # Anonymous answers have no author block.
            author['name'] = 'Anonymous'
            author['bio'] = ''
        one_answer['author'] = author
        one_answer['votes'] = divs[i].find("span", {"class": "numbers"}).text
        # Keep the raw HTML of the answer body (all children except the trailing node).
        html_block = divs[i].find("div", {"id": re.compile("(.*)_container")}).contents
        answer_html = ''
        for p in range(len(html_block) - 1):
            answer_html += str(html_block[p])
        one_answer['answer_html'] = answer_html
        one_answer['answer'] = divs[i].find("div", {"class": "answer_content"}).text
        one_answer['rank'] = i + 1
        answers.append(one_answer)
    return jsonify(question=question, answers=answers)
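Example 1 returns Flask's jsonify(), so it is presumably meant to back an API view. Below is a minimal sketch of how get_data could be wired into a Flask route; the route path, query parameter name and app object are assumptions made for illustration, not part of the original example:

from flask import Flask, request

app = Flask(__name__)

@app.route('/api/question')
def question_endpoint():
    # e.g. GET /api/question?link=https://www.quora.com/<question-slug>
    return get_data(request.args.get('link', ''))

if __name__ == '__main__':
    app.run(debug=True)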
Example 2: canonical_url
# Required import: from purl import URL [as alias]
# Or: from purl.URL import domain [as alias]
import os

from django.conf import settings
from django.contrib.sites.models import Site
from purl import URL


def canonical_url(url, domain_check=True):
    """
    Ensure that the url contains the `http://mysite.com` part,
    particularly for requests made on the local dev server.
    """
    current_site = Site.objects.get(id=settings.SITE_ID)
    if not url.startswith('http'):
        url = "http://%s" % os.path.join(current_site.domain, url.lstrip('/'))
    if domain_check:
        url_parts = URL(url)
        # Build a purl URL for the current Site's domain and compare its last
        # two host labels (e.g. ['mysite', 'com']) with the given url's.
        current_site_parts = URL(URL().domain(current_site.domain).as_string())
        if url_parts.subdomains()[-2:] != current_site_parts.subdomains()[-2:]:
            raise ValueError("Suspicious domain '%s' that differs from the "
                             "current Site one '%s'" % (url_parts.domain(), current_site_parts.domain()))
    return url
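The domain_check branch above compares only the last two labels returned by subdomains(), so www.mysite.com and mysite.com count as the same site. Here is a standalone sketch of that comparison outside Django; the hostnames are made up, and note that such a naive check would mis-handle two-level public suffixes like .co.uk:

from purl import URL

def same_registered_domain(url_a, url_b):
    # Compare the last two host labels, e.g. ['mysite', 'com'].
    return URL(url_a).subdomains()[-2:] == URL(url_b).subdomains()[-2:]

same_registered_domain('http://www.mysite.com/page', 'http://mysite.com')  # True
same_registered_domain('http://www.mysite.com/page', 'http://evil.com')    # False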
Example 3: get_questions
# Required import: from purl import URL [as alias]
# Or: from purl.URL import domain [as alias]
import requests
from bs4 import BeautifulSoup
from flask import jsonify  # jsonify is assumed to be Flask's, as in Example 1
from purl import URL


def get_questions(s_link):
    url = URL(s_link)
    if url.domain() not in ['quora.com', 'www.quora.com']:
        return 'error, not quora'
    # Rebuild a canonical https URL for the topic page.
    url = URL(
        scheme='https',
        host='www.quora.com',
        path=url.path(),
        query='share=1').as_string()
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    topic = {}
    topic['url'] = url
    topic['title'] = soup.find("span", {"class": "TopicName"}).text
    questions = []
    divs = soup.find_all("div", {"class": "pagedlist_item"})
    count = len(divs) - 1
    for i in range(count):
        one_question = {
            'url': '',
            'title': ''
        }
        try:
            one_question['url'] = divs[i].find("a", {"class": "question_link"})['href']
            one_question['title'] = divs[i].find("a", {"class": "question_link"}).find("span", {"class": "link_text"}).text
        except Exception:
            # Report which item failed to parse.
            return jsonify(topic=topic, questions=questions, parse_failure=one_question)
        # The scraped href is relative; turn it into an absolute URL.
        one_question['url'] = URL(
            scheme='https',
            host='www.quora.com',
            path=one_question['url']).as_string()
        if one_question['title'] != "":
            questions.append(one_question)
    return jsonify(topic=topic, questions=questions)
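Examples 1 and 3 start with the same two steps: reject non-Quora links via URL.domain(), then rebuild a canonical https URL with share=1. If that pattern is reused, it could be factored into a small helper; canonical_quora_url is a hypothetical name, not something defined in the original code:

from purl import URL

QUORA_HOSTS = ('quora.com', 'www.quora.com')

def canonical_quora_url(link):
    """Return the canonical https://www.quora.com/... form of link, or None."""
    url = URL(link)
    if url.domain() not in QUORA_HOSTS:
        return None
    return URL(scheme='https', host='www.quora.com',
               path=url.path(), query='share=1').as_string()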