當前位置: 首頁>>代碼示例>>Python>>正文


Python tldextract.extract方法代碼示例

本文整理匯總了Python中tldextract.extract方法的典型用法代碼示例。如果您正苦於以下問題:Python tldextract.extract方法的具體用法?Python tldextract.extract怎麼用?Python tldextract.extract使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在tldextract的用法示例。


在下文中一共展示了tldextract.extract方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: get_subdomains

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def get_subdomains(domain):
    """Validate ``domain`` and brute-force its subdomains.

    Args:
        domain: Domain name to enumerate, e.g. ``"example.com"``.

    Returns:
        Whatever ``EnumSubDomain(domain).run()`` returns, or ``None`` when
        ``domain`` is empty, fails validation, or the enumeration raises.
    """
    if not domain:
        logger.warning('domain is empty')
        return None

    # Validate a normalized copy: the whole string must look like a dotted
    # hostname and tldextract must report a non-empty suffix for it.
    p = domain.strip().lower()
    re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,})$', p)
    is_valid = (
        len(re_domain) > 0
        and re_domain[0][0] == p
        and tldextract.extract(p).suffix != ''
    )
    if not is_valid:
        logger.error('Domain validation failed: {d}'.format(d=p))
        # Stop here: previously the failure was only logged and the invalid
        # name was still handed to the brute-forcer.
        return None

    try:
        # NOTE(review): the original (un-normalized) value is passed on, as
        # before; confirm whether EnumSubDomain expects the stripped form.
        esd = EnumSubDomain(domain)
        return esd.run()
    except Exception:
        logger.error('Unexpected error occured when brute subdomain for {}'.format(domain),exc_info=True)
        return None
開發者ID:m4yfly,項目名稱:butian-src-domains,代碼行數:20,代碼來源:subdomain_brute.py

示例2: internal_links

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def internal_links(target):
	"""Collect hrefs that mention the target's registered domain.

	Scans every ``<a>`` element of the module-level ``soup``; each non-None
	href containing the registered domain of ``target`` is appended to the
	global ``int_total``, which is then rebound to a set of the unique values.
	Progress and the final count are printed.
	"""
	global total, int_total
	print(G + '[+]' + C + ' Extracting Internal Links' + W, end = '')

	site = tldextract.extract(target).registered_domain

	for anchor in soup.find_all('a'):
		href = anchor.get('href')
		# Plain substring test: any href containing the domain text counts.
		if href != None and site in href:
			int_total.append(href)

	int_total = set(int_total)
	count_text = str(len(int_total))
	print(G + '['.rjust(6, '.') + ' {} ]'.format(count_text))
開發者ID:thewhiteh4t,項目名稱:FinalRecon,代碼行數:18,代碼來源:crawler.py

示例3: external_links

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def external_links(target):
	"""Collect hrefs that point away from the target's registered domain.

	Scans every ``<a>`` element of the module-level ``soup``; each non-None
	href that contains ``'http'`` but not the registered domain of ``target``
	is appended to the global ``ext_total``, which is then rebound to a set of
	the unique values. Progress and the final count are printed.
	"""
	global total, ext_total
	print(G + '[+]' + C + ' Extracting External Links' + W, end = '')

	site = tldextract.extract(target).registered_domain

	for anchor in soup.find_all('a'):
		href = anchor.get('href')
		# Plain substring tests: the domain must be absent and 'http' present.
		if href != None and site not in href and 'http' in href:
			ext_total.append(href)

	ext_total = set(ext_total)
	count_text = str(len(ext_total))
	print(G + '['.rjust(6, '.') + ' {} ]'.format(count_text))
開發者ID:thewhiteh4t,項目名稱:FinalRecon,代碼行數:18,代碼來源:crawler.py

示例4: remove_tld

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def remove_tld(domain):
    """Strip the suffix reported by ``extract`` from a domain name.

    Params:
    - domain: (type: string) FQDN.

    Returns:
    - domain: (type: string) the FQDN with the last occurrence of its suffix
      removed and leading/trailing dots trimmed. If anything raises along the
      way a warning is logged and the input is returned unchanged.
    """
    try:
        suffix = extract(domain).suffix
        # rpartition splits around the LAST occurrence of the suffix, so
        # gluing head and tail back together drops exactly that occurrence.
        head, _, tail = domain.rpartition(suffix)
        domain = (head + tail).strip('.')

    except Exception as e:
        LOGGING.warning(
            'Error stripping TLD ({0}): {1}'.format(
                domain, str(e)))

    return domain
開發者ID:phage-nz,項目名稱:ph0neutria,代碼行數:21,代碼來源:string_utils.py

示例5: transfer_session_cookies_to_driver

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def transfer_session_cookies_to_driver(self, domain=None):
        """Push this session's cookies for one domain into the webdriver.

        Only cookies whose ``domain`` attribute contains ``domain`` are copied.
        When ``domain`` is not given, the registered domain of the last URL
        requested through the session is used instead.

        Raises:
            Exception: if no domain was given and no URL has been requested yet.
        """
        if not domain:
            if not self._last_requests_url:
                raise Exception('Trying to transfer cookies to selenium without specifying a domain '
                                'and without having visited any page in the current session')
            domain = tldextract.extract(self._last_requests_url).registered_domain

        # Select first, then add, so filtering is complete before the driver is touched.
        selected = [cookie for cookie in self.cookies if domain in cookie.domain]
        for cookie in selected:
            self.driver.ensure_add_cookie({'name': cookie.name, 'value': cookie.value, 'path': cookie.path,
                                           'expiry': cookie.expires, 'domain': cookie.domain})
開發者ID:tryolabs,項目名稱:requestium,代碼行數:19,代碼來源:requestium.py

示例6: getDomain

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def getDomain(url):
    """

    Record the URL's network location and return its registered domain.

    Parameters
    -------
    url: str
        URL to inspect.

    Returns
    --------
    str
        ``registered_domain`` as reported by ``tldextract.extract`` for the URL.

    Notes
    -----
    A non-empty ``netloc`` is added to the module-level ``finalset`` as a
    side effect.
    """
    netloc = urlparse(url).netloc
    if netloc != '':
        finalset.add(netloc)
    return tldextract.extract(str(url)).registered_domain
開發者ID:nsonaniya2010,項目名稱:SubDomainizer,代碼行數:21,代碼來源:SubDomainizer.py

示例7: parse_domain

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def parse_domain(url):

    """
    Return the registered domain found in a URL.

    The URL is lower-cased and stripped, and everything up to and including
    the last ``http:/`` / ``https://``-style marker is discarded before the
    remainder is handed to ``tldextract.extract``.

    Args:
        url (str)

    Returns: str
    """

    cleaned = url.lower().strip()

    # Keep only what follows the final scheme marker.
    remainder = re.split('http[s]?:/{1,2}', cleaned)[-1]

    return tldextract.extract(remainder).registered_domain
開發者ID:davidmcclure,項目名稱:open-syllabus-project,代碼行數:21,代碼來源:utils.py

示例8: can_create_user

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def can_create_user(email_address, blacklisted_domains=None):
    """
    Decide whether a new user may be created for the given e-mail address.

    Returns False when the e-mail blacklist feature is on and the address's
    registered domain is in ``blacklisted_domains``, when user creation is
    disabled, or when creation is invite-only and the address is missing or has
    no invite. Returns True otherwise.
    """

    check_blacklist = features.BLACKLISTED_EMAILS and email_address and "@" in email_address
    if check_blacklist:
        denied = blacklisted_domains or []
        email_domain = email_address.split("@", 1)[1]
        registered = tldextract.extract(email_domain).registered_domain
        if registered.lower() in denied:
            return False

    if not features.USER_CREATION:
        return False

    if not features.INVITE_ONLY_USER_CREATION:
        # Open registration: allowed (assuming the user doesn't already exist).
        return True

    if not email_address:
        return False

    # Invite-only: there must be an invite for this e-mail address.
    return bool(model.team.lookup_team_invites_by_email(email_address))
開發者ID:quay,項目名稱:quay,代碼行數:26,代碼來源:shared.py

示例9: collect_url

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def collect_url(web_url):
        """Fetch a page, feed it to the link parser and save its HTML to disk.

        The body is only read (and decoded as latin-1) when the Content-Type
        header contains ``text/html``; otherwise an empty string is parsed and
        written. The HTML is saved under ``Crawl_bot.folder_name`` in a file
        named after the ``domain`` part that tldextract reports for the URL.

        Args:
            web_url: URL of the page to fetch.

        Returns:
            The result of ``link_finder.page_urls()``, or an empty set if any
            step raised (the exception text is printed).
        """
        html_data_string = ''
        try:
            # NOTE(review): relies on the response being a context manager, as
            # urllib.request.urlopen's is — confirm the import. Closing it here
            # releases the connection even when reading/decoding fails.
            with urlopen(web_url) as received_response:
                if 'text/html' in received_response.getheader('Content-Type'):
                    data_bytes = received_response.read()
                    html_data_string = data_bytes.decode("latin-1")
            link_finder = link_crawler(Crawl_bot.start_link, web_url)
            link_finder.feed(html_data_string)

            # For scraping purposes: keep a copy of the fetched HTML on disk.
            # The context manager closes the file even if the write fails.
            dump_path = Crawl_bot.folder_name + '/' + ((tldextract.extract(web_url)).domain)
            with open(dump_path, 'w') as f:
                f.write(html_data_string)

        except Exception as e:
            print(str(e))
            return set()
        return link_finder.page_urls()
開發者ID:Quantika14,項目名稱:osint-suite-tools,代碼行數:24,代碼來源:crawl_bot.py

示例10: search_google_

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def search_google_(target):
    """Search Google for ``target`` and parse the text of each successful hit.

    Every result is printed, then its link is fetched (3 s timeout). When the
    response status is 2xx and the result's ``domain.suffix`` is not listed in
    ``config.BL_parserPhone``, the page text is stripped of tags and passed to
    ``parser.parserMAIN``. Any exception for a single result is printed and
    the loop moves on to the next one.

    Args:
        target: Search term; it is wrapped in single quotes for the query.
    """
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print ("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])

        try:
            # Attribute access instead of tuple unpacking: the number of
            # fields in the extract result is not something to rely on.
            extracted = extract(r["link"])
            domain = extracted.domain + '.' + extracted.suffix

            web = requests.get(r["link"], timeout=3)
            print ("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")

            # Only parse successful responses. The previous test
            # (`>= 200 or < 300`) was true for every status code.
            if 200 <= web.status_code < 300 and domain not in config.BL_parserPhone:
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)

        except Exception as e:
            print ("|----[ERROR][HTTP CONNECTION][>] " + str(e))
開發者ID:Quantika14,項目名稱:osint-suite-tools,代碼行數:23,代碼來源:BuscadorPersonas.py

示例11: join_words_subdomains

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def join_words_subdomains(args, alteration_words):
    """Write subdomain variants built by gluing words onto each label.

    For every line of ``args.input``, every word and every label of the
    line's subdomain, two candidates are passed to ``write_domain``: first the
    label with the stripped word appended, then the label with the stripped
    word prepended. All other labels, the domain and the suffix are kept as
    they are. Output goes to ``args.output_tmp``, opened in append mode.
    """
    with open(args.input, "r") as fp, open(args.output_tmp, "a") as wp:
        for line in fp:
            ext = tldextract.extract(line.strip())
            labels = ext.subdomain.split(".")
            for word in alteration_words:
                token = word.strip()
                for pos, label in enumerate(labels):
                    # Appended form first, then prepended form.
                    for altered in (label + token, token + label):
                        # Rebuild the label list with only this position changed.
                        variant = labels[:pos] + [altered] + labels[pos + 1:]
                        full_url = "{0}.{1}.{2}\n".format(
                            ".".join(variant), ext.domain, ext.suffix)
                        write_domain(args, wp, full_url)
開發者ID:infosec-au,項目名稱:altdns,代碼行數:27,代碼來源:__main__.py

示例12: cleanup_url

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def cleanup_url(url, tls_support):
        """
        Prefix a scheme-less URL with ``https://`` or ``http://``.

        ``tls_support`` is a cache keyed by ``subdomain.domain.suffix``; a
        missing entry is filled from ``TlsTest.test_tls_supported(url)`` and
        logged. The returned URL is wrapped in single quotes. URLs that
        already carry a scheme yield None.
        """
        if urlparse(url).scheme != '':
            return None

        parts = tldextract.extract(url)
        host_key = f'{parts.subdomain}.{parts.domain}.{parts.suffix}'
        try:
            tls_supported = tls_support[host_key]
        except KeyError:
            # First time this host is seen: probe it and remember the answer.
            tls_supported = TlsTest.test_tls_supported(url)
            tls_support[host_key] = tls_supported
            log.info('Tested domain {}'.format(host_key))

        template = "'https://{}'" if tls_supported else "'http://{}'"
        return template.format(url)
開發者ID:creativecommons,項目名稱:cccatalog-api,代碼行數:23,代碼來源:cleanup.py

示例13: getGoogleDomains

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def getGoogleDomains(self):
		"""Print the unique ``domain.suffix`` names found in the Google results file.

		Reads ``results/<self.org>/google.txt`` line by line, runs each line
		through ``tldextract.extract`` and prints every distinct
		``domain + "." + suffix`` once, in first-seen order. If the file does
		not exist, prints ``Not found`` and exits via ``SystemExit``.
		"""
		import sys

		path = "results/"+self.org+"/google.txt"
		if not os.path.exists(path):
			print("Not found")
			# sys.exit instead of the site-provided exit(), which is meant
			# for the interactive interpreter.
			sys.exit()

		domains = []
		seen = set()
		# The context manager closes the file even if extraction raises.
		with open(path) as fh:
			for line in fh:
				extracted = tldextract.extract(line)
				name = extracted.domain+"."+extracted.suffix
				if name not in seen:
					seen.add(name)
					domains.append(name)

		for domain in domains:
			print(domain)
開發者ID:esecuritylab,項目名稱:kostebek,代碼行數:19,代碼來源:kostebek.py

示例14: getBingDomains

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def getBingDomains(self):
		"""Print the unique ``domain.suffix`` names found in the Bing results file.

		Reads ``results/<self.org>/bing.txt`` line by line, runs each line
		through ``tldextract.extract`` and prints every distinct
		``domain + "." + suffix`` once, in first-seen order. If the file does
		not exist, prints ``Not found`` and exits via ``SystemExit``.
		"""
		import sys

		path = "results/"+self.org+"/bing.txt"
		if not os.path.exists(path):
			print("Not found")
			# sys.exit instead of the site-provided exit(), which is meant
			# for the interactive interpreter.
			sys.exit()

		domains = []
		seen = set()
		# The context manager closes the file even if extraction raises.
		with open(path) as fh:
			for line in fh:
				extracted = tldextract.extract(line)
				name = extracted.domain+"."+extracted.suffix
				if name not in seen:
					seen.add(name)
					domains.append(name)

		for domain in domains:
			print(domain)
開發者ID:esecuritylab,項目名稱:kostebek,代碼行數:19,代碼來源:kostebek.py

示例15: getYahooDomains

# 需要導入模塊: import tldextract [as 別名]
# 或者: from tldextract import extract [as 別名]
def getYahooDomains(self):
		"""Print the unique ``domain.suffix`` names found in the Yahoo results file.

		Reads ``results/<self.org>/yahoo.txt`` line by line, runs each line
		through ``tldextract.extract`` and prints every distinct
		``domain + "." + suffix`` once, in first-seen order. If the file does
		not exist, prints ``Not found`` and exits via ``SystemExit``.
		"""
		import sys

		path = "results/"+self.org+"/yahoo.txt"
		if not os.path.exists(path):
			print("Not found")
			# sys.exit instead of the site-provided exit(), which is meant
			# for the interactive interpreter.
			sys.exit()

		domains = []
		seen = set()
		# The context manager closes the file even if extraction raises.
		with open(path) as fh:
			for line in fh:
				extracted = tldextract.extract(line)
				name = extracted.domain+"."+extracted.suffix
				if name not in seen:
					seen.add(name)
					domains.append(name)

		for domain in domains:
			print(domain)
開發者ID:esecuritylab,項目名稱:kostebek,代碼行數:19,代碼來源:kostebek.py


注:本文中的tldextract.extract方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。