本文整理匯總了Python中Crawler.Crawler.crawl_multithread方法的典型用法代碼示例。如果您正苦於以下問題:Python Crawler.crawl_multithread方法的具體用法?Python Crawler.crawl_multithread怎麼用?Python Crawler.crawl_multithread使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Crawler.Crawler的用法示例。
在下文中一共展示了Crawler.crawl_multithread方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: cralwer
# 需要導入模塊: from Crawler import Crawler [as 別名]
# 或者: from Crawler.Crawler import crawl_multithread [as 別名]
import argparse
import os.path

from Crawler import Crawler
# Command-line entry point: read a JSON parameter file, let the command-line
# options override its "crawling" and "output" sections, then run the crawler
# either single-threaded or multi-threaded.
parser = argparse.ArgumentParser(description='Crawl file and execute regex rules on them')
parser.add_argument(
    '-p', metavar='ParameterFilePath', type=argparse.FileType('r'), required=True,
    help="path to a parameter json file. Parameter file should contain a 'crawling', 'rules' and 'result' key",
)
parser.add_argument(
    '-o', metavar='OutputFilePath', type=argparse.FileType('w+'),
    help='output file. This argument is required if no output is specified in parameter file.\n The file must be either a .csv or .json',
)
parser.add_argument(
    '-mt', metavar='Thread Numbers', type=int,
    help='have a multi-threaded cralwer (1 thread per file) and precise the number of concurrent thread',
)
parser.add_argument(
    '-s', metavar='StartDirectory', type=str,
    help='directory in which the crawling will start. This parameter is necessary if there is no "crawling" dictionary in the parameter file',
)
args = parser.parse_args()

# argparse stores every declared option on the namespace (as None when it was
# not given), so `"x" in args` is always true; presence has to be tested with
# `is None`.  '-p' is declared required, so this guard is purely defensive.
if args.p is None:
    parser.error("a parameter file is required (-p)")

# NOTE(review): FO is not imported in the visible part of the file; it is
# presumably the project's file-operations helper and is assumed to return
# the parsed JSON as a dict -- confirm.
param = FO.get_from_JSON_file(args.p.name)

if "rules" not in param:
    parser.error("the parameter file must contain a 'rules' key")

# Without this check a run with neither '-o' nor an "output" section would
# only fail later with a KeyError on param["output"].
if args.o is None and "output" not in param:
    parser.error("no output specified: pass -o or add an 'output' section to the parameter file")

if args.s is not None:
    # A start directory on the command line replaces the whole "crawling"
    # section of the parameter file.
    param["crawling"] = {"start": args.s}
elif "crawling" not in param:
    parser.error("no start directory: pass -s or add a 'crawling' section to the parameter file")

if args.o is not None:
    # NOTE(review): argparse.FileType has already opened (and truncated) this
    # file; only its name is forwarded and the handle itself is never used.
    output_extension = os.path.splitext(args.o.name)[1]
    param["output"] = {
        "path": args.o.name,
        # Anything that is not a .csv file is written as JSON.
        "type": "csv" if output_extension.lower() == ".csv" else "json",
    }

if args.mt is not None:
    Crawler.crawl_multithread(
        param.get("crawling"), param.get("rules"), param.get("result"), param["output"], args.mt
    )
else:
    Crawler.crawl(param.get("crawling"), param.get("rules"), param.get("result"), param["output"])
示例2: test_crawl_native_minimalParameterFile_multithreaded_native
# 需要導入模塊: from Crawler import Crawler [as 別名]
# 或者: from Crawler.Crawler import crawl_multithread [as 別名]
def test_crawl_native_minimalParameterFile_multithreaded_native(self):
    """Run a multithreaded crawl from the minimal parameter file.

    Expects the first 'city' value of the 'HasName' matches for
    ./test/test_inputs/minimalist_data.txt to be 'London'.
    """
    config = FileOperations.get_from_JSON_file("./test/minimal_parameters.json")
    crawling_section = config["crawling"]
    rules_section = config["rules"]
    # "result" is optional in the parameter file, hence .get().
    result_section = config.get("result")

    crawl_output = Crawler.crawl_multithread(crawling_section, rules_section, result_section)

    file_entry = crawl_output['./test/test_inputs/minimalist_data.txt']
    cities = file_entry['matches']['HasName']['city']
    self.assertEqual(cities[0], 'London')