This article collects typical usage examples of the utils.utils.download function in Python. If you have been wondering exactly how the download function is used, how to call it, or what real-world calls look like, the curated examples below should help.
The following presents 15 code examples of the download function, sorted by popularity by default.
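Before the examples, here is a minimal sketch of the two call patterns that appear below. The exact signature of utils.download is defined in each project's own utils module and is not shown on this page, so treat this as an assumption inferred from the examples; the URLs and destination path are hypothetical placeholders.

# A minimal sketch, assuming utils.download behaves as the examples below suggest.
from bs4 import BeautifulSoup
from utils import utils

# Pattern 1 (Examples 1-14): fetch a page and parse the returned body with BeautifulSoup.
doc = BeautifulSoup(utils.download("https://example.gov/oig/reports"))  # hypothetical URL
links = doc.select("a")

# Pattern 2 (Example 15): pass a destination path as the second argument to save the
# resource to disk instead of returning its body.
utils.download("https://example.gov/data/analytics.csv", "/tmp/analytics.csv")  # hypothetical URL/path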
Example 1: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the RSS feed
  doc = BeautifulSoup(utils.download(RSS_URL))
  results = doc.select("item")
  for result in results:
    report = rss_report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the recent audit reports.
  doc = BeautifulSoup(utils.download(RECENT_AUDITS_URL))
  results = doc.select("div.block > a")
  for result in results:
    report = report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the archive audit reports
  doc = BeautifulSoup(utils.download(AUDIT_ARCHIVE_URL))
  results = doc.select("div.block a")
  for result in results:
    report = report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the other reports
  doc = BeautifulSoup(utils.download(OTHER_REPORTS_URl))
  results = doc.select("div.block > a")
  for result in results:
    report = report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 2: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports
  for year in year_range:
    if year < 2002:  # The oldest page for audit reports
      continue
    doc = BeautifulSoup(utils.download(AUDIT_REPORTS_URL.format(year=year)))
    results = doc.select("div.content table tr")
    for index, result in enumerate(results):
      if not index:
        # Skip the header row
        continue
      report = report_from(result, report_type="audit", year_range=year_range)
      if report:
        inspector.save_report(report)

  # Pull the FOIA reports
  doc = BeautifulSoup(utils.download(FOIA_REPORTS_URL))
  results = doc.select("div.content table tr")
  for index, result in enumerate(results):
    if not index:
      # Skip the header row
      continue
    report = report_from(result, report_type="other", year_range=year_range)
    if report:
      inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("div.content a")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 3: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports
  for year in year_range:
    if year < 2005:  # This is the earliest audits go back
      continue
    url = AUDIT_REPORTS_URL.format(year=year)
    doc = BeautifulSoup(utils.download(url))
    results = doc.select("div.content")
    if not results:
      raise inspector.NoReportsFoundError("Tennessee Valley Authority (%d)" % year)
    for result in results:
      report = audit_report_from(result, url, year_range)
      if report:
        inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("report")
  if not results:
    raise inspector.NoReportsFoundError("Tennessee Valley Authority (semiannual reports)")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 4: run
def run(options):
  year_range = inspector.year_range(options)

  # Pull the audit reports
  for year in year_range:
    url = audit_report_url(year)
    if url:
      parse_result_from_js_url(url, "auditreports", year, year_range)
    url = inspection_report_url(year)
    if url:
      parse_result_from_js_url(url, "iereports", year, year_range)

  # Pull the congressional testimony
  doc = BeautifulSoup(utils.download(CONGRESSIONAL_TESTIMONY_REPORTS_URL))
  results = doc.findAll("ul", type='disc')[0].select("li")
  for result in results:
    report = congressional_testimony_report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.findAll("ul", type='disc')[0].select("li")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 5: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the general reports
  doc = BeautifulSoup(utils.download(REPORTS_URL))
  results = doc.select("div#mainContent li.mainContenttext a")
  for result in results:
    report = report_from(result, REPORTS_URL, year_range)
    if report:
      inspector.save_report(report)

  # Pull the archive reports
  doc = BeautifulSoup(utils.download(REPORT_ARCHIVE_URL))
  results = doc.select("div#mainContent li.mainContenttext a") + doc.select("div#mainContent span.mainContenttext a")
  for result in results:
    if not result.text:
      continue
    report = report_from(result, REPORT_ARCHIVE_URL, year_range)
    if report:
      inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("div#mainContent li.mainContenttext a")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 6: urls_for_topics
def urls_for_topics(self, topics):
  for topic in topics:
    # Topic might be a tuple for ADDITIONAL_TOPICS (not ones from command
    # line).
    self.report_type = None
    if isinstance(topic, tuple):
      topic, report_type = topic
      self.report_type = report_type

    last_page = False

    url = TOPIC_TO_URL[topic]
    page = BeautifulSoup(utils.download(url))
    page_started = self.is_first_page(page)
    if page_started:
      yield url

    for link in page.select('li.pager-item a'):
      next_url = urljoin(url, link['href'])
      next_page = BeautifulSoup(utils.download(next_url))
      if not page_started:
        page_started = self.is_first_page(next_page)
      if page_started:
        yield next_url
      last_page = self.is_last_page(next_page)
      if last_page:
        break
    if last_page:
      continue

  self.report_type = None  # Clear this out afterwards
Example 7: urls_for
def urls_for(self):
  only = self.options.get('topics')
  if only:  # if only...
    only = set(only.split(','))
    only = [(o, TOPIC_TO_REPORT_TYPE[o]) if o in TOPIC_TO_REPORT_TYPE else o
            for o in only]
    yield from self.urls_for_topics(only)
    # If there are topics selected, ONLY yield URLs for those.
    return

  # First yield the URLs for the topics that are tangential to the main
  # Calendar Year reports.
  yield from self.urls_for_topics(ADDITIONAL_TOPICS)

  # Not getting reports from specific topics, iterate over all Calendar Year
  # reports.
  page = BeautifulSoup(utils.download(BASE_URL))

  # Iterate over each "Calendar Year XXXX" link
  for li in page.select('.field-items li'):
    md = RE_CALENDAR_YEAR.search(li.text)
    if md:
      cur_year = int(md.group(1))
      if cur_year >= self.year_range[0] and cur_year <= self.year_range[-1]:
        href = li.select('a')[0]['href']
        next_url = urljoin(BASE_URL, href)

        # The first page of reports is yielded.
        yield next_url

        # Next, read all the pagination links for the page and yield those. So
        # far, I haven't seen a page that doesn't have all of the following
        # pages enumerated.
        next_page = BeautifulSoup(utils.download(next_url))
        for link in next_page.select('li.pager-item a'):
          yield urljoin(BASE_URL, link['href'])
Example 8: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports
  for year in year_range:
    url = AUDITS_REPORTS_URL.format(str(year)[2:4])
    doc = BeautifulSoup(utils.download(url))
    results = doc.select("tr")
    if not results:
      raise inspector.NoReportsFoundError("NASA (%d)" % year)
    for index, result in enumerate(results):
      if not index or not result.text.strip():
        # Skip the header row and any empty rows
        continue
      report = audit_report_from(result, url, year_range)
      if report:
        inspector.save_report(report)

  # Pull the other reports
  doc = BeautifulSoup(utils.download(OTHER_REPORT_URL))
  results = doc.select("#subContainer ul li")
  if not results:
    raise inspector.NoReportsFoundError("NASA (other)")
  for result in results:
    report = other_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 9: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the reports with pagination
  for report_type, report_url_format in PAGINATED_REPORT_FORMATS.items():
    for page in range(0, 999):
      url = report_url_format.format(page=page)
      doc = BeautifulSoup(utils.download(url))
      results = doc.select("li.views-row")
      if not results:
        if page == 0:
          raise inspector.NoReportsFoundError("USAID (%s)" % report_type)
        else:
          break
      for result in results:
        report = report_from(result, url, report_type, year_range)
        if report:
          inspector.save_report(report)

  # Pull the semiannual reports (no pagination)
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("li.views-row")
  if not results:
    raise inspector.NoReportsFoundError("USAID (semiannual reports)")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)
Example 10: run
def run(options):
  year_range = inspector.year_range(options, archive)

  doc = BeautifulSoup(utils.download(REPORTS_URL))

  # Pull the semiannual reports
  semiannul_results = doc.select("#AnnualManagementReports select")[0]
  for result in semiannul_results.select("option"):
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the special reports
  special_report_table = doc.find("table", attrs={"bordercolor": "#808080"})
  for index, result in enumerate(special_report_table.select("tr")):
    if not index:
      # Skip the header row
      continue
    report = report_from(result, REPORTS_URL, report_type='other', year_range=year_range)
    if report:
      inspector.save_report(report)

  # Pull the audit reports
  for year in year_range:
    if year < 2001:  # The oldest fiscal year page available
      continue
    year_url = AUDIT_REPORTS_URL.format(year=year)
    doc = BeautifulSoup(utils.download(year_url))
    for index, result in enumerate(doc.select("#main table tr")):
      if not index:
        # Skip the header row
        continue
      report = report_from(result, year_url, report_type='audit', year_range=year_range)
      if report:
        inspector.save_report(report)
Example 11: run
def run(options):
  year_range = inspector.year_range(options, archive)
  pages = options.get('pages', ALL_PAGES)

  # Pull the audit reports. Pages are 0-indexed.
  for page in range(0, int(pages) - 1):
    doc = BeautifulSoup(utils.download(AUDIT_REPORTS_URL.format(page=page)))
    results = doc.select("span.field-content")
    if not results:
      # No more results, we must have hit the last page
      break
    for result in results:
      report = report_from(result, year_range, report_type='audit')
      if report:
        inspector.save_report(report)

  # Grab the other reports
  for report_type, url in OTHER_REPORT_URLS.items():
    doc = BeautifulSoup(utils.download(url))
    results = doc.select(".views-field")
    if not results:
      results = doc.select(".views-row")
    for result in results:
      report = report_from(result, year_range, report_type)
      if report:
        inspector.save_report(report)
Example 12: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports
  for year in year_range:
    if year < 2006:  # The oldest year for audit reports
      continue
    url = AUDIT_REPORTS_URL.format(year=year)
    doc = BeautifulSoup(utils.download(url))
    results = doc.select("div#content li")
    for result in results:
      report = audit_report_from(result, url, year, year_range)
      if report:
        inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("div#content li")
  for result in results:
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the Peer Review
  doc = BeautifulSoup(utils.download(PEER_REVIEWS_URL))
  result = doc.find("div", id='content').find("a", text=True)
  report = peer_review_from(result, year_range)
  inspector.save_report(report)
Example 13: extract_reports_for_oei
def extract_reports_for_oei(year_range):
  topic_name = TOPIC_NAMES["OE"]
  topic_url = TOPIC_TO_URL["OE"]
  root_body = utils.download(topic_url)
  root_doc = BeautifulSoup(root_body)

  letter_urls = set()
  for link in root_doc.select("#leftContentInterior li a"):
    absolute_url = urljoin(topic_url, link['href'])
    absolute_url = strip_url_fragment(absolute_url)
    letter_urls.add(absolute_url)

  if not letter_urls:
    raise inspector.NoReportsFoundError("HHS (OEI first pass)")

  all_results_links = {}
  all_results_unreleased = []

  for letter_url in letter_urls:
    letter_body = utils.download(letter_url)
    letter_doc = BeautifulSoup(letter_body)

    results = letter_doc.select("#leftContentInterior ul li")
    if not results:
      raise inspector.NoReportsFoundError("HHS (OEI %s)" % letter_url)
    for result in results:
      if 'crossref' in result.parent.parent.attrs.get('class', []):
        continue
      if result.parent.parent.attrs.get('id') == 'related':
        continue

      node = result
      while node and node.name != "h2":
        node = node.previous
      if node and node.name == "h2":
        subtopic_name = str(node.text)
      else:
        subtopic_name = "(unknown)"

      links = result.findAll("a")
      if len(links) == 0:
        result.extract()
        all_results_unreleased.append([result, subtopic_name])
      else:
        url = links[0].get("href")
        if url not in all_results_links:
          result.extract()
          all_results_links[url] = [result, subtopic_name]
        else:
          existing_result = all_results_links[url][0]
          for temp in result.contents:
            temp.extract()
            existing_result.append(temp)
          all_results_links[url][1] = "%s, %s" % (all_results_links[url][1], subtopic_name)

  subtopic_url = TOPIC_TO_URL["OE"]
  for result, subtopic_name in itertools.chain(all_results_links.values(), all_results_unreleased):
    report = report_from(result, year_range, topic_name, subtopic_url, subtopic_name)
    if report:
      inspector.save_report(report)
Example 14: run
def run(options):
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports
  doc = BeautifulSoup(utils.download(AUDIT_REPORTS_URL))
  results = doc.select("td.text table tr")
  if not results:
    raise inspector.NoReportsFoundError("National Science Foundation (audit reports)")
  for result in results:
    # ignore divider lines
    if result.select("img"): continue
    report = report_from(result, report_type='audit', year_range=year_range)
    if report:
      inspector.save_report(report)

  # Pull the semiannual reports
  doc = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  results = doc.select("td.text table tr")
  if not results:
    raise inspector.NoReportsFoundError("National Science Foundation (semiannual reports)")
  for result in results:
    if not result.text.strip():
      continue
    report = semiannual_report_from(result, year_range)
    if report:
      inspector.save_report(report)

  # Pull the case reports
  response = utils.scraper.post(
    url=CASE_REPORTS_URL,
    data=CASE_REPORTS_DATA,
  )
  doc = BeautifulSoup(response.content)
  results = doc.select("td.text table tr")
  if not results:
    raise inspector.NoReportsFoundError("National Science Foundation (case reports)")
  for index, result in enumerate(results):
    if not index or not result.text.strip():  # Skip the header row and empty rows
      continue
    report = case_report_from(result, CASE_REPORTS_URL, year_range)
    if report:
      inspector.save_report(report)

  # Pull the testimony
  doc = BeautifulSoup(utils.download(TESTIMONY_REPORTS_URL))
  results = doc.select("td.text table tr")
  if not results:
    raise inspector.NoReportsFoundError("National Science Foundation (testimony)")
  for result in results:
    if not result.text.strip():
      continue
    report = report_from(result, report_type='testimony', year_range=year_range)
    if report:
      inspector.save_report(report)
Example 15: handle_scanner_args
def handle_scanner_args(args, opts) -> Tuple[dict, list]:
    """
    --analytics: file path or URL to a CSV of participating domains.

    This function also handles checking for the existence of the file,
    downloading it successfully, and reading the file in order to populate the
    list of analytics domains.
    """
    parser = scan_utils.ArgumentParser(prefix_chars="--")
    parser.add_argument("--analytics", nargs=1, required=True)
    parsed, unknown = parser.parse_known_args(args)
    dicted = vars(parsed)
    should_be_single = ["analytics"]
    dicted = scan_utils.make_values_single(dicted, should_be_single)
    resource = dicted.get("analytics")

    if not resource.endswith(".csv"):
        no_csv = "".join([
            "--analytics should be the file path or URL to a CSV of participating",
            " domains and end with .csv, which '%s' does not" % resource
        ])
        logging.error(no_csv)
        raise argparse.ArgumentTypeError(no_csv)

    try:
        parsed_url = urlparse(resource)
    except:
        raise

    if parsed_url.scheme and parsed_url.scheme in ("http", "https"):
        analytics_path = Path(opts["_"]["cache_dir"], "analytics.csv").resolve()
        try:
            utils.download(resource, str(analytics_path))
        except:
            logging.error(utils.format_last_exception())
            no_csv = "--analytics URL %s not downloaded successfully." % resource
            logging.error(no_csv)
            raise argparse.ArgumentTypeError(no_csv)
    else:
        if not os.path.exists(resource):
            no_csv = "--analytics file %s not found." % resource
            logging.error(no_csv)
            raise FileNotFoundError(no_csv)
        else:
            analytics_path = resource

    analytics_domains = utils.load_domains(analytics_path)
    dicted["analytics_domains"] = analytics_domains
    del dicted["analytics"]

    return (dicted, unknown)