This article collects typical usage examples of the utils.merge function in Python. If you have been wondering what utils.merge does, how to call it, or what real-world code that uses it looks like, the curated examples below may help.
Fifteen code examples of the merge function are shown below, sorted by popularity by default.
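Most of the examples use utils.merge to combine nested dictionaries, typically merging a base options dict with a few per-call overrides before handing the result to a download helper. The actual implementation differs from project to project; the following is only a minimal sketch, assuming a recursive dict merge in which the second argument wins on conflicts:

def merge(base, overrides):
    # Recursively merge `overrides` into a copy of `base`; values from
    # `overrides` win on conflicts, and nested dicts are merged rather than
    # replaced. Illustration only, not any quoted project's actual utils.merge.
    result = dict(base)
    for key, value in overrides.items():
        if isinstance(result.get(key), dict) and isinstance(value, dict):
            result[key] = merge(result[key], value)
        else:
            result[key] = value
    return result

# e.g. merge(options, {'binary': True}) returns a copy of options with
# 'binary' set, leaving the original options dict untouched.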
Example 1: getlocals
def getlocals(steamhome, logindata, gamedata, localdata):
    logindata["configdata"] = {}
    for root, subFolders, files in os.walk(os.path.join(steamhome["path"], "userdata", logindata["dir"])):
        for file in files:
            if file.lower().endswith("config.vdf"):
                vdfdata = utils.vdf.load(open(os.path.join(steamhome["path"], "userdata", root, file)))
                logindata["configdata"] = utils.merge(logindata["configdata"], vdfdata)

    def getnewgamedata(appid, name):
        ret = {"appid": appid, "name": name}
        if int(appid) <= 999999:  # not a shortcut
            ret["firstseen"] = int(time.time())
        return ret

    purchaseddata = purchased.getpurchased(logindata)
    localdb = utils.merge(shortcuts.getshortcuts(steamhome, logindata), apppackages.getappinfo(steamhome, logindata))
    localdata.clear()
    for g in localdb:
        if "data-isdlc" not in localdb[g]:
            localdata[g] = localdb[g]
            if not g in gamedata:
                gamedata[g] = getnewgamedata(g, localdb[g]["name"])
            if "data-packageid" in localdb[g] and localdb[g]["data-packageid"] in purchaseddata:
                gamedata[g]["firstseen"] = purchaseddata[localdb[g]["data-packageid"]]
Example 2: select_group
def select_group(self, group):
    if self.__group == group:
        return

    if group:
        groups = [group] + [g for g in self.groups if g != group]
    else:
        groups = self.groups

    # clear dict and only keep some values we want unchanged
    if not self.__base_dict:
        self.__base_dict = self.__dict__.copy()
    else:
        self.__dict__ = self.__base_dict.copy()

    # updating
    for group_ in groups:
        group_.select_group(None)
        if group_.handlers:
            merge(self.handlers, group_.handlers)
        self.__inherits(self.__dict__, group_.__dict__)

    # some values that we must reset to their original state
    for key in ('synctrex', 'group', 'groups', 'children'):
        if key in self.__base_dict:
            setattr(self, key, self.__base_dict[key])
        elif hasattr(self, key):
            delattr(self, key)

    self.__group = group
Example 3: _merge_sort
def _merge_sort(a, left, right):
    if right - left < 2:
        return
    pivot = (right + left) // 2
    _merge_sort(a, left, pivot)
    _merge_sort(a, pivot, right)
    merge(a, left, pivot, right)
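Note that the merge called here is not the dictionary merge from the other examples: it is the merge step of merge sort, invoked with index bounds. A minimal sketch of what a merge(a, left, pivot, right) with that signature might do, merging the sorted halves a[left:pivot] and a[pivot:right] in place (hypothetical, not the quoted project's implementation):

def merge(a, left, pivot, right):
    # Merge the two already-sorted slices a[left:pivot] and a[pivot:right]
    # back into a[left:right]. Sketch only; the real helper may differ.
    merged = []
    i, j = left, pivot
    while i < pivot and j < right:
        if a[i] <= a[j]:
            merged.append(a[i])
            i += 1
        else:
            merged.append(a[j])
            j += 1
    merged.extend(a[i:pivot])
    merged.extend(a[j:right])
    a[left:right] = merged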
Example 4: mkdumpdb
def mkdumpdb():
    dumpdb = {
        "name64": logindata["id64"],
        "name": logindata["dir"],
        "user": logindata["name"]
    }
    if "avatar" in logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]:
        dumpdb["avatar"] = logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]["avatar"]
    dumpdb["gamelist"] = {}
    for db in dbs:
        dbo = copy.deepcopy(db)
        utils.merge(dumpdb["gamelist"], dbo)
    return dumpdb
Example 5: run
def run(options):
    amdt_id = options.get('amendment_id', None)

    search_state = {}

    if amdt_id:
        amdt_type, number, congress = utils.split_bill_id(amdt_id)
        to_fetch = [amdt_id]
    else:
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
        if not to_fetch:
            if options.get("fast", False):
                logging.warn("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

    limit = options.get('limit', None)
    if limit:
        to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)
    save_bill_search_state(saved_amendments, search_state)
Example 6: mirror_package
def mirror_package(sitemap, package_name, lastmod, content_detail_url, options):
    """Create a local mirror of a FDSys package."""

    # Return a list of files we downloaded.
    results = []

    if not options.get("granules", False):
        # Most packages are just a package. This is the usual case.
        results = mirror_package_or_granule(sitemap, package_name, None, lastmod, options)
    else:
        # In some collections, like STATUTE, each document has subparts which are not
        # described in the sitemap. Load the main HTML page and scrape for the sub-files.
        # In the STATUTE collection, the MODS information in granules is redundant with
        # information in the top-level package MODS file. But the only way to get granule-
        # level PDFs is to go through the granules.
        content_index = utils.download(content_detail_url,
                                       "fdsys/package/%s/%s/%s.html" % (sitemap["year"], sitemap["collection"], package_name),
                                       utils.merge(options, {
                                           'binary': True,
                                       }))
        if not content_index:
            raise Exception("Failed to download %s" % content_detail_url)
        for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"):
            if link.text == "More":
                m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href"))
                if not m or m.group(1) != package_name:
                    raise Exception("Unmatched granule URL %s" % link.get("href"))
                granule_name = m.group(2)
                results = mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options)

    return results
Example 7: vote_ids_for_senate
def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    page = utils.download(
        "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num),
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )

    if not page:
        logging.error("Couldn't download Senate vote XML index, aborting")
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)
    return vote_ids
Example 8: narrow_docids
def narrow_docids(self, idx):
    m0 = [decode_array(idx[feat]) for feat in self.feats if idx.has_key(feat)]
    if not m0:
        return []
    refs = merge(m0)
    locs = [(refs[i], refs[i+1]) for i in xrange(0, len(refs), 2)]
    return locs
Example 9: document_info_for
def document_info_for(filename, cache, options):
    mods_url = mods_for(filename)
    mods_cache = ""
    body = utils.download(mods_url,
                          cache,
                          utils.merge(options, {'xml': True})
                          )
    doc = etree.fromstring(body)
    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
    locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

    urls = {}
    for location in locations:
        label = location.attrib['displayLabel']
        if "HTML" in label:
            format = "html"
        elif "PDF" in label:
            format = "pdf"
        elif "XML" in label:
            format = "xml"
        else:
            format = "unknown"
        urls[format] = location.text

    issued_on = doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns)

    return issued_on, urls
Example 10: get_sitemap
def get_sitemap(year, collection, lastmod, options):
    """Gets a single sitemap, downloading it if the sitemap has changed.

    Downloads the root sitemap (year==None, collection==None), or
    the sitemap for a year (collection==None), or the sitemap for
    a particular year and collection. Pass lastmod which is the current
    modification time of the file according to its parent sitemap, which
    is how it knows to return a cached copy.

    Returns the sitemap parsed into a DOM.
    """

    # Construct the URL and the path to where to cache the file on disk.
    if year == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
        path = "fdsys/sitemap/sitemap.xml"
    elif collection == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
        path = "fdsys/sitemap/%s/sitemap.xml" % year
    else:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
        path = "fdsys/sitemap/%s/%s.xml" % (year, collection)

    # Should we re-download the file?
    lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
    if options.get("cached", False):
        # If --cached is used, don't hit the network.
        force = False
    elif not lastmod:
        # No *current* lastmod date is known for this file (because it is the master
        # sitemap file, probably), so always download.
        force = True
    else:
        # If the file is out of date or --force is used, download the file.
        cache_lastmod = utils.read(lastmod_cache_file)
        force = (lastmod != cache_lastmod) or options.get("force", False)

    if force:
        logging.warn("Downloading: %s" % url)

    body = utils.download(url, path, utils.merge(options, {
        'force': force,
        'binary': True
    }))

    if not body:
        raise Exception("Failed to download %s" % url)

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the file.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

    try:
        return etree.fromstring(body)
    except etree.XMLSyntaxError as e:
        raise Exception("XML syntax error in %s: %s" % (url, str(e)))
Example 11: fetch_vote
def fetch_vote(vote_id, options):
    logging.info("\n[%s] Fetching..." % vote_id)

    vote_chamber, vote_number, vote_congress, vote_session_year = utils.split_vote_id(vote_id)

    if vote_chamber == "h":
        url = "http://clerk.house.gov/evs/%s/roll%03d.xml" % (vote_session_year, int(vote_number))
    else:
        session_num = int(vote_session_year) - utils.get_congress_first_year(int(vote_congress)) + 1
        url = "http://www.senate.gov/legislative/LIS/roll_call_votes/vote%d%d/vote_%d_%d_%05d.xml" % (int(vote_congress), session_num, int(vote_congress), session_num, int(vote_number))

    # fetch vote XML page
    body = utils.download(
        url,
        "%s/votes/%s/%s%s/%s%s.xml" % (vote_congress, vote_session_year, vote_chamber, vote_number, vote_chamber, vote_number),
        utils.merge(options, {'binary': True}),
    )

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    if "This vote was vacated" in body:
        # Vacated votes: 2011-484, 2012-327, ...
        # Remove file, since it may previously have existed with data.
        for f in (output_for_vote(vote_id, "json"), output_for_vote(vote_id, "xml")):
            if os.path.exists(f):
                os.unlink(f)
        return {'saved': False, 'ok': True, 'reason': "vote was vacated"}

    dom = etree.fromstring(body)

    vote = {
        'vote_id': vote_id,
        'chamber': vote_chamber,
        'congress': int(vote_congress),
        'session': vote_session_year,
        'number': int(vote_number),
        'updated_at': datetime.datetime.fromtimestamp(time.time()),
        'source_url': url,
    }

    # do the heavy lifting
    if vote_chamber == "h":
        parse_house_vote(dom, vote)
    elif vote_chamber == "s":
        parse_senate_vote(dom, vote)

    # output and return
    output_vote(vote, options)

    return {'ok': True, 'saved': True}
Example 12: mirror_file
def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name, options)
    if not path: return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    updated_file_types = set()
    for file_type in file_types:
        if file_type not in targets: raise Exception("Invalid file type: %s" % file_type)
        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path): continue  # we already have the current file
        logging.warn("Downloading: " + f_path)
        data = utils.download(f_url, f_path, utils.merge(options, {
            'binary': True,
            'force': force,
            'to_cache': False,
            'needs_content': file_type == "text" and f_path.endswith(".html"),
        }))
        updated_file_types.add(file_type)

        if not data:
            if file_type == "pdf":
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)
                continue

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            # html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                text_content = unicode(html.fromstring(data).text_content())
                f.write(text_content.encode("utf8"))

    if collection == "BILLS" and "mods" in updated_file_types:
        # When we download bill files, also create the text-versions/data.json file
        # which extracts commonly used components of the MODS XML.
        from bill_versions import write_bill_version_metadata
        write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)
Example 13: generate_validation_batch
def generate_validation_batch(required_input_keys, required_output_keys, set="validation"):
    # generate sunny data
    sunny_length = get_lenght_of_set(name="sunny", set=set)
    regular_length = get_lenght_of_set(name="regular", set=set)
    sunny_batches = int(np.ceil(sunny_length / float(_config().sunny_batch_size)))
    regular_batches = int(np.ceil(regular_length / float(_config().batch_size)))

    if "sunny" in required_input_keys or "segmentation" in required_output_keys:
        num_batches = max(sunny_batches, regular_batches)
    else:
        num_batches = regular_batches

    num_chunks = int(np.ceil(num_batches / float(_config().batches_per_chunk)))

    sunny_chunk_size = _config().batches_per_chunk * _config().sunny_batch_size
    regular_chunk_size = _config().batches_per_chunk * _config().batch_size

    for n in xrange(num_chunks):
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = range(n*sunny_chunk_size, (n+1)*sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = range(n*regular_chunk_size, (n+1)*regular_chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set=set,
                                       preprocess_function=_config().preprocess_validation)

        result = utils.merge(result, kaggle_data)

        yield result
Example 14: fetch_version
def fetch_version(bill_version_id, options):
    # Download MODS etc.
    logging.info("\n[%s] Fetching..." % bill_version_id)

    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    # bill_id = "%s%s-%s" % (bill_type, number, congress)

    utils.download(
        mods_url_for(bill_version_id),
        document_filename_for(bill_version_id, "mods.xml"),
        utils.merge(options, {'binary': True, 'to_cache': False})
    )

    return write_bill_version_metadata(bill_version_id)
Example 15: generate_train_batch
def generate_train_batch(required_input_keys, required_output_keys):
    """Creates an iterator that returns train batches."""

    sunny_chunk_size = _config().sunny_batch_size * _config().batches_per_chunk
    chunk_size = _config().batch_size * _config().batches_per_chunk

    while True:
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = _config().rng.randint(0, len(sunny_train_images), sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = _config().rng.randint(0, len(train_patient_folders), chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set="train",
                                       preprocess_function=_config().preprocess_train)

        result = utils.merge(result, kaggle_data)

        yield result