This article collects typical usage examples of the Python method _collections.defaultdict. If you are asking yourself what _collections.defaultdict does, how to use it, or what it looks like in practice, the curated examples below should help. You can also explore further usage examples from the _collections module it belongs to.
The following presents 11 code examples of _collections.defaultdict, sorted by popularity by default.
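Before the examples, here is a minimal sketch of the pattern they all share. Note that _collections is the internal C-accelerated module behind the standard collections package; in everyday code you would normally write from collections import defaultdict, which resolves to the same type.

from _collections import defaultdict

# A defaultdict calls its factory for missing keys instead of raising KeyError.
counts = defaultdict(int)    # int() returns 0, so every new key starts at zero
counts['x'] += 1             # no KeyError: 'x' is created as 0, then incremented
print(counts['x'])           # 1
print(counts['missing'])     # 0 -- the lookup itself inserts the key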
Example 1: __init__
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def __init__(self, *args):
super(IDADebugger, self).__init__(*args)
self.hooked = False
self.trace = Trace()
self._module_name = 'IDADbg'
self.arch = get_arch_dynamic()
    # initialize the CPU context registers to '0'
    if self.arch == 32:
        self.ctx = {c: '0' for c in ['eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi', 'esi', 'cf', 'zf', 'sf', 'of', 'pf',
                                     'af', 'tf', 'df']}
    elif self.arch == 64:
        self.ctx = {c: '0' for c in ['rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'rdi', 'rsi', 'r8', 'r9', 'r10', 'r11', 'r12',
                                     'r13', 'r14', 'r15', 'cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']}
self.IAT = []
self.func_args = defaultdict(lambda: set())
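A quick standalone illustration of the defaultdict(lambda: set()) pattern used for func_args above, with hypothetical function names and arguments; note that defaultdict(set) would behave identically and is slightly more idiomatic, since set is already a zero-argument factory.

from _collections import defaultdict

func_args = defaultdict(lambda: set())
func_args['CreateFileA'].add('0x401000')   # no key check needed
func_args['CreateFileA'].add('0x401000')   # duplicates collapse in the set
func_args['ReadFile'].add('0x14')
print(sorted(func_args['CreateFileA']))    # ['0x401000']
print(len(func_args['unknown']))           # 0 -- an empty set, created on first access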
Example 2: read
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def read(self, fn):
d = defaultdict(lambda: [])
with open(fn) as fin:
for line_ind, line in enumerate(fin):
data = line.strip().split('\t')
text, rel = data[:2]
args = data[2:]
confidence = 1
curExtraction = Extraction(pred = rel,
head_pred_index = None,
sent = text,
confidence = float(confidence),
index = line_ind)
for arg in args:
curExtraction.addArg(arg)
d[text].append(curExtraction)
self.oie = d
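The read method above uses the classic grouping idiom: a list-valued defaultdict keyed by sentence text, appended to without any key checks. A minimal sketch with made-up rows (defaultdict(list) would behave exactly like defaultdict(lambda: [])):

from _collections import defaultdict

rows = [('the cat sat', 'sat'), ('the cat sat', 'cat'), ('dogs bark', 'bark')]
by_sentence = defaultdict(lambda: [])
for text, extraction in rows:
    by_sentence[text].append(extraction)   # the empty list is created on demand
print(by_sentence['the cat sat'])          # ['sat', 'cat']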
Example 3: find_vm_addr
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def find_vm_addr(trace):
"""
Find the virtual machine addr
:param trace: instruction trace
:return: virtual function start addr
"""
push_dict = defaultdict(lambda: 0)
vm_func_dict = defaultdict(lambda: 0)
    # try to find the VM segment via a series of push instructions, which also identify vm_addr
for line in trace:
try:
if line.disasm[0] == 'push':
push_dict[GetFunctionAttr(line.addr, FUNCATTR_START)] += 1
except:
pass
vm_func = max(push_dict, key=push_dict.get)
vm_seg_start = SegStart(vm_func)
vm_seg_end = SegEnd(vm_func)
    # test whether vm_func is the biggest function in the segment
vm_funcs = Functions(vm_seg_start, vm_seg_end)
for f in vm_funcs:
vm_func_dict[f] = GetFunctionAttr(f, FUNCATTR_END) - GetFunctionAttr(f, FUNCATTR_START)
if max(vm_func_dict, key=vm_func_dict.get) != vm_func:
return AskAddr(vm_func,
"Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
(vm_func, max(vm_func_dict, key=vm_func_dict.get)))
else:
return vm_func
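find_vm_addr relies on a counting defaultdict followed by max(d, key=d.get) to pick the most frequent key. The same idiom in isolation, with hypothetical function start addresses (defaultdict(int) is the usual spelling of defaultdict(lambda: 0)):

from _collections import defaultdict

push_dict = defaultdict(lambda: 0)
for func_start in [0x401000, 0x402000, 0x401000, 0x401000]:
    push_dict[func_start] += 1                  # missing keys start at 0
most_pushed = max(push_dict, key=push_dict.get)
print('%s: %d' % (hex(most_pushed), push_dict[most_pushed]))   # 0x401000: 3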
Example 4: find_virtual_regs
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def find_virtual_regs(trace, manual=False, update=None):
"""
Maps the virtual registers on the stack to the actual registers after the vm exit.
:param trace: instruction trace
:return: virtual registers dict which maps the real regs onto virtual ones via stack addresses
"""
vmr = get_vmr()
assert isinstance(trace, Trace)
virt_regs = defaultdict(lambda: False)
# trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)
while trace:
try:
elem = trace.pop(len(trace) - 1)
if len(elem.disasm) > 0 and elem.disasm[0] == 'pop':
opnd = elem.disasm[1]
if get_reg_class(opnd) is None: # if not a register it is a mem_loc
pass
elif virt_regs[opnd]:
pass
else:
                    # the context always shows the registers after execution, so we need the SP from the preceding instruction
stack_addr = trace[len(trace) - 1].ctx[get_reg('rsp', trace.ctx_reg_size)]
virt_regs[opnd] = stack_addr
except:
pass
if update is not None:
update.pbar_update(60)
vmr.vm_stack_reg_mapping = virt_regs
if manual:
print ''.join('%s:%s\n' % (c, virt_regs[c]) for c in virt_regs.keys())
return virt_regs
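find_virtual_regs uses defaultdict(lambda: False) so virt_regs[opnd] can be tested directly without a key check. One subtlety worth knowing: reading a missing key from a defaultdict inserts it, so the dict grows on every probe. A small sketch with a hypothetical register:

from _collections import defaultdict

virt_regs = defaultdict(lambda: False)
if not virt_regs['rax']:                 # returns False, but also inserts 'rax'
    virt_regs['rax'] = '0x7fffdeadbeef'
print(list(virt_regs.keys()))            # ['rax'] -- the probe itself created the key
# Use 'key in d' instead of d[key] when that side effect must be avoided.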
Example 5: __getitem__
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def __getitem__(self, key):
for mapping in self.maps:
try:
return mapping[key] # can't use 'key in mapping' with defaultdict
except KeyError:
pass
return self.__missing__(key) # support subclasses that define __missing__
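This is essentially ChainMap.__getitem__ from the standard library, and the inline comment is the interesting part: a membership test bypasses __missing__, so 'key in mapping' would report a key as absent even though indexing the defaultdict would produce a default. The try/except form honors the factory. A sketch of the difference:

from _collections import defaultdict

d = defaultdict(lambda: [])
print('x' in d)   # False -- membership tests never call the factory
print(d['x'])     # []    -- indexing does, and inserts the key
print('x' in d)   # True  -- only after the indexed lookup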
Example 6: read
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def read(self, fn):
d = defaultdict(lambda: [])
with open(fn) as fin:
for line in fin:
data = line.strip().split('\t')
text, base_rel, rel = data[:3]
args = data[3:]
confidence = 1
curExtraction = Extraction(pred = rel, sent = text, confidence = float(confidence))
for arg in args:
curExtraction.addArg(arg)
d[text].append(curExtraction)
self.oie = d
Example 7: dynamic_vm_values
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def dynamic_vm_values(trace, code_start=BADADDR, code_end=BADADDR, silent=False):
"""
Find the virtual machine context necessary for an automated static analysis.
    code_start = start of the bytecode -> often passed as the parameter to vm_func; the bytecode usually starts right after vm_func
    code_end = end of the bytecode -> the bytecode is usually one big chunk, so several x86/x64 instructions in a row mark its end
    base_addr = start address of the jmp table -> the most frequently used offset in the vm_trace
    vm_addr = start address of the VM function -> the biggest function in the .vmp segment
:param trace: instruction trace
:return: vm_ctx -> [code_start, code_end, base_addr, vm_func_addr, vm_funcs]
"""
base_addr = defaultdict(lambda: 0)
vm_addr = find_vm_addr(deepcopy(trace))
trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)
code_addrs = []
# try finding code_start
if code_start == BADADDR:
code_start = GetFunctionAttr(vm_addr, FUNCATTR_END)#NextHead(GetFunctionAttr(vm_addr, FUNCATTR_END), vm_seg_end)
code_start = NextHead(code_start, BADADDR)
while isCode(code_start):
code_start = NextHead(code_start, BADADDR)
for line in trace:
# construct base addr dict of offsets -> jmp table should be the one most used
if len(line.disasm) == 2:
try:
offset = re.findall(r'.*:off_([0123456789abcdefABCDEF]*)\[.*\]', line.disasm[1])[0]
base_addr[offset] += 1
except:
pass
# code_start additional search of vm_func params
if line.addr == vm_addr:
for l in trace[:trace.index(line)]:
if l.disasm[0] == 'push':
try:
arg = re.findall(r'.*_([0123456789ABCDEFabcdef]*)', l.disasm[1])
if len(arg) == 1:
code_addrs.append(int(arg[0], 16))
except Exception, e:
print e.message
# finalize base_addr
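The snippet is cut off here, but per its docstring the tail would pick the most frequently referenced offset as base_addr, the same voting idiom as in find_vm_addr above. A compact reconstruction of that counting step with fabricated disassembly operands (assumption: the regex mirrors the one in the loop above):

import re
from _collections import defaultdict

base_addr = defaultdict(lambda: 0)
for operand in ['ds:off_4051A0[eax*4]', 'ds:off_4051A0[ebx*4]', 'ds:off_405200[eax*4]']:
    match = re.findall(r'.*:off_([0-9a-fA-F]*)\[.*\]', operand)
    if match:
        base_addr[match[0]] += 1            # vote for this jump-table offset
print(max(base_addr, key=base_addr.get))    # '4051A0'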
Example 8: gen_trace
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
"""
Generate trace for the loaded binary.
:param trace_start:
:param trace_end:
:return:
"""
vmr = get_vmr()
self.trace_init()
# reset color
heads = Heads(SegStart(ScreenEA()), SegEnd(ScreenEA()))
for i in heads:
SetColor(i, CIC_ITEM, 0xFFFFFF)
# start exec
RunTo(BeginEA())
event = GetDebuggerEvent(WFNE_SUSP, -1)
# enable tracing
EnableTracing(TRACE_STEP, 1)
if vmr.sys_libs:
pass
event = GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
while True:
event = GetDebuggerEvent(WFNE_ANY, -1)
addr = GetEventEa()
# change color of executed line
current_color = GetColor(addr, CIC_ITEM)
new_color = self.get_new_color(current_color)
SetColor(addr, CIC_ITEM, new_color)
# break by exception
if event <= 1:
break
    # standardize the difference between ida_trace.txt files and traces generated by the debugger hook:
    # dbg_trace returns the CPU context *before* the instruction executes, whereas trace files store the context *after* it
for line in self.trace:
try:
line.ctx = self.trace[self.trace.index(line) + 1].ctx
except IndexError:
line.ctx = defaultdict(lambda: '0')
# return the trace, for population see dbg_trace() below
msg('[*] Trace generated!\n')
if vmr.extract_param:
vmr.func_args = self.func_args
for key in self.func_args.keys():
print 'Function %s call args:' % key, ''.join('%s, ' % arg for arg in self.func_args[key]).rstrip(', ')
return self.trace
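The loop near the end shifts every trace line's context to the context of the following line, falling back to a defaultdict that answers '0' for any register once the trace runs out. The same shift in miniature, with a fake two-entry context list:

from _collections import defaultdict

ctxs = [{'eax': '1'}, {'eax': '2'}]
for i in range(len(ctxs)):
    try:
        ctxs[i] = ctxs[i + 1]                 # ctx after execution = next line's ctx
    except IndexError:
        ctxs[i] = defaultdict(lambda: '0')    # last line: every register reads as '0'
print(ctxs[1]['ebx'])                         # '0'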
Example 9: find
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def find(self, value, k=2, exclude_obj_ids=set(), exclude_obj_id_contain=None):
"""
查找相似的text的 id,逻辑比较复杂
1.分割要查找的origin_simhash的value成为多个key
2.将每个key查询倒排索引,得到对应可能相似的 related_simhash
3.求origin_simhash与 related_simhash之间的编辑距离 d
4.统计每个related_simhash和对应 编辑距离 d
5.多次出现的求一个额外的平均信息
6.将related_simhash按照 d从小到大排序
"""
    assert value is not None
    if isinstance(value, (str, unicode)):
        simhash = Simhash(value=value, f=self.f)
    elif isinstance(value, Simhash):
        simhash = value
    else:
        raise ValueError('value is neither text nor a Simhash')
assert simhash.f == self.f
sim_hash_dict = defaultdict(list)
ans = set()
for key in self.get_keys(simhash):
with Timer(msg='==query: %s' % key):
simhash_invertindex = SimhashInvertedIndex.objects.filter(key=key)
if simhash_invertindex:
simhash_caches_index = [sim_index.simhash_value_obj_id
for sim_index in simhash_invertindex]
else:
# logging.warning('SimhashInvertedIndex not exists key %s: %s' % (key, e))
continue
with Timer(msg='find d < k %d' % (k)):
if len(simhash_caches_index) > 200:
logging.warning('Big bucket found. key:%s, len:%s', key, len(simhash_caches_index))
for simhash_cache in simhash_caches_index:
try:
sim2, obj_id = simhash_cache.split(',', 1)
if obj_id in exclude_obj_ids or \
(exclude_obj_id_contain and exclude_obj_id_contain in simhash_cache):
continue
sim2 = Simhash(long(sim2, 16), self.f)
d = simhash.distance(sim2)
# print '**' * 50
# print "d:%d obj_id:%s key:%s " % (d, obj_id, key)
sim_hash_dict[obj_id].append(d)
if d < k:
ans.add(obj_id)
except Exception, e:
logging.warning('not exists %s' % (e))
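find accumulates one or more distances per candidate object id in a defaultdict(list), which makes it easy to rank candidates and to average the duplicates afterwards. The idea in isolation, with hypothetical ids and distances:

from _collections import defaultdict

sim_hash_dict = defaultdict(list)
for obj_id, d in [('doc1', 3), ('doc2', 1), ('doc1', 5)]:
    sim_hash_dict[obj_id].append(d)        # the same id may surface via several keys
ranked = sorted(sim_hash_dict, key=lambda o: min(sim_hash_dict[o]))
print(ranked)                              # ['doc2', 'doc1']
avg = sum(sim_hash_dict['doc1']) / float(len(sim_hash_dict['doc1']))
print(avg)                                 # 4.0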
Example 10: test_demo
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def test_demo(self):
dao = Dao(DAO_URL, sql_logging=False)
dao.load_gtfs(DUMMY_GTFS)
print("List of stops named '...Bordeaux...':")
stops_bordeaux = list(dao.stops(fltr=(Stop.stop_name.ilike('%Bordeaux%')) & (Stop.location_type == Stop.TYPE_STOP)))
for stop in stops_bordeaux:
print(stop.stop_name)
print("List of routes passing by those stops:")
routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop for stop in stops_bordeaux))
for route in routes_bordeaux:
print("%s - %s" % (route.route_short_name, route.route_long_name))
july4 = CalendarDate.ymd(2016, 7, 4)
print("All departures from those stops on %s:" % (july4.as_date()))
departures = list(dao.stoptimes(fltr=(or_(StopTime.stop == stop for stop in stops_bordeaux)) & (StopTime.departure_time != None) & (func.date(CalendarDate.date) == july4.date)))
print("There is %d departures" % (len(departures)))
for departure in departures:
print("%30.30s %10.10s %-20.20s > %s" % (departure.stop.stop_name, fmttime(departure.departure_time), departure.trip.route.route_long_name, departure.trip.trip_headsign))
print("Number of departures and time range per stop on %s:" % (july4.as_date()))
departure_by_stop = defaultdict(list)
for departure in departures:
departure_by_stop[departure.stop].append(departure)
for stop, deps in departure_by_stop.items():
min_dep = min(d.departure_time for d in deps)
max_dep = max(d.departure_time for d in deps)
print("%30.30s %3d departures (from %s to %s)" % (stop.stop_name, len(deps), fmttime(min_dep), fmttime(max_dep)))
# Compute the average distance and time to next stop by route type
ntd = [ [0, 0, 0.0] for type in range(0, Route.TYPE_FUNICULAR + 1) ]
for departure in departures:
        # The following is guaranteed to succeed: departure_time is NULL for the last stop time of a trip, so that stop never appears among the departures
next_arrival = departure.trip.stop_times[departure.stop_sequence + 1]
hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
hop_time = next_arrival.arrival_time - departure.departure_time
route_type = departure.trip.route.route_type
ntd[route_type][0] += 1
ntd[route_type][1] += hop_time
ntd[route_type][2] += hop_dist
for route_type in range(0, len(ntd)):
n, t, d = ntd[route_type]
if n > 0:
print("The average distance to the next stop on those departures for route type %d is %.2f meters" % (route_type, d / n))
print("The average time in sec to the next stop on those departures for route type %d is %s" % (route_type, fmttime(t / n)))
Example 11: __init__
# Required import: import _collections [as alias]
# Or: from _collections import defaultdict [as alias]
def __init__(self, tagset_sizes, num_lstm_layers, hidden_dim, word_embeddings, no_we_update, use_char_rnn, charset_size, char_embedding_dim, att_props=None, vocab_size=None, word_embedding_dim=None):
'''
:param tagset_sizes: dictionary of attribute_name:number_of_possible_tags
:param num_lstm_layers: number of desired LSTM layers
:param hidden_dim: size of hidden dimension (same for all LSTM layers, including character-level)
:param word_embeddings: pre-trained list of embeddings, assumes order by word ID (optional)
:param no_we_update: if toggled, don't update embeddings
:param use_char_rnn: use "char->tag" option, i.e. concatenate character-level LSTM outputs to word representations (and train underlying LSTM). Only 1-layer is supported.
:param charset_size: number of characters expected in dataset (needed for character embedding initialization)
:param char_embedding_dim: desired character embedding dimension
:param att_props: proportion of loss to assign each attribute for back-propagation weighting (optional)
:param vocab_size: number of words in model (ignored if pre-trained embeddings are given)
:param word_embedding_dim: desired word embedding dimension (ignored if pre-trained embeddings are given)
'''
self.model = dy.Model()
self.tagset_sizes = tagset_sizes
self.attributes = list(tagset_sizes.keys())
self.we_update = not no_we_update
if att_props is not None:
self.att_props = defaultdict(float, {att:(1.0-p) for att,p in att_props.items()})
else:
self.att_props = None
if word_embeddings is not None: # Use pretrained embeddings
vocab_size = word_embeddings.shape[0]
word_embedding_dim = word_embeddings.shape[1]
self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim), name="we")
if word_embeddings is not None:
self.words_lookup.init_from_array(word_embeddings)
# Char LSTM Parameters
self.use_char_rnn = use_char_rnn
self.char_hidden_dim = hidden_dim
if use_char_rnn:
self.char_lookup = self.model.add_lookup_parameters((charset_size, char_embedding_dim), name="ce")
self.char_bi_lstm = dy.BiRNNBuilder(1, char_embedding_dim, hidden_dim, self.model, dy.LSTMBuilder)
# Word LSTM parameters
if use_char_rnn:
input_dim = word_embedding_dim + hidden_dim
else:
input_dim = word_embedding_dim
self.word_bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)
# Matrix that maps from Bi-LSTM output to num tags
self.lstm_to_tags_params = {}
self.lstm_to_tags_bias = {}
self.mlp_out = {}
self.mlp_out_bias = {}
for att, set_size in list(tagset_sizes.items()):
self.lstm_to_tags_params[att] = self.model.add_parameters((set_size, hidden_dim), name=att+"H")
self.lstm_to_tags_bias[att] = self.model.add_parameters(set_size, name=att+"Hb")
self.mlp_out[att] = self.model.add_parameters((set_size, set_size), name=att+"O")
self.mlp_out_bias[att] = self.model.add_parameters(set_size, name=att+"Ob")
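The att_props handling above uses the less common two-argument form: defaultdict(float, mapping) seeds the dict from an existing mapping while still returning float() (0.0) for any attribute that was never assigned a proportion. A sketch with hypothetical attribute names:

from _collections import defaultdict

props = {'POS': 0.3, 'CASE': 0.4}                        # hypothetical att_props input
att_props = defaultdict(float, {att: 1.0 - p for att, p in props.items()})
print(att_props['POS'])      # 0.7 -- seeded from the mapping
print(att_props['NUMBER'])   # 0.0 -- float() default for unseen attributes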