本文整理汇总了Python中sentimentfinding.IOtools.readjson方法的典型用法代码示例。如果您正苦于以下问题：Python IOtools.readjson方法的具体用法？Python IOtools.readjson怎么用？Python IOtools.readjson使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sentimentfinding.IOtools的用法示例。
在下文中一共展示了IOtools.readjson方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readjson [as 别名]
def main(folder):
    """Print the single and double annotation assignments stored in ``folder``.

    Reads ``singleannotation.json`` and ``doubleannotation.json`` from
    ``folder`` with ``IOtools.readjson`` and hands each result to its
    traversal routine, printing a heading before each one.
    """
    single_path, double_path = (
        os.path.join(folder, "singleannotation.json"),
        os.path.join(folder, "doubleannotation.json"),
    )

    # Both files are loaded before anything is printed.
    single_assignments = IOtools.readjson(single_path)
    double_assignments = IOtools.readjson(double_path)

    print("Single assignments:")
    traversesingles(single_assignments)

    print("Double assignments:")
    traversedoubles(double_assignments)
示例2: generate_tables
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readjson [as 别名]
def generate_tables(self, singlejsonpath=None, doublejsonpath=None):
    """Draw the news texts to annotate and write the resulting tables.

    The corpus CSV at ``self.corpuspath`` is loaded and, for every
    (resource, category, month) group named in the assignment files, the
    requested number of rows is drawn at random without replacement; a row
    is never drawn twice, neither within nor across the single and double
    rounds.

    Args:
        singlejsonpath: JSON file iterated as
            user id -> resource -> category -> month -> number of texts.
            Defaults to ``self.singles_jsonpath``.
        doublejsonpath: JSON file iterated as
            resource -> category -> month -> number of texts; the whole
            distribution is drawn once per pair of coders.
            Defaults to ``self.doubles_jsonpath``.

    Raises:
        IndexError: if a group does not contain enough not-yet-drawn rows
            to satisfy the requested count.

    Side effects:
        Writes ``self.singles_csvpath``, ``self.doubles_csvpath``,
        ``corpusstats_annotatable.csv`` under ``self.outfolder`` and the
        user / question / evaluation tables under ``<outfolder>/tables``.
    """
    if singlejsonpath is None:
        singlejsonpath = self.singles_jsonpath
    if doublejsonpath is None:
        doublejsonpath = self.doubles_jsonpath

    selectedindices = []  # every drawn row label, in draw order
    taken = set()         # the same labels, for constant-time lookups
    user_textid = []      # (annotator id, text id) pairs

    df = IOtools.readcsv(self.corpuspath)
    df["annotator"] = ""

    def draw(resourcename, catname, month, ntexts):
        """Return ``ntexts`` unused row labels of the given group.

        The labels are registered as taken before returning.
        """
        if ntexts <= 0:
            return []
        group = df[(df["resource"] == resourcename)
                   & (df["category"] == catname)
                   & (df["date"].str.startswith(month))]
        available = list(set(group.index.values.tolist()) - taken)
        # The previous retry-until-unused loop never terminated once a group
        # was exhausted; fail loudly instead.
        if ntexts > len(available):
            raise IndexError(
                "cannot draw %d texts for %s/%s/%s: only %d unused rows left"
                % (ntexts, resourcename, catname, month, len(available)))
        chosen = random.sample(available, ntexts)
        taken.update(chosen)
        selectedindices.extend(chosen)
        return chosen

    def make_textid(rowindex, resourcename, catname):
        """Build the ``resource-category-newsid`` id of a corpus row."""
        newsid = str(int(df.loc[rowindex, "newsid"]))
        return resourcename + "-" + catname + "-" + newsid

    # singles
    single_assignments = IOtools.readjson(singlejsonpath)
    for userid, resourcedist in single_assignments.items():
        for resourcename, catdist in resourcedist.items():
            for catname, monthdist in catdist.items():
                for month, ntexts in monthdist.items():
                    for rowindex in draw(resourcename, catname, month, ntexts):
                        textid = make_textid(rowindex, resourcename, catname)
                        user_textid.append((userid, textid))
                        df.loc[rowindex, "annotator"] = userid

    # record these newstexts to a csv (only singles have been drawn so far)
    IOtools.tocsv(df.loc[selectedindices, :], self.singles_csvpath)

    # doubles
    df["annotator2"] = ""
    selectedindices2 = []
    double_assignments = IOtools.readjson(doublejsonpath)
    for i in range(0, self.ncoders, 2):
        # Paired annotators are identified by their position, not by name.
        user1 = str(i)
        user2 = str(i + 1)
        for resourcename, catdist in double_assignments.items():
            for catname, monthdist in catdist.items():
                for month, ntexts in monthdist.items():
                    for rowindex in draw(resourcename, catname, month, ntexts):
                        selectedindices2.append(rowindex)
                        textid = make_textid(rowindex, resourcename, catname)
                        user_textid.append((user1, textid))
                        user_textid.append((user2, textid))
                        df.loc[rowindex, "annotator"] = user1
                        df.loc[rowindex, "annotator2"] = user2

    IOtools.tocsv(df.loc[selectedindices2, :], self.doubles_csvpath)
    IOtools.tocsv(df, os.path.join(self.outfolder, "corpusstats_annotatable.csv"))

    tablespath = IOtools.ensure_dir(os.path.join(self.outfolder, "tables"))
    # write user table
    self.generate_user_table(tablespath)
    # write question table
    self.generate_question_table(tablespath, selectedindices)
    # write evaluation table
    self.generate_evalutation_table(tablespath, user_textid)
示例3: has_attribute
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readjson [as 别名]
count_attrs[attr] += has_attribute(textid, attr)
labelattrcountdct[label] = count_attrs
if percentage:
for label in labelattrcountdct.keys():
labelattrcountdct[label] = dctcounts_to_percentage(labelattrcountdct[label])
return labelattrcountdct
if __name__ == "__main__":
    # Names passed to get_inlabel_stats as the groups to count per label.
    sources = ["radikal", "solhaber", "vakit"]
    categories = ["world", "economy", "politics", "turkey"]

    # Location of the test-instance file of one experiment run.
    jsonpath = "/home/dicle/Dicle/Tez/corpusstats/learning10/experiments_5fold_scale/scores/double/redef-rat_lex-rat/comb975_F_0-0_1-1_2-1_3-3_4-0_5-1_6-1_7-0_8-3/STGobj-ALLsubj/"
    fname = "test_instances"
    instancespath = os.path.join(jsonpath, fname)
    labeltextdct = IOtools.readjson(instancespath)

    # Per-label counts by source (last argument False: no percentages).
    scounts = get_inlabel_stats(sources, labeltextdct, is_of_source, False)
    print(scounts)

    # Per-label counts by category.
    ccounts = get_inlabel_stats(categories, labeltextdct, is_in_category, False)
    print(ccounts)

    # convert jsons to csv. record them at learning_for_vis with the name of the test set
示例4: generate_tables
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readjson [as 别名]
def generate_tables(self, singlejsonpath=None, doublejsonpath=None):
if singlejsonpath is None:
singlejsonpath=self.singles_jsonpath
if doublejsonpath is None:
doublejsonpath=self.doubles_jsonpath
# to look up selected ids in previously selected newsids
oldquestions, numofoldquestions, numofoldevaluations, numofoldusers = self.getolderevaluations()
searchlist = []
searchlist.extend(oldquestions)
selectedindices = []
user_textid = []
df = IOtools.readcsv(self.corpuspath)
df["annotator"] = ""
# singles
single_assignments = IOtools.readjson(singlejsonpath)
for userid, resourcedist in single_assignments.iteritems():
for resourcename, catdist in resourcedist.iteritems():
for catname, monthdist in catdist.iteritems():
for month, ntexts in monthdist.iteritems():
dfx = df[(df["resource"]==resourcename) & (df["category"]==catname) & (df["date"].str.startswith(month))]
for _ in range(ntexts):
randomindex = random.choice(dfx.index.values.tolist())
nid = str(int(df.loc[randomindex, "newsid"]))
name = "-".join([resourcename,catname,nid])
if name in oldquestions:
#if randomindex in oldquestions:
print "IN OLD LIST: ",resourcename,"+",catname,"+",randomindex
while name in oldquestions:
#while randomindex in searchlist:
randomindex = random.choice(dfx.index.values.tolist())
nid = str(int(df.loc[randomindex, "newsid"]))
name = "-".join([resourcename,catname,nid])
print name," # ",
while randomindex in selectedindices: # or name in oldquestions:
#while randomindex in searchlist:
randomindex = random.choice(dfx.index.values.tolist())
print name," + ",
'''
#print resourcename,catname,month
#print dfx.values.shape," <- ",df.values.shape
randomindex = random.choice(dfx.index.values.tolist())
if randomindex in oldquestions:
print "IN OLD LIST: ",resourcename,"+",catname,"+",randomindex
while randomindex in selectedindices or randomindex in oldquestions:
randomindex = random.choice(dfx.index.values.tolist())
'''
selectedindices.append(randomindex)
newsid = df.loc[randomindex, "newsid"]
newsid = str(int(newsid))
textid = resourcename + "-" + catname + "-" + str(newsid)
user_textid.append((userid, textid))
df.loc[randomindex, "annotator"] = userid
# record these newstexts to a csv
dfx = df.loc[selectedindices, :]
IOtools.tocsv(dfx, self.singles_csvpath)
# doubles
df["annotator2"] = ""
selectedindices2 = []
double_assignments = IOtools.readjson(doublejsonpath)
for pairname, resourcedist in double_assignments.iteritems():
#for i in range(0,self.ncoders,2):
for resourcename, catdist in resourcedist.iteritems():
#for resourcename, catdist in resourcedist.iteritems():
for catname, monthdist in catdist.iteritems():
for month, ntexts in monthdist.iteritems():
dfx = df[(df["resource"]==resourcename) & (df["category"]==catname) & (df["date"].str.startswith(month))]
for _ in range(ntexts):
randomindex = random.choice(dfx.index.values.tolist())
#print "TYPE ",type(randomindex)," oldq: ",type(int(oldquestions[0]))
nid = str(int(df.loc[randomindex, "newsid"]))
name = "-".join([resourcename,catname,nid])
#print "TYPE ",type(name)," oldq: ",type(oldquestions[0])
if name in oldquestions:
#if randomindex in oldquestions:
print "IN OLD LIST: ",resourcename,"+",catname,"+",randomindex
while name in oldquestions:
#while randomindex in searchlist:
randomindex = random.choice(dfx.index.values.tolist())
nid = str(int(df.loc[randomindex, "newsid"]))
name = "-".join([resourcename,catname,nid])
print name," # ",
while randomindex in selectedindices: # or name in oldquestions:
#while randomindex in searchlist:
#.........这里部分代码省略.........