本文整理汇总了 Python 中 SonicScrewdriver.dirty_pairtree 方法的典型用法代码示例。如果您正苦于以下问题：Python SonicScrewdriver.dirty_pairtree 方法的具体用法？Python SonicScrewdriver.dirty_pairtree 怎么用？Python SonicScrewdriver.dirty_pairtree 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 SonicScrewdriver 的用法示例。
在下文中一共展示了 SonicScrewdriver.dirty_pairtree 方法的 5 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: set
# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import dirty_pairtree [as 别名]
import csv
from collections import Counter
import SonicScrewdriver as utils
# Load the fiction metadata table. Each row's "htid" is passed through
# utils.dirty_pairtree, and the converted id is used both as a member of
# ficids and as the key under which the whole row is stored in meta.
ficids = set()
meta = dict()
ficsource = "/Volumes/TARDIS/work/fiction/metadata/fiction_metadata.csv"
# newline="" lets the csv module do its own newline handling, as its
# documentation requires; without it, newlines embedded in quoted fields
# may be misread.
with open(ficsource, encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f)
    fieldnames = reader.fieldnames
    for row in reader:
        htid = row["htid"]
        dirtyhtid = utils.dirty_pairtree(htid)
        ficids.add(dirtyhtid)
        meta[dirtyhtid] = row

# Path of a second metadata table; it is only named here, not yet read.
metasource = "/Volumes/TARDIS/work/metadata/MergedMonographs.tsv"

# One counter per genre, all starting empty.
mysterysubjects = Counter()
scifisubjects = Counter()
gothsubjects = Counter()

# NOTE(review): presumably subject keywords that mark a volume as gothic;
# confirm against the code that consumes this list.
gothclues = ["ghost stories", "gothic revival", "horror"]

# Starts empty; add_tag stores a set under each htid key.
genretags = dict()
def add_tag(genretags, htid, tagtoadd):
if htid not in genretags:
genretags[htid] = set()
示例2: print
# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import dirty_pairtree [as 别名]
if thisreader not in readerowners[f]:
readerowners[f].append(thisreader)
paths[f].append(thispath)
# Report how many entries tagset holds, then treat it as the list of files.
print(len(tagset))
allfiles = tagset
# This is a list of all the filenames (note, filenames not docids)
# that we found in the /readers sourcedir.

# Index of the existing metadata table, keyed by docid and read as strings.
train1 = pd.read_csv('../bzipmeta.csv', dtype='object', index_col='docid')
tidx = set(train1.index.values)

# Print every docid whose utils.dirty_pairtree form is missing from that index.
for filename in allfiles:
    docid = filename.replace('.csv', '')
    if utils.dirty_pairtree(docid) in tidx:
        continue
    print(docid)

genrestocheck = ['fic', 'poe']
equivalences = {'non', 'bio', 'other'}

# Each genre to check starts with its own, separate empty list.
volumesingenre = {g: [] for g in genrestocheck}
alldocids = set()
for filename, owners in readerowners.items():
path = paths[filename][0]
if 'metadat' in filename:
print(filename)
示例3: open
# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import dirty_pairtree [as 别名]
import SonicScrewdriver as utils
# Read one id per line, dropping trailing whitespace (including the newline).
with open('/Users/tunder/Dropbox/GenreProject/metadata/getficids1899.txt', encoding='utf-8') as f:
    ids = [line.rstrip() for line in f]

# Convert every id with utils.dirty_pairtree, preserving the input order.
newids = [utils.dirty_pairtree(anid) for anid in ids]

# Write the converted ids back out, one per line.
with open('/Users/tunder/Dropbox/GenreProject/metadata/dirtyficids1899.txt', mode='w', encoding='utf-8') as f:
    f.writelines(anid + '\n' for anid in newids)
示例4: open
# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import dirty_pairtree [as 别名]
# getidstoadd
import SonicScrewdriver as utils
import os
# The first tab-separated field of every line is taken as an id.
# NOTE(review): this includes the first line, so if correctedmeta.tsv has a
# header row its first field is treated as an id too -- confirm.
with open('/Users/tunder/Dropbox/GenreProject/python/granger/correctedmeta.tsv', encoding='utf-8') as f:
    filelines = f.readlines()
ids2get = [line.split('\t')[0] for line in filelines]

# Ids already on disk: names of the '.poe.tsv' files with that text removed.
fileswehave = os.listdir('/Users/tunder/Dropbox/GenreProject/python/granger/elite/')
idswehave = {
    name.replace('.poe.tsv', '')
    for name in fileswehave
    if name.endswith('.poe.tsv')
}

# Write out every id that is absent from idswehave both as given and in
# its utils.clean_pairtree form.
with open('/Users/tunder/Dropbox/GenreProject/python/granger/ids2get.tsv', mode='w', encoding='utf-8') as f:
    for anid in ids2get:
        if anid in idswehave or utils.clean_pairtree(anid) in idswehave:
            continue
        f.write(utils.dirty_pairtree(anid) + '\n')
示例5: open
# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import dirty_pairtree [as 别名]
# Collect the docid of every metadata row that carries at least one of the
# wanted tags; rows whose tag set contains 'drop' are skipped outright.
with open(metafile, encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        tagset = utils.get_tagset(row['genretags'])
        if 'drop' in tagset:
            continue

        getthis = any(tag in tagset for tag in tagstoget)
        if getthis:
            docidstoget.add(row['docid'])

# Docids already on disk: names of the '.fic.tsv' files with that text removed.
filespresent = os.listdir('/Users/tunder/Dropbox/fiction/data/')
docidspresent = {
    name.replace('.fic.tsv', '')
    for name in filespresent
    if name.endswith('.fic.tsv')
}

docidsneeded = docidstoget - docidspresent

# The output file name embeds today's date.
outfile = '/Users/tunder/Dropbox/fiction/meta/filestoget' + str(datetime.date.today()) + '.txt'
with open(outfile, mode='w', encoding='utf-8') as f:
    # One utils.dirty_pairtree-converted id per line, in set iteration order.
    f.writelines(utils.dirty_pairtree(docid) + '\n' for docid in docidsneeded)