
Python IOtools.getfilenames_of_dir Method Code Examples

This article collects typical usage examples of the Python method sentimentfinding.IOtools.getfilenames_of_dir. If you are unsure what IOtools.getfilenames_of_dir does, how to call it, or where to find working usages, the curated examples below should help. You can also browse further usage examples from sentimentfinding.IOtools.


The following presents 15 code examples of the IOtools.getfilenames_of_dir method, sorted by popularity. Note that the snippets are Python 2 code (print statements, dict.iteritems) and in places use the pre-3.0 NLTK API.
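Before working through the examples, here is a minimal sketch of what getfilenames_of_dir plausibly looks like, inferred only from its call sites below: it takes a directory path and a removeextension flag, and Example 5 suggests the flag defaults to stripping extensions, since that caller re-appends ".csv" itself. This is an assumption for orientation, not the actual sentimentfinding.IOtools implementation.

import os

# Hypothetical sketch of getfilenames_of_dir, inferred from its call sites;
# the real sentimentfinding.IOtools implementation may differ.
def getfilenames_of_dir(path, removeextension=True):
    # keep regular files only, skipping subdirectories
    names = [f for f in os.listdir(path)
             if os.path.isfile(os.path.join(path, f))]
    if removeextension:
        # "news001.txt" -> "news001"
        names = [os.path.splitext(f)[0] for f in names]
    return sorted(names)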

Example 1: add_resource_label

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    matrixlines = IOtools.readtextlines(matrixpath)  # 1st item = fileid, last item = filecat
    
    newmatrix = []
    
    if headers:
        matrixlines = matrixlines[2:]
    
    for instance in matrixlines:
        items = instance.split()
        fileid = items[0]
        print instance,
        path = datapath + os.sep + datasetname   # datapath is a module-level global in the source file
        foldernames = IOtools.getfoldernames_of_dir(path)
        #print foldernames
        for folder in foldernames:
            allfileids = IOtools.getfilenames_of_dir(path+os.sep+folder, removeextension=False)
            #print allfileids
            if fileid in allfileids:
                newspath = path+os.sep+folder+os.sep+fileid
                resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
                #print "## ",resourcename,"  ",type(instance),"  ~~ ",instance
                
                if replacelabel: items = items[:-1]
                newmatrix.append(items +[resourcename])
                break
    
    return newmatrix
Developer: dicleoztur, Project: subjectivity_detection, Lines: 30, Source: matrixhandler.py
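A hypothetical call could look like the following; both paths and the dataset name are placeholders, and todisc_matrix is the project's own writer, invoked the same way in Example 2:

# Hypothetical usage; both paths and "mydataset" are placeholders.
newmatrix = add_resource_label("/path/to/featurematrix.txt", "mydataset", replacelabel=True)
IOtools.todisc_matrix(newmatrix, "/path/to/featurematrix_labeled.txt", mode="a")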

Example 2: buildcorpus

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def buildcorpus(nfile, ncat, resourcename, path):
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]
    
    featurematrix = []
    doctermmatrix = []
    cfdTermDoc = nltk.ConditionalFreqDist()
    
    for catname in catnames:
        fileids = []
        p = resourcepath + os.sep + catname + os.sep
        fileids.extend(IOtools.getfilenames_of_dir(p, removeextension=False)[:nfile])
        corpus = CorpusFeatures(fileids, resourcename+os.sep+catname, p)
        corpus.getfeatures()
        datapoints = corpus.build_featurematrix()
        for k,v in datapoints.iteritems():
            featurematrix.append([k]+v+[resourcename])
            
        corpus.plot_features()
        
        #doc term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                cfdTermDoc[fileid].inc(term)
    
    IOtools.todisc_matrix(featurematrix, IOtools.results_rootpath+os.sep+"MATRIX"+str(nfile*ncat)+"texts.txt", mode="a")
Developer: dicleoztur, Project: tez0.1v, Lines: 29, Source: dataspace.py
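Note that cfdTermDoc[fileid].inc(term) is the pre-3.0 NLTK API. Since NLTK 3.0, FreqDist is Counter-based and the equivalent increment is:

cfdTermDoc[fileid][term] += 1   # NLTK >= 3.0 replacement for .inc(term)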

Example 3: corpus_construction

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def corpus_construction():
    start = datetime.now()
    corpus = Corpus("test")
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus/"
    labels = ["pos","neg"]
    labelwisepathlist = {}
    
    for label in labels:
        labelwisepathlist[label] = IOtools.getfilenames_of_dir(rootpath+os.sep+label, removeextension=False)
    
    corpus.read_corpus(rootpath, labelwisepathlist)
    end = datetime.now()
    
    print "Reading takes: ", str(end-start)
    
    print corpus.cfd_RootDoc["alevi"].N()
    print corpus.cfd_RootDoc.N()
    print len(corpus.cfd_RootDoc.conditions())
        
    print corpus.cfd_DocRoot.N()
    print len(corpus.cfd_DocRoot.conditions())

    df = corpus.compute_tfidf()
    
    end2 = datetime.now()
    print "tfidf matrix takes: ",str(end2-end)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 30, Source: documentsimilarity.py

Example 4: getwordsandlemmasfromfile

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)
    
    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        
        marker = "Haziran 2013"
        mark = txt.find(marker)    # skip metadata
        txt = txt[mark+len(marker):]
        
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]
        
        fdwords = nltk.FreqDist(words)
        
        fdroots = nltk.FreqDist(roots)
        
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]
               
        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 32, Source: articleanalysis.py

Example 5: conduct_experiments

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def conduct_experiments(inrootpath=metacorpus.learningdatapath, outrootpath=metaexperimentation.expscorepath, normalize=False):
    annottypes = ["double"]
    setsizes = ["150"]
    taggertypes = ["user"]
    numofcombs = 5
    
    #nclasses = arrange_N_classes.nclasses   # [4,5]
    
    #models = []
    svmclassifier = SVM("")
    clusterer = Clustering("")
    nbclassifier = NaiveBayes("")
    #nbclassifier = MultinomialNB(outrootpath)
    models = [svmclassifier, nbclassifier, clusterer]
    
    
    for annotationtype in annottypes:
        
        sp1 = IOtools.ensure_dir(os.path.join(outrootpath, annotationtype))
        
        for setsize in setsizes:
            
            sp2 = IOtools.ensure_dir(os.path.join(sp1, setsize))
            
            datasetspath = metacorpus.get_datasets_path(annotationtype, setsize)  # finaldatasets
            labelspath = metacorpus.get_labels_path(annotationtype, setsize)
            nclasses = IOtools.getfoldernames_of_dir(labelspath)
                      
            combfilenames = IOtools.getfilenames_of_dir(datasetspath)
            combfilenames = combfilenames[:numofcombs]
            
            for combfile in combfilenames:
            
                Xpath = os.path.join(datasetspath, combfile + ".csv")
                sp3 = IOtools.ensure_dir(os.path.join(sp2, combfile))
                
                for nclass in nclasses:   # count it on labelspath not nclasses
                    
                    #nclabelspath = arrange_N_classes.nclass_label_folder(labelspath, nc)  # get folder path containing nc-grouped labels
                    nclabelspath = os.path.join(labelspath, nclass)
                    nc = nclass.split(metaexperimentation.intrafeatsep)[-1]
                    nc = int(nc)
                    sp4 = IOtools.ensure_dir(os.path.join(sp3, nclass)) #"NC-"+str(nc)))
                    
                    for taggertype in taggertypes:
                        
                        rootscorespath = IOtools.ensure_dir(os.path.join(sp4, taggertype))
                        metaexperimentation.initialize_score_file(rootscorespath)
                        ylabelspath = os.path.join(nclabelspath, taggertype+".csv")
                        
                        for model in models:
                            
                            #labelnames = metacorpus.get_label_names()
                            model.prepare_experiment(Xpath, ylabelspath, rootscorespath, labelnames=None, normalize=normalize)
                            model.apply_algorithms(nc)    
Developer: dicleoztur, Project: subjectivity_detection, Lines: 57, Source: learner.py
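The nested ensure_dir calls build a directory tree for the score files. Under the values hard-coded in this snippet (and guessing the n-class folder names from the commented-out "NC-"+str(nc)), the layout would be roughly:

expscorepath/                # outrootpath default
    double/                  # annotationtype
        150/                 # setsize
            <combfile>/      # one folder per dataset combination (up to numofcombs)
                NC-5/        # one folder per n-class grouping (name guessed)
                    user/    # taggertype; the score file is initialized here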

Example 6: get_fileids_infolder

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def get_fileids_infolder(path, numofpoints):
    fileids = IOtools.getfilenames_of_dir(path, removeextension=False)
    
    if numofpoints > 0:
        # np.random.randint samples with replacement, so repeats are possible
        selected = np.random.randint(0, len(fileids), numofpoints)
        fileids = np.array(fileids)[selected].tolist()
    
    for fileid in fileids[:10]:
        print fileid,"  ",texter.getnewsmetadata(path+os.sep+fileid, ["resource"])
    return fileids   # was fileids.tolist(), which crashed when numofpoints <= 0
Developer: dicleoztur, Project: subjectivity_detection, Lines: 13, Source: shell.py
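Because np.random.randint samples indices with replacement, the same file id can be selected more than once. If the sampled points must be distinct, a variant sketch under that assumption would use np.random.choice with replace=False:

import numpy as np

# Hypothetical variant: sample distinct file ids (assumes numofpoints <= len(fileids)).
def get_distinct_fileids_infolder(path, numofpoints):
    fileids = IOtools.getfilenames_of_dir(path, removeextension=False)
    if 0 < numofpoints <= len(fileids):
        fileids = np.random.choice(fileids, size=numofpoints, replace=False).tolist()
    return fileids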

Example 7: clustering

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def clustering(corpuspath, resultspath, numofclusters):
    
    trainpath = corpuspath + os.sep + "train" + os.sep
    testpath = corpuspath + os.sep + "test" + os.sep
    
    
    # feature hold out!
    featurespaces = IOtools.getfilenames_of_dir(trainpath, removeextension=True)
    for featurespace in featurespaces:
        inpath = trainpath
        procedurename = "kmeans#"+str(numofclusters)+"_"+featurespace
        recordpath = resultspath
        classify.perform_clustering(featurespace, inpath, procedurename, recordpath, numofclusters)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 15, Source: shell.py

Example 8: merge_word_lists

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def merge_word_lists(indirectory, outdirectory, outfilename):
    fileids = IOtools.getfilenames_of_dir(indirectory, removeextension=False)
    

    allwords = []    
    for fileid in fileids:
        words = IOtools.readtextlines(indirectory+os.sep+fileid)
        allwords.extend(words)
    IOtools.todisc_list(outdirectory+os.sep+outfilename+".txt", allwords)
    
    fdist = nltk.FreqDist(allwords)
    IOtools.todisc_freqdist(outdirectory+os.sep+"weighted-"+outfilename+".txt", fdist)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 15, Source: articleanalysis.py

Example 9: conduct_experiments2

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def conduct_experiments2(resultspath):
    
    datafolder = "/home/dicle/Dicle/Tez/corpusstats/learning/data/random-single-N5/finaldatasets_test/"
    #datasetname = "feat-00111110000"   # dead assignment; the loop below reassigns datasetname
    
    datasetnames = IOtools.getfilenames_of_dir(datafolder)
    for datasetname in datasetnames: 
        epath = IOtools.ensure_dir(resultspath+os.sep+datasetname)
        experiment = Experimentation(experimentrootpath=epath, datasetfolder=datafolder, datasetname=datasetname)    
        datamatrixcsvpath, ylabels = experiment.prepare_data()
        
        #clusterer = Clustering(erootpath=epath, datamatrixpath=datamatrixcsvpath, yvector=ylabels)
        #clusterer.apply_algorithms(scorefilepath=experiment.scorefilepath)
       
        svmclassifier = SVM(erootpath=epath, datamatrixpath=datamatrixcsvpath, yvector=ylabels)
        svmclassifier.apply_algorithms(scorefilepath=experiment.scorefilepath)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 18, Source: learner.py

Example 10: corpus_construction_fromwords

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def corpus_construction_fromwords():
    recordpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/edit/wordletest/matrix/"
    inputpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/edit/wordletest/words/"
    labels = ["inlier", "outlier"]
    labelwisepathlist = {}
    
    for label in labels:
        labelwisepathlist[label] = IOtools.getfilenames_of_dir(inputpath+os.sep+label, removeextension=False)
    
    corpus = Corpus("wordletest")
    corpus.read_wordlists(inputpath, recordpath, labelwisepathlist)
    doctermfreqdf = corpus.get_docterm_matrix()
    corpus.compute_tfidf2(doctermfreqdf)
    return corpus
Developer: dicleoztur, Project: subjectivity_detection, Lines: 18, Source: documentsimilarity.py

Example 11: crawlandmakexmlcorpus

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def crawlandmakexmlcorpus():
    
    for resource in resourcefolders:
        p1 = os.path.join(rawcorpuspath, resource)
        xp1 = IOtools.ensure_dir(os.path.join(xmlcorpuspath, resource))  # replicate the folder hierarchy into the xml folder as well
        categories = IOtools.getfoldernames_of_dir(p1)
        for cat in categories:
            p2 = os.path.join(p1,cat)
            xp2 = IOtools.ensure_dir(os.path.join(xp1, cat))
            txtfiles = IOtools.getfilenames_of_dir(p2, removeextension=True)
            
            for filename in txtfiles:
                txtpath = p2 + os.sep + filename + fromextension
                xmlpath = xp2 + os.sep + filename + toextension
                txtcontent = IOtools.readtxtfile(txtpath)
                xmlcontent = headxml + "\n" + txtcontent + "\n" + footxml
                IOtools.todisc_txt(xmlcontent, xmlpath)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 19, Source: XMLifycorpus.py

Example 12: recordnewsmetadata_crawltxt

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def recordnewsmetadata_crawltxt(corpuspath=metacorpus.rawcorpuspath, resourcefolders=metacorpus.resources, csvfilepath=_metafilepath):
      
    for resource in resourcefolders:
        xp1 = IOtools.ensure_dir(os.path.join(corpuspath, resource))  # make sure the resource folder exists
        categories = IOtools.getfoldernames_of_dir(xp1)
        
        for cat in categories:
            xp2 = IOtools.ensure_dir(os.path.join(xp1, cat))
            filenames = IOtools.getfilenames_of_dir(xp2, removeextension=False)
            
            for filename in filenames:
                filepath = xp2 + os.sep + filename 
                metadataline = getmetadata_fromtxt(filepath)    #metadataline = getmetadata_fromtxt(filepath+".txt") 
                #print csvfilepath               
                IOtools.todisc_txt(metadataline, csvfilepath, mode="a")
        
            print "finished "+resource+"/"+cat
Developer: dicleoztur, Project: subjectivity_detection, Lines: 19, Source: extractnewsmetadata.py

Example 13: parseXML_phraseslexicon

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def parseXML_phraseslexicon(xmlinfilepath, txtoutfilepath):
    xmlfnames = IOtools.getfilenames_of_dir(xmlinfilepath, removeextension=False)   
    statsstr = "letter       numofphrases"+"\n"
    for fname in xmlfnames:
        letter = fname.split("_")[-1][:-4]    # each fname is of the form "ADB_letter.xml"
        print fname
        path = xmlinfilepath + os.sep  + fname
        tree = ET.parse(path)
        lexiconroot = tree.getroot()
        names = lexiconroot.findall(deyimDOMpath)
        phrases = []
        for name in names:
            phrase = name.text
            phrases.append(phrase.strip().lower())
        outpath = txtoutfilepath + os.sep + letter + ".txt"
        IOtools.todisc_list(outpath, phrases)  
        statsstr += letter+"\t"+str(len(phrases))+"\n"
    IOtools.todisc_txt(statsstr, txtoutfilepath+os.sep+"originalstats.txt")
Developer: dicleoztur, Project: subjectivity_detection, Lines: 20, Source: parsexml.py

Example 14: classification

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def classification(corpuspath, resultspath):
    trainpath = corpuspath + os.sep + "train" + os.sep
    testpath = corpuspath + os.sep + "test" + os.sep
    
    classifiers = ["naivebayes", "ldac"]
    learner = {}
    learner["naivebayes"] = classify.NBclassifier()
    learner["ldac"] = classify.LDACclassifier()
    # feature hold out!
    featurespaces = IOtools.getfilenames_of_dir(trainpath, removeextension=True)
    
    
    for featurespace in featurespaces: 
        trainset = pd.read_csv(trainpath+os.sep+featurespace+".csv", index_col=0)
        testset = pd.read_csv(testpath+os.sep+featurespace+".csv", index_col=0)     
        for clsalg in classifiers:
            procedurename = clsalg+"#_"+featurespace
            recordpath = IOtools.ensure_dir(resultspath + os.sep + procedurename)
            learner[clsalg].setname(procedurename)
            learner[clsalg].run(trainset, testset, recordpath)
   
    
Developer: dicleoztur, Project: subjectivity_detection, Lines: 25, Source: shell.py

Example 15: corpus_construction

# Required import: from sentimentfinding import IOtools [as alias]
# Alternatively: from sentimentfinding.IOtools import getfilenames_of_dir [as alias]
def corpus_construction():
    start = datetime.now()
    rootpath = "/home/dicle/Dicle/Tez/tests/test30-sept13/"
    inputpath = rootpath+os.sep+"dataset/"
    recordpath = rootpath+os.sep+"results/"
    
    corpus = Corpus("test30", recordpath)
    
    labels = ["cumhuriyet", "radikal", "vakit"]
    labelwisepathlist = {}
    
    for label in labels:
        labelwisepathlist[label] = IOtools.getfilenames_of_dir(inputpath+os.sep+label, removeextension=False)
    
    corpus.read_corpus(inputpath, recordpath, labelwisepathlist)
    end = datetime.now()
    
    print "Reading takes: ", str(end-start)
    
    print corpus.cfd_RootDoc["alevi"].N()
    print corpus.cfd_RootDoc.N()
    print len(corpus.cfd_RootDoc.conditions())
        
    print corpus.cfd_DocRoot.N()
    print len(corpus.cfd_DocRoot.conditions())

    freqdf = corpus.get_docterm_matrix()
    tfidfdf = corpus.compute_tfidf2(freqdf)
    
    end2 = datetime.now()
    print "tfidf matrix takes: ",str(end2-end)
    
    corpus.extract_features()
    end3 = datetime.now()
    print "features takes: ",str(end3-end2)
Developer: dicleoztur, Project: subjectivity_detection, Lines: 39, Source: dataspaceV2.py


Note: The sentimentfinding.IOtools.getfilenames_of_dir examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution or reuse should follow each project's license. Do not reproduce without permission.