当前位置: 首页>>代码示例>>Python>>正文


Python Document.getNumDifferentWords方法代码示例

本文整理汇总了Python中document.Document.getNumDifferentWords方法的典型用法代码示例。如果您正苦于以下问题:Python Document.getNumDifferentWords方法的具体用法?Python Document.getNumDifferentWords怎么用?Python Document.getNumDifferentWords使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类document.Document的用法示例。


在下文中一共展示了Document.getNumDifferentWords方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from document import Document [as 别名]
# 或者: from document.Document import getNumDifferentWords [as 别名]
def main():
    """Parse the file named on the command line and print the requested reports.

    Usage: ``parse.py <fileName> [-stats] [-lw] [-lwf] [-mfw] [-find=<word>]``

    A ``.txt`` file is parsed into a ``Document`` through ``document.Parser``;
    each option flag given after the file name then selects one report:

    * ``-stats``       -- total/different word, sentence and paragraph counts
    * ``-lw``          -- every word yielded by iterating the document
    * ``-lwf``         -- every word together with its count
    * ``-mfw``         -- the word(s) with the highest count
    * ``-find=<word>`` -- the count of ``<word>`` (may be given several times)

    A ``.csv`` file is parsed into a ``Custom_CSV`` through
    ``custom_csv.CSV_Parser``; no report is printed for it.

    Exits with status 22 when the file argument is missing, does not name an
    existing file, or has an extension other than ``.txt`` / ``.csv``.
    """
    # Initial input checking
    if len(sys.argv) < 2:
        print("Insufficient arguments. Usage: parse.py <fileName>")
        sys.exit(22)

    path = sys.argv[1]
    # Only what follows the file name is an option; the program name and the
    # file name itself must never be interpreted as flags.
    options = sys.argv[2:]

    if not os.path.isfile(path):
        print("File does not exist.")
        sys.exit(22)

    # Compare against the real suffix, dot included, so that a name which
    # merely ends in the letters "txt"/"csv" is not mistaken for that type.
    lowered = path.lower()
    if lowered.endswith(".txt"):
        doc = Document()
        with open(path) as handle:
            parser = document.Parser(handle, doc)
            parser.parseDocument()

        if "-stats" in options:
            print("Document Statistics:")
            print("Total Words         : %d" % (doc.getNumTotalWords()))
            print("Different Words     : %d" % (doc.getNumDifferentWords()))
            print("Number of Sentences : %d" % (doc.getNumSentences()))
            print("Number of Paragraphs: %d" % (doc.getNumParagraphs()))
            print("------------")
        if "-lw" in options:
            print("Words in Document:")
            for w in doc:
                print("%s" % w)
            print("------------")
        if "-lwf" in options:
            print("Word Frequency in Document:")
            for w in doc:
                print("%s: %s" % (w, doc.getWordCount(w)))
            print("------------")
        if "-mfw" in options:
            print("Most Frequent Word(s) in Document:")
            maxFreq = 0
            mostFreqWords = []
            for w in doc:
                # Look the count up once per word instead of up to three times.
                count = doc.getWordCount(w)
                if count > maxFreq:
                    # New maximum: every previously collected word is beaten.
                    maxFreq = count
                    mostFreqWords = [w]
                elif count == maxFreq:
                    mostFreqWords.append(w)
            for w in mostFreqWords:
                print("%s: %s" % (w, maxFreq))
            print("------------")

        prefix = "-find="
        for option in options:
            # The flag must be a prefix; slicing by its length is only correct
            # when the argument actually starts with it.
            if option.startswith(prefix):
                searchWord = option[len(prefix):]
                if searchWord:
                    print("Searching for Word \"%s\" in Document:" % searchWord)
                    print("%s: %s" % (searchWord, doc.getWordCount(searchWord)))
                    print("------------")
    elif lowered.endswith(".csv"):
        csv_data = Custom_CSV()
        with open(path) as handle:
            parser = custom_csv.CSV_Parser(handle, csv_data)
            parser.parseCSV()
    else:
        print("Bad file extension.")
        sys.exit(22)
开发者ID:jmcgover,项目名称:cpe466,代码行数:65,代码来源:parse.py

示例2: main

# 需要导入模块: from document import Document [as 别名]
# 或者: from document.Document import getNumDifferentWords [as 别名]

#.........这里部分代码省略.........
                    print("---");
                    for e in c:
                        print("%f" % e)
                print("mean: %f" % vector_math.meanRow(c))
            else:
                print("Column %d had no entries" % col)
            print("------------")

    elif filename[-3:] == 'txt':
        doc = Document()
        # Read Document
        print('Opening document...')
        try:
            with open(args.file) as file:
                print('Parsing document...')
                parser = document.Parser(file, doc)
                parser.parseDocument()
                print('Done!')
                print("------------")
        except FileNotFoundError as e:
            print('Could not find file %s' % (args.file))
            return e.errno
        # Display Document
        if args.display_all:
            print('Displaying all words and their occurrences...')
            print('%-16s %-13s' % ('Word', 'Occurrences'))
            for w in doc:
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.most_frequent:
            print('Finding most frequent...')
            mostFreq = doc.getMostFrequentWord()
            print('%-16s %-13s' % ('Most Frequent', 'Occurrences'))
            print('%-16s %-13d' % (mostFreq, doc.getWordCount(mostFreq)))
            print("------------")
        if args.most_frequent_percent:
            percent = int(args.most_frequent_percent)
            if percent > 100 or percent < 0:
                print('%d is an invalid percentage -- must be between 0 and 100' % percent)
                return 22
            print('Finding words within %d percent of the most frequent...' % percent)
            mostFreqWords = doc.getMostFrequentWords(percent)
            print('%-16s %-13s' % ('Most Frequent', 'Occurrences'))
            for w in mostFreqWords:
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.top:
            topNum = int(args.top)
            if topNum < 0:
                print('Please do not give the program negative numbers')
                return 22
            print('Finding top %d words...' % topNum)
            topWords = doc.getTopWords(topNum)
            print('%-16s %-13s' % ('Top %d Frequent' % topNum, 'Occurrences'))
            for w in topWords:
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.equal_to:
            freq = int(args.equal_to)
            print('Finding words with %d ocurrences...' % freq)
            wordsEqual = doc.getWordsEqualToFrequency(freq)
            print('%-16s %-13s' % ('Word', 'Occurrences'))
            wordsEqual = sorted(wordsEqual)
            for w in wordsEqual:
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.above:
            freq = int(args.above)
            print('Finding words above %d occurrences...' % freq)
            wordsAbove = doc.getWordsAboveFrequency(freq)
            print('%-16s %-13s' % ('Word', 'Occurrences'))
            wordsAbove = sorted(wordsAbove)
            for w in wordsAbove:
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.word:
            print('Finding occurrences for %s...' % args.word)
            print('%-16s %-13s' % ('Word', 'Occurrences'))
            for w in args.word.split(','):
                print('%-16s %-13d' % (w, doc.getWordCount(w)))
            print("------------")
        if args.is_in:
            print('Checking if %s is in the document...' % args.is_in)
            if doc.getWordCount(args.is_in) > 0:
                print('"%s" IS in the document!' % args.is_in)
            else:
                print('"%s" IS NOT in the document!' % args.is_in)
            print("------------")
        if args.stats:
            print("Document Statistics:")
            print("Number Total Words    : %d" % (doc.getNumTotalWords()))
            print("Number Different Words: %d" % (doc.getNumDifferentWords()))
            print("Number of Sentences   : %d" % (doc.getNumSentences()))
            print("Number of Paragraphs  : %d" % (doc.getNumParagraphs()))
            print("------------")

    else:
        print('Please provide either a .csv or a .txt file to be parsed')
        return 22
    return 0
开发者ID:jmcgover,项目名称:cpe466,代码行数:104,代码来源:run.py


注:本文中的document.Document.getNumDifferentWords方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。