当前位置: 首页>>代码示例>>Python>>正文


Python utils.to_unicode函数代码示例

本文整理汇总了Python中utils.to_unicode函数的典型用法代码示例。如果您正苦于以下问题:Python to_unicode函数的具体用法?Python to_unicode怎么用?Python to_unicode使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了to_unicode函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, input, transposed=True):
        """
        Set up a reader over a Matrix Market file and parse its preamble.

        `input` is either a string (file path) or a file-like object that
        supports `seek()` (e.g. gzip.GzipFile, bz2.BZ2File). The data is
        expected to be in the sparse (coordinate) Matrix Market format, with
        documents as rows of the matrix and document features as columns.

        Only the banner line and the first line that does not start with `%`
        (the dimensions line) are consumed here; the latter fills `num_docs`,
        `num_terms` and `num_nnz`. When `transposed` is false the first two
        values are swapped. An empty input leaves all three counters at zero.

        Raises ValueError when the first line does not start (case
        insensitively) with `%%MatrixMarket matrix coordinate real general`.
        """
        logger.info("initializing corpus reader from %s" % input)
        self.input = input
        self.transposed = transposed
        with utils.file_or_filename(self.input) as lines:
            try:
                header = utils.to_unicode(next(lines)).strip()
            except StopIteration:
                # empty input: there is no banner to validate
                pass
            else:
                banner_ok = header.lower().startswith('%%matrixmarket matrix coordinate real general')
                if not banner_ok:
                    raise ValueError("File %s not in Matrix Market format with coordinate real general; instead found: \n%s" %
                                    (self.input, header))

            self.num_docs = self.num_terms = self.num_nnz = 0
            for raw_line in lines:
                text = utils.to_unicode(raw_line)
                if text.startswith('%'):
                    # comment line, keep looking for the dimensions line
                    continue
                self.num_docs, self.num_terms, self.num_nnz = map(int, text.split())
                if not self.transposed:
                    self.num_docs, self.num_terms = self.num_terms, self.num_docs
                break

        logger.info("accepted corpus with %i documents, %i features, %i non-zero entries" %
                     (self.num_docs, self.num_terms, self.num_nnz))
开发者ID:nathan2718,项目名称:category2vec,代码行数:33,代码来源:matutils.py

示例2: single_picky

def single_picky(slug='test'):
    """
    Render the "picky" page stored at ``PICKY_DIR + slug + '.md'``.

    The file must start with a metadata block fenced by lines of three or
    more ``-`` or ``=`` characters. The first metadata line supplies the
    title and the second one the date; on each line the text after ``=>`` is
    used, or the whole line when there is no ``=>``. Everything after the
    closing fence is converted with ``to_markdown``.

    ``abort(404)`` is called when the file cannot be read or when it has no
    metadata block. A metadata block with only one line yields an empty date
    rather than an unhandled IndexError.

    Returns whatever ``template('picky.html', ...)`` returns.
    """
    # NOTE(review): slug is concatenated into the path unchecked -- confirm the
    # caller/route restricts it so that it cannot contain path separators.
    try:
        # The context manager closes the handle even if read() fails.
        with open(PICKY_DIR + slug + '.md') as f:
            picky = f.read()
    except IOError:
        abort(404)
    meta_regex = re.compile(
            r"^\s*(?:-|=){3,}\s*\n((?:.|\n)+?)\n\s*(?:-|=){3,}\s*\n*",
            re.MULTILINE
        )
    match = meta_regex.match(picky)
    if not match:
        abort(404)
    meta = match.group(1).split("\n")

    def meta_value(index):
        # Text after "=>" on the given metadata line, or the whole line when
        # there is no "=>"; empty string when the line does not exist.
        if index >= len(meta):
            return ''
        parts = meta[index].split("=>")
        return parts[1] if len(parts) > 1 else parts[0]

    title = meta_value(0)
    date = meta_value(1)
    cont = to_unicode(picky[match.end():])
    content = to_markdown(cont)
    return template('picky.html', content=content, title=to_unicode(title),
                                 date=to_unicode(date), slug=slug)
开发者ID:iTriumph,项目名称:MiniAkio,代码行数:30,代码来源:picky.py

示例3: load_cat2vec_format

 def load_cat2vec_format(cls, cat_model=None, sent_model=None, word_model=None):
     """
     Load category and sentence vectors from text files into a new Category2Vec.

     Each of the three sources is optional and is skipped when falsy.

     `cat_model`: path of a text file. Its first line holds six
     whitespace-separated integers, stored as `cat_len`, `layer1_size`, `sg`,
     `hs`, `negative` and `cbow_mean`. Every following line is a category
     id, a TAB, and whitespace-separated floats; the first `layer1_size`
     floats become that category's row in `model.cats`.

     `sent_model`: same layout, except that only the first integer of the
     first line is read (`sents_len`); rows go to `model.sents`.

     `word_model`: passed to `Word2Vec.load`; the loaded model and its vocab
     are attached as `model.w2v` and `model.vocab`.

     Returns the populated model.
     """
     model = Category2Vec(None)
     if cat_model:
         logger.info("loading %s object(cat) from %s" % (cls.__name__, cat_model))
         # `with` guarantees the file is closed; a bare open() in the for
         # statement would leave the handle to the garbage collector.
         with open(cat_model, "r") as fin:
             for count, line in enumerate(fin):
                 line = line.rstrip()
                 if count == 0:
                     # header line: sizes and training flags
                     info = line.split()
                     model.cat_len = int(info[0])
                     model.layer1_size = int(info[1])
                     model.sg = int(info[2])
                     model.hs = int(info[3])
                     model.negative = int(info[4])
                     model.cbow_mean = int(info[5])
                     model.cats = empty((model.cat_len, model.layer1_size), dtype=REAL)
                     model.cat_no_hash = {}
                     model.cat_id_list = []
                 else:
                     # data lines are numbered from 0 after the header
                     idx = count - 1
                     row = line.split("\t")
                     cat_id = utils.to_unicode(row[0])
                     model.cat_no_hash[cat_id] = idx
                     model.cat_id_list.append(cat_id)
                     vals = row[1].split()
                     for j in xrange(model.layer1_size):
                         model.cats[idx][j] = float(vals[j])
     if sent_model:
         logger.info("loading %s object(sentence) from %s" % (cls.__name__, sent_model))
         with open(sent_model, "r") as fin:
             for count, line in enumerate(fin):
                 line = line.rstrip()
                 if count == 0:
                     info = line.split()
                     model.sents_len = int(info[0])
                     # layer1_size comes from the category file, or from
                     # whatever Category2Vec(None) set when none was given
                     model.sents = empty((model.sents_len, model.layer1_size), dtype=REAL)
                     model.sent_no_hash = {}
                     model.sent_id_list = []
                 else:
                     idx = count - 1
                     row = line.split("\t")
                     sent_id = utils.to_unicode(row[0])
                     model.sent_no_hash[sent_id] = idx
                     model.sent_id_list.append(sent_id)
                     vals = row[1].split()
                     for j in xrange(model.layer1_size):
                         model.sents[idx][j] = float(vals[j])
     if word_model:
         logger.info("loading word2vec from %s" % word_model)
         model.w2v = Word2Vec.load(word_model)
         model.vocab = model.w2v.vocab
     return model
开发者ID:nathan2718,项目名称:category2vec,代码行数:57,代码来源:cat2vec.py

示例4: __init__

    def __init__ (self, id, uri, name, type):
        """Create a repository record.

        When `id` is None the current value of the class-wide
        `DBRepository.id_counter` is used and the counter is advanced by one;
        otherwise the supplied `id` is stored as-is. `uri`, `name` and `type`
        are stored after being passed through `to_unicode`.
        """
        if id is not None:
            self.id = id
        else:
            self.id = DBRepository.id_counter
            DBRepository.id_counter += 1

        # same conversion for every text field, in the order uri, name, type
        for attr, value in (('uri', uri), ('name', name), ('type', type)):
            setattr (self, attr, to_unicode (value))
开发者ID:achernet,项目名称:CVSAnalY,代码行数:10,代码来源:Database.py

示例5: __init__

 def __init__ (self, id, commit):
     """Create a log record from `commit`.

     When `id` is None the current value of the class-wide
     `DBLog.id_counter` is used and the counter is advanced by one;
     otherwise the supplied `id` is stored as-is. The revision and message
     are stored through `to_unicode`; date and composed_rev are copied
     unchanged. `committer` and `author` start out as None.
     """
     if id is not None:
         self.id = id
     else:
         self.id = DBLog.id_counter
         DBLog.id_counter += 1

     self.rev = to_unicode (commit.revision)
     # not filled in here
     self.committer = self.author = None
     self.date = commit.date
     self.message = to_unicode (commit.message)
     self.composed_rev = commit.composed_rev
开发者ID:AlertProject,项目名称:CVSAnalY,代码行数:13,代码来源:Database.py

示例6: __iter__

 def __iter__(self):
     """Iterate through the lines in the source.

     Each line is converted with `utils.to_unicode` and yielded as the list
     produced by `split()`.

     `self.source` is first probed as a file-like object by calling
     `seek(0)`. If that raises AttributeError, the source is treated as a
     filename and opened with `utils.smart_open`.
     """
     try:
         # Guard only the seek() probe: an AttributeError raised later, while
         # lines are being processed, must not be mistaken for "the source is
         # a filename" and trigger the fallback after lines were yielded.
         self.source.seek(0)
     except AttributeError:
         # If it didn't work like a file, use it as a string filename
         with utils.smart_open(self.source) as fin:
             for line in fin:
                 yield utils.to_unicode(line).split()
     else:
         for line in self.source:
             yield utils.to_unicode(line).split()
开发者ID:nathan2718,项目名称:category2vec,代码行数:13,代码来源:word2vec.py

示例7: response

    def response(self,msg,**kwargs):
        '''
        Build the reply text for a parsed incoming message.

        msg is the parsed message dict. It is modified in place: sender and
        receiver are swapped, 'type' is set to the reply type, and 'content'
        is set when the content kwarg is given.

        ex: response(message,type='yourType')
        optional kwargs:
          type    -- reply type; defaults to msg['type']
          content -- static reply text; forces the reply type to 'text'
          handler -- function taking the msg dict and returning a (modified) dict
          count   -- required when the reply type is 'news'
        support types:
        text,image,voice,video,music,news

        Returns the template for the reply type formatted with msg.
        Raises Exception when the type is not supported, when count is
        missing for 'news', or when formatting the template with msg fails.
        '''
        # the reply travels in the opposite direction
        msg['receiver'],msg['sender'] = msg['sender'],msg['receiver']
        legal_types = ['text','music','image','voice','video','news']

        # named msg_type so that the builtin `type` is not shadowed
        msg_type = kwargs.get('type') or msg['type']
        if msg_type == 'music' and not msg['hq_musurl']:
            # no high-quality URL given: reuse the normal one
            msg['hq_musurl'] = msg['musurl']

        # key word content ---- forces the type to text and returns a static string
        content = kwargs.get('content')
        if content:
            msg['type'] = msg_type = 'text'
            msg['content'] = to_unicode(content)
        if msg_type not in legal_types:
            raise Exception("Illgal type!You could only choose one type from legal_types!")
        msg['type'] = msg_type

        # key word handler ---- a function object, accepts a dict and returns a modified dict
        handler = kwargs.get('handler')
        if handler:
            msg = handler(msg)

        if msg_type != 'news':
            template = to_unicode(getattr(Template(),msg_type))
        else:
            count = kwargs.get('count')
            if not count:
                raise Exception('When type is set to news,the count kwarg is necessary!')
            template = to_unicode(Template().news(count))

        # Format exactly once, inside the guard. Formatting for the log call
        # ahead of the try block would let a missing field escape as a raw
        # error and make the explanatory exception below unreachable.
        try:
            retdata = template.format(**msg)
        except Exception:
            raise Exception("You did't pass enough args or pass wrong args,please check args which template needed.Read template.py maybe inspire your mind")
        logging.info(retdata)
        return retdata
开发者ID:SeavantUUz,项目名称:lolibot,代码行数:51,代码来源:loli.py

示例8: add_header

 def add_header(self):
     """Write the trace header to `self.file`.

     The header holds the script path (`sys.argv[0]`), the date formatted
     from `self.gtime.start_date`, and the column titles produced by
     `self.format`. Nothing happens when `self.file` is None.
     """
     if self.file is None:
         return
     started = time.strftime("%d/%m/%Y %H:%M:%S", self.gtime.start_date)
     pieces = [
         "Script file    : %s\n" % sys.argv[0],
         "Date           : %s\n" % started,
         "\n%s\n" % self.format("Time(s)", "Scope", "Info"),
     ]
     self.file.write(utils.to_unicode("".join(pieces)))
开发者ID:BackupTheBerlios,项目名称:pytestemb,代码行数:7,代码来源:trace.py

示例9: __init__

    def __init__(self, unique_name, base_filepath, parameters):
        """
        Load the word -> cluster mapping and build the cluster -> words index.

        Arguments
        ---------
            unique_name : feature unique name
            base_filepath : filepath of feature config
            parameters : lexicon parameters, presented by dictionary

        When the PARAM_ENABLED parameter equals 'false' the constructor
        returns early and neither `clustered_words` nor `clusters` is set.
        """
        self.unique_name = unique_name
        self.parameters = parameters

        words_file = parameters[BagOfClustersFeature.PARAM_CLUSTERED_WORDS_FILEPATH]
        filepath = os.path.join(base_filepath, words_file)

        enabled = parameters[BagOfClustersFeature.PARAM_ENABLED]
        if enabled == 'false':
            return

        print("Loading file with clusters of words: {}".format(filepath))
        with io.open(filepath, 'r', encoding='utf-8') as f:
            self.clustered_words = json.load(f, encoding='utf-8')

        print("Create dictionary with all clusters, accessed by cluster_id ...")
        self.clusters = {}
        for word, cluster_id in self.clustered_words.iteritems():
            # create the member list on first sight of a cluster id
            members = self.clusters.setdefault(cluster_id, [])
            members.append(utils.to_unicode(word))
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:29,代码来源:clustered_words.py

示例10: _create_des_

def _create_des_(msg):
    """Wrap an optional message in a description dict.

    Returns an empty dict when `msg` is None, otherwise a dict with the
    single key "msg" holding the unicode form of the message.

    Raises pexception.PytestembError when `msg` is neither None nor a string.
    """
    if msg is None:
        return {}
    if not isinstance(msg, types.StringTypes):
        raise pexception.PytestembError("Msg must be a string")
    return {"msg": "%s" % utils.to_unicode(msg)}
开发者ID:BackupTheBerlios,项目名称:pytestemb,代码行数:7,代码来源:__init__.py

示例11: get_cluster_id

 def get_cluster_id(self, word):
     """
     Look up the cluster of a word.

     Returns
     -------
        The value stored in `clustered_words` under the unicode form of
        'word', i.e. the id of the cluster which contains it.
     """
     key = utils.to_unicode(word)
     return self.clustered_words[key]
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:7,代码来源:clustered_words.py

示例12: get_local_features

def get_local_features(token, word_freq=None):
    """Build the list of feature strings for a single token.

    Which features are produced is controlled by the module-level
    `features_on` collection. The token is classified as alphabetic, numeric,
    alphanumeric or a single character, and the features enabled for that
    class are added. The 'Word' and 'Length' features are evaluated for every
    token, using the normalized (case-preserving) form of the token.

    `word_freq` is indexed by the normalized token whenever 'Freq' or 'Rare'
    is enabled for an alphabetic token, so it must be a mapping in that case.

    `token` must be non-empty (checked with an assert).
    """
    assert len(token) >= 1

    features = []

    ntoken = normalize(token, lowercase=False)

    if token.isalpha():
        if 'UpperCase' in features_on and first_upper_case(ntoken):
            features.append('IsUpperCase')

        if 'AllUpperCase' in features_on and all_upper_case(ntoken):
            features.append('IsAllUpperCase')

        if 'AllLowerCase' in features_on and all_lower_case(ntoken):
            features.append('IsAllLowerCase')

        if 'Freq' in features_on:
            features.append('Freq:%s' % str(word_freq[ntoken]))

        if 'Rare' in features_on and word_freq[ntoken] <= rare_thr:
            features.append('IsRare')

        if 'IsWord' in features_on:
            features.append('IsWord')

    elif token.isdigit():
        if 'Number' in features_on:
            features.append('IsNumber')

    elif token.isalnum():
        if 'AlphaNum' in features_on:
            features.append('IsAlphaNum')

    elif len(to_unicode(token)) == 1:
        # single non-alphanumeric character: separator or something else
        if is_punct(token):
            switch, label = 'Separator', 'IsSeparator'
        else:
            switch, label = 'NonAlphanum', 'IsNonAlphanum'
        if switch in features_on:
            features.append(label)

    # the literal word is not emitted for numbers and alphanumeric codes
    is_number_like = 'IsNumber' in features or 'IsAlphaNum' in features
    if 'Word' in features_on and not is_number_like:
        features.append('W=%s' % ntoken)

    if 'Length' in features_on:
        features.append('Length:%s' % str(len(ntoken)))

    return features
开发者ID:donvel,项目名称:affiliations,代码行数:59,代码来源:export.py

示例13: get_terms_info

 def get_terms_info(self, term):
     """
     Look up the entry stored in `terms_info` for the unicode form of 'term'.

     returns: dict
         amount of documents which include 'term', per sentiment class and
         in total (DocVocabulary.ALL)
     """
     return self.terms_info[to_unicode(term)]
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:8,代码来源:DocVocabulary.py

示例14: _check_same_origin

 def _check_same_origin(self, current_url):
     """
     Check whether `current_url` has the same origin as `self.origin`.

     The origin of the URL is its (scheme, netloc) pair, taken after the
     URL has been converted with `to_unicode`.
     """
     parsed = urlparse.urlparse(to_unicode(current_url))
     return (parsed.scheme, parsed.netloc) == self.origin
开发者ID:BoyceYang,项目名称:wsbs,代码行数:8,代码来源:spider.py

示例15: __getitem__

    def __getitem__(self, name):
        '''Return the value of header `name` from the message, decoded
        and joined into a single unicode string.

        None is returned when the header does not exist.'''
        raw = self._msg[name]
        if raw is not None:
            # decode_header splits the value into parts; each part is
            # handed to to_unicode as positional arguments
            pieces = [to_unicode(*part) for part in decode_header(raw)]
            return u''.join(pieces)
        return None
开发者ID:sirech,项目名称:deliver,代码行数:9,代码来源:simple.py


注:本文中的utils.to_unicode函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。