當前位置: 首頁>>代碼示例>>Python>>正文


Python feature.Word2Vec方法代碼示例

本文整理匯總了Python中pyspark.mllib.feature.Word2Vec方法的典型用法代碼示例。如果您正苦於以下問題:Python feature.Word2Vec方法的具體用法?Python feature.Word2Vec怎麼用?Python feature.Word2Vec使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pyspark.mllib.feature的用法示例。


在下文中一共展示了feature.Word2Vec方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_word2vec_setters

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def test_word2vec_setters(self):
    """Check that every chained Word2Vec setter stores the value it was given.

    A fresh ``Word2Vec`` is configured through its fluent setters and each
    corresponding attribute is then read back and verified.
    """
    model = (
        Word2Vec()
        .setVectorSize(2)
        .setLearningRate(0.01)
        .setNumPartitions(2)
        .setNumIterations(10)
        .setSeed(1024)
        .setMinCount(3)
        .setWindowSize(6)
    )

    self.assertEqual(model.vectorSize, 2)
    # The learning rate is a float, so it is bounded from above instead of
    # being compared for exact equality.
    self.assertTrue(model.learningRate < 0.02)

    # Remaining settings are integers and must round-trip exactly.
    integer_settings = (
        ("numPartitions", 2),
        ("numIterations", 10),
        ("seed", 1024),
        ("minCount", 3),
        ("windowSize", 6),
    )
    for attribute, expected in integer_settings:
        self.assertEqual(getattr(model, attribute), expected)
開發者ID:alec-heif,項目名稱:MIT-Thesis,代碼行數:18,代碼來源:tests.py

示例2: create_model_text

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def create_model_text(self, data, params):
    """Fit a Word2Vec model on rows of space-separated text.

    :param data: object exposing ``map``; every element is split on single
        spaces before training (presumably an RDD of strings -- confirm
        against the caller).
    :param params: mapping that may provide ``learningRate``,
        ``numIterations`` and ``minCount``; missing keys fall back to
        0.025, 10 and 5 respectively.
    :returns: the result of ``Word2Vec.fit`` on the tokenised rows.
    """
    # Coerce the settings first so a malformed value fails before any
    # model object is created.
    rate = float(params.get('learningRate', 0.025))
    iterations = int(params.get('numIterations', 10))
    min_count = int(params.get('minCount', 5))

    trainer = (
        Word2Vec()
        .setLearningRate(rate)
        .setNumIterations(iterations)
        .setMinCount(min_count)
    )

    tokenised = data.map(lambda row: row.split(" "))
    return trainer.fit(tokenised)
開發者ID:openstack,項目名稱:meteos,代碼行數:15,代碼來源:meteos-script-1.6.0.py

示例3: main

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def main(in_loc, out_dir):
    """Train a Word2Vec model on a text file and persist the results.

    The input is read with ``SparkContext.textFile``, each line is split on
    single spaces, and a 128-dimensional model is trained for 5 iterations.
    The word-to-vector mapping is pickled to ``w2v_dict.pkl`` inside
    ``out_dir`` and the model itself is saved to ``out_dir``.

    :param in_loc: location of the input text, passed to ``sc.textFile``.
    :param out_dir: directory receiving ``w2v_dict.pkl`` and the saved model.
    """
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    sc = ps.SparkContext(appName='Word2Vec')
    try:
        logger.info('Distributing input data')
        raw_data = sc.textFile(in_loc).cache()
        data = raw_data.map(lambda line: line.split(' '))
        print(data.getNumPartitions())

        logger.info('Training Word2Vec model')
        model = Word2Vec().setVectorSize(128).setNumIterations(5).fit(data)

        w2v_dict = model.getVectors()
        logger.info('Saving word to vectors dictionary')
        with open(path.join(out_dir, 'w2v_dict.pkl'), 'wb') as f:
            cPickle.dump(w2v_dict, f, cPickle.HIGHEST_PROTOCOL)

        model.save(sc, out_dir)
    finally:
        # Release the SparkContext even when training, pickling or saving
        # raises, so the context is not leaked.
        sc.stop()
開發者ID:gushecht,項目名稱:noungroups,代碼行數:21,代碼來源:spark_word2vec.py

示例4: init_model_controller

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def init_model_controller(self):
    """Create the controller named by ``self.job_args['model']['type']``.

    The new controller is stored on ``self.controller``. When the type is
    not one of the names listed below, nothing is assigned.
    """
    requested = self.job_args['model']['type']

    # Each entry pairs a model type with a zero-argument factory. The
    # lambdas defer the class lookup and construction until a type matches.
    factories = (
        ('KMeans', lambda: KMeansModelController()),
        ('Recommendation', lambda: RecommendationController()),
        ('LogisticRegression', lambda: LogisticRegressionModelController()),
        ('LinearRegression', lambda: LinearRegressionModelController()),
        ('RidgeRegression', lambda: RidgeRegressionModelController()),
        ('DecisionTreeRegression',
         lambda: DecisionTreeModelController('Regression')),
        ('DecisionTreeClassification',
         lambda: DecisionTreeModelController('Classification')),
        ('RandomForestRegression',
         lambda: RandomForestModelController('Regression')),
        ('RandomForestClassification',
         lambda: RandomForestModelController('Classification')),
        ('Word2Vec', lambda: Word2VecModelController()),
        ('FPGrowth', lambda: FPGrowthModelController()),
        ('NaiveBayes', lambda: NaiveBayesModelController()),
    )

    for type_name, build in factories:
        if requested == type_name:
            self.controller = build()
            break
開發者ID:openstack,項目名稱:meteos,代碼行數:30,代碼來源:meteos-script-1.6.0.py

示例5: test_word2vec_get_vectors

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def test_word2vec_get_vectors(self):
    """Fit Word2Vec on a tiny corpus and check how many vectors it keeps.

    The corpus consists of every prefix of "abcdefg", longest first, so
    "a" occurs seven times, "b" six times, and so on down to "g" once.
    """
    letters = "abcdefg"
    sentences = [
        list(letters[:size]) for size in range(len(letters), 0, -1)
    ]

    fitted = Word2Vec().fit(self.sc.parallelize(sentences))

    # NOTE(review): three vectors presumably means only "a", "b" and "c"
    # meet Word2Vec's default minimum count -- confirm against pyspark docs.
    self.assertEqual(len(fitted.getVectors()), 3)
開發者ID:alec-heif,項目名稱:MIT-Thesis,代碼行數:14,代碼來源:tests.py

示例6: generate_word2vec_model

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def generate_word2vec_model(doc):
    """Fit a Word2Vec model on ``doc`` with vector size 10 and seed 410.

    :param doc: training input handed unchanged to ``Word2Vec.fit``.
    :returns: whatever ``Word2Vec.fit`` returns for ``doc``.
    """
    trainer = Word2Vec()
    trainer.setVectorSize(10)
    trainer.setSeed(410)
    return trainer.fit(doc)
開發者ID:hanhanwu,項目名稱:Hanhan_Play_With_Social_Media,代碼行數:4,代碼來源:reddit_word2vec.py

示例7: test_word2vec_setters

# 需要導入模塊: from pyspark.mllib import feature [as 別名]
# 或者: from pyspark.mllib.feature import Word2Vec [as 別名]
def test_word2vec_setters(self):
    """Check that every chained Word2Vec setter stores the value it was given.

    A fresh ``Word2Vec`` is configured through its fluent setters and each
    corresponding attribute is then read back and verified.
    """
    model = (
        Word2Vec()
        .setVectorSize(2)
        .setLearningRate(0.01)
        .setNumPartitions(2)
        .setNumIterations(10)
        .setSeed(1024)
        .setMinCount(3)
    )

    self.assertEqual(model.vectorSize, 2)
    # The learning rate is a float, so it is bounded from above instead of
    # being compared for exact equality.
    self.assertTrue(model.learningRate < 0.02)

    # Remaining settings are integers and must round-trip exactly.
    integer_settings = (
        ("numPartitions", 2),
        ("numIterations", 10),
        ("seed", 1024),
        ("minCount", 3),
    )
    for attribute, expected in integer_settings:
        self.assertEqual(getattr(model, attribute), expected)
開發者ID:v-v-vishnevskiy,項目名稱:pyspark,代碼行數:16,代碼來源:tests.py


注:本文中的pyspark.mllib.feature.Word2Vec方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。