当前位置: 首页>>代码示例>>Python>>正文


Python DataFrame.roles方法代码示例

本文整理汇总了Python中odps.df.DataFrame.roles方法的典型用法代码示例。如果您正苦于以下问题:Python DataFrame.roles方法的具体用法?Python DataFrame.roles怎么用?Python DataFrame.roles使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在odps.df.DataFrame的用法示例。


在下文中一共展示了DataFrame.roles方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_df_consecutive

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import roles [as 别名]
 def test_df_consecutive(self):
     """Filter, label, preview, split, train and predict in sequence.

     Builds a DataFrame over the ionosphere table, keeps rows whose ``a04``
     is non-zero, marks ``class`` as the label through ``roles``, then
     trains a logistic regression on one split and predicts on the other.
     """
     self.create_ionosphere(IONOSPHERE_TABLE)
     source_table = self.odps.get_table(IONOSPHERE_TABLE)
     frame = DataFrame(source_table)
     # Row filter first, then the label role, exactly one after the other.
     frame = frame[frame['a04'] != 0].roles(label='class')
     frame.head(10)
     train_part, test_part = frame.split(0.6)
     classifier = LogisticRegression(epsilon=0.01)
     predicted = classifier.train(train_part).predict(test_part)
     predicted.to_pandas()
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:13,代码来源:test_dataframe.py

示例2: test_df_combined

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import roles [as 别名]
 def test_df_combined(self):
     """Combine projection, role assignment, caching and model scoring.

     After training and predicting, the final statement builds and executes
     an averaged binary log-loss style expression over the predictions.
     """
     self.create_ionosphere(IONOSPHERE_TABLE)
     df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
     # Drop rows where a04 is zero.
     df = df[df['a04'] != 0]
     # Project four columns; a05 is doubled via map() and keeps its name.
     df = df['a01', df.a05.map(lambda v: v * 2).rename('a05'), 'a06', 'class']
     df = df.roles(label='class')
     # Filter again after the role assignment, then call cache() on the result.
     df = df[df.a05 != 0].cache()
     # Re-project: a06 is replaced by (a06 + 1) / 2 under the same name.
     df = df[df.a05, ((df.a06 + 1) / 2).rename('a06'), 'class']
     train, test = df.split(0.6)
     lr = LogisticRegression(epsilon=0.01)
     model = lr.train(train)
     predicted = model.predict(test)
     # Computes -(sum(y * log(p) + (1 - y) * log(1 - p))) / count(p), where
     # y is the 'class' column and p is prediction_score. The rename() calls
     # only assign names ('t', 't0' ... 't4') to the sub-expressions.
     (- 1.0 * ((predicted['class'] * predicted.prediction_score.log().rename('t')).rename('t1') + (
     (1 - predicted['class']) * (1 - predicted.prediction_score).log().rename('t0')).rename('t2')).rename(
         't3').sum() / predicted.prediction_score.count()).rename('t4').execute()
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:17,代码来源:test_dataframe.py

示例3: Test

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import roles [as 别名]
class Test(MLTestBase):
    def setUp(self):
        """Create the iris table and expose it as ``self.df`` for each test."""
        super(Test, self).setUp()
        self.create_iris(IRIS_TABLE)
        iris_table = self.odps.get_table(IRIS_TABLE)
        self.df = DataFrame(iris_table)

    def test_coll_field_operations(self):
        # select_features
        self.assertRaises(ValueError, lambda: self.df.select_features())
        df2 = self.df.select_features("sepal_length sepal_width petal_length")
        self.assertEqual(
            _df_roles(df2),
            dict(category="", sepal_width="FEATURE", sepal_length="FEATURE", petal_length="FEATURE", petal_width=""),
        )
        df3 = df2.select_features("petal_width", add=True)
        self.assertEqual(
            _df_roles(df3),
            dict(
                category="",
                sepal_width="FEATURE",
                sepal_length="FEATURE",
                petal_length="FEATURE",
                petal_width="FEATURE",
            ),
        )
        # exclude_fields
        self.assertRaises(ValueError, lambda: self.df.exclude_fields())
        df4 = df3.exclude_fields("sepal_length sepal_width")
        self.assertEqual(
            _df_roles(df4),
            dict(category="", sepal_width="", sepal_length="", petal_length="FEATURE", petal_width="FEATURE"),
        )
        # weight_field
        self.assertRaises(ValueError, lambda: self.df.weight_field(None))
        df5 = df3.weight_field("sepal_width")
        self.assertEqual(
            _df_roles(df5),
            dict(
                category="", sepal_width="WEIGHT", sepal_length="FEATURE", petal_length="FEATURE", petal_width="FEATURE"
            ),
        )
        # label_field
        self.assertRaises(ValueError, lambda: self.df.label_field(None))
        df6 = self.df.label_field("category")
        self.assertEqual(
            _df_roles(df6),
            dict(
                category="LABEL",
                sepal_width="FEATURE",
                sepal_length="FEATURE",
                petal_length="FEATURE",
                petal_width="FEATURE",
            ),
        )
        # roles
        self.assertIs(self.df, self.df.roles())
        df7 = self.df.roles(label="category", weight="sepal_width")
        self.assertEqual(
            _df_roles(df7),
            dict(
                category="LABEL",
                petal_length="FEATURE",
                petal_width="FEATURE",
                sepal_width="WEIGHT",
                sepal_length="FEATURE",
            ),
        )
        # discrete
        df8 = self.df.discrete("sepal_width, sepal_length")
        self.assertEqual(
            _df_continuity(df8),
            dict(
                category="DISCRETE",
                sepal_width="DISCRETE",
                sepal_length="DISCRETE",
                petal_length="CONTINUOUS",
                petal_width="CONTINUOUS",
            ),
        )
        # continuous
        df9 = df8.continuous("sepal_width")
        self.assertEqual(
            _df_continuity(df9),
            dict(
                category="DISCRETE",
                sepal_width="CONTINUOUS",
                sepal_length="DISCRETE",
                petal_length="CONTINUOUS",
                petal_width="CONTINUOUS",
            ),
        )
        # key_value
        df10 = self.df.key_value("sepal_length sepal_width")
        self.assertEqual(
            _df_key_value(df10),
            dict(
                category="",
                petal_length="",
                petal_width="",
                sepal_width="KVConfig(kv=:, item=,)",
#.........这里部分代码省略.........
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:103,代码来源:test_mixin.py

示例4: test_dynamic_output

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import roles [as 别名]
 def test_dynamic_output(self):
     """Run ``select_features`` on a labelled DataFrame and print a summary.

     ``select_features`` returns two values; only the first is used, via
     ``describe().execute()``.
     """
     self.create_ionosphere(IONOSPHERE_TABLE)
     ionosphere = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
     # The label is passed as a column object here rather than a column name.
     labelled = ionosphere.roles(label=ionosphere['class'])
     filtered, _importance = select_features(labelled)
     summary = filtered.describe().execute()
     print(summary)
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:8,代码来源:test_dataframe.py

示例5: Test

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import roles [as 别名]
class Test(MLTestBase):
    def setUp(self):
        """Create the corpus table, bind ``self.df`` with roles, enable dry run."""
        super(Test, self).setUp()
        self.create_corpus(CORPUS_TABLE)
        corpus = DataFrame(self.odps.get_table(CORPUS_TABLE))
        # 'id' takes the doc_id role and 'content' the doc_content role.
        self.df = corpus.roles(doc_id='id', doc_content='content')

        # Every test in this class runs with the runner's dry_run flag on.
        options.runner.dry_run = True

    def _create_str_compare_table(self, table_name):
        """Recreate ``table_name`` and fill it with indexed string pairs.

        The table is dropped if it exists, created with columns
        ``(str_id bigint, col1 string, col2 string)`` and then written with
        one record per string pair, prefixed by its position in the list.
        """
        string_pairs = [
            ['inputTableName', 'inputTableName'],
            ['outputTableName', 'mapTableName'],
            ['inputSelectedColName1', 'outputTableName'],
            ['inputSelectedColName2', 'inputSelectedColName'],
            ['inputAppendColNames', 'mapSelectedColName'],
            ['inputTablePartitions', 'inputAppendColNames'],
            ['outputColName', 'inputAppendRenameColNames'],
            ['method', 'mapAppendColNames'],
            ['lambda', 'mapAppendRenameColNames'],
            ['k', 'inputTablePartitions'],
            ['lifecycle', 'mapTablePartitions'],
            ['coreNum', 'outputColName'],
            ['memSizePerCore', 'method'],
        ]
        # The position of each pair becomes the value of the str_id column.
        records = [[position] + pair for position, pair in enumerate(string_pairs)]
        self.odps.execute_sql('drop table if exists ' + table_name)
        self.odps.execute_sql('create table %s (str_id bigint, col1 string, col2 string)' % table_name)
        self.odps.write_table(table_name, records)

    def _create_noise_table(self, table_name):
        """Recreate ``table_name`` with a single ``noise_col`` of noise tokens."""
        noise_tokens = (u',', u'。', u'《', u'》', u'的', u'是')
        # One single-column record per token.
        records = [[token] for token in noise_tokens]
        self.odps.execute_sql('drop table if exists ' + table_name)
        self.odps.execute_sql('create table %s (noise_col string)' % table_name)
        self.odps.write_table(table_name, records)

    def test_tf_idf(self):
        """Chain SplitWord, DocWordStat and TFIDF, then persist the result.

        A parameter-check case built by ``gen_check_params_case`` is attached
        to the TF-IDF output before it is persisted.
        """
        words = SplitWord().transform(self.df)
        # DocWordStat yields two outputs; only the first feeds TFIDF.
        freq, _ = DocWordStat().transform(words)
        tf_set = TFIDF().transform(freq)
        expected_params = {
            'docIdCol': 'id',
            'inputTableName': TEMP_TABLE_PREFIX + '0_doc_word_stat_3_1',
            'countCol': 'count',
            'outputTableName': TFIDF_TABLE,
            'wordCol': 'word',
        }
        tf_set._add_case(self.gen_check_params_case(expected_params))
        tf_set.persist(TFIDF_TABLE)

    def test_str_diff(self):
        """Run ``str_diff`` on the string-compare table and persist the output."""
        self._create_str_compare_table(STR_COMP_TABLE)
        compare_df = DataFrame(self.odps.get_table(STR_COMP_TABLE))
        diff_df = str_diff(compare_df, col1='col1', col2='col2')
        expected_params = {
            'inputTableName': STR_COMP_TABLE,
            'k': '2',
            'outputTableName': COMP_RESULT_TABLE,
            'inputSelectedColName2': 'col2',
            'inputSelectedColName1': 'col1',
            'method': 'levenshtein_sim',
            'lambda': '0.5',
            'outputColName': 'output',
        }
        diff_df._add_case(self.gen_check_params_case(expected_params))
        diff_df.persist(COMP_RESULT_TABLE)

    def test_top_n(self):
        """Run ``top_n_similarity`` of the compare table against itself."""
        self._create_str_compare_table(STR_COMP_TABLE)
        compare_df = DataFrame(self.odps.get_table(STR_COMP_TABLE))
        # The same DataFrame serves as both the input and the map side.
        top_n_df = top_n_similarity(compare_df, compare_df, col='col1', map_col='col1')
        expected_params = {
            'inputTableName': STR_COMP_TABLE,
            'k': '2',
            'outputColName': 'output',
            'mapSelectedColName': 'col1',
            'topN': '10',
            'inputSelectedColName': 'col1',
            'outputTableName': TOP_N_TABLE,
            'mapTableName': STR_COMP_TABLE,
            'method': 'levenshtein_sim',
            'lambda': '0.5',
        }
        top_n_df._add_case(self.gen_check_params_case(expected_params))
        top_n_df.persist(TOP_N_TABLE)

    def test_filter_noises(self):
        """Filter the split-words table against the noise table and persist."""
        self.odps.delete_table(FILTERED_WORDS_TABLE, if_exists=True)

        self.create_splited_words(SPLITED_TABLE)
        self._create_noise_table(NOISE_TABLE)
        words_df = DataFrame(self.odps.get_table(SPLITED_TABLE)).roles(doc_content='content')
        noise_df = DataFrame(self.odps.get_table(NOISE_TABLE))
        filtered = filter_noises(words_df, noise_df)
        expected_params = {
            'noiseTableName': NOISE_TABLE,
            'outputTableName': FILTERED_WORDS_TABLE,
            'selectedColNames': 'content',
            'inputTableName': SPLITED_TABLE,
        }
        filtered._add_case(self.gen_check_params_case(expected_params))
        filtered.persist(FILTERED_WORDS_TABLE)

    def test_keywords_extraction(self):
        """Run ``extract_keywords`` on the split-words table and persist."""
        self.odps.delete_table(KW_EXTRACTED_TABLE, if_exists=True)
        self.create_splited_words(SPLITED_TABLE)
        words_table = self.odps.get_table(SPLITED_TABLE)
        words_df = DataFrame(words_table).roles(doc_id='doc_id', doc_content='content')
        extracted = extract_keywords(words_df)
        expected_params = {
            'dumpingFactor': '0.85',
            'inputTableName': SPLITED_TABLE,
            'epsilon': '0.000001',
            'windowSize': '2',
            'topN': '5',
            'outputTableName': KW_EXTRACTED_TABLE,
            'docIdCol': 'doc_id',
            'maxIter': '100',
            'docContent': 'content',
        }
        extracted._add_case(self.gen_check_params_case(expected_params))
        extracted.persist(KW_EXTRACTED_TABLE)

    def test_summarize_text(self):
        """Run ``summarize_text`` with 'content' in the sentence role and persist."""
        self.create_corpus(CORPUS_TABLE)
        # Adds the sentence role on top of the roles assigned in setUp.
        sentence_df = self.df.roles(sentence='content')
        summarized = summarize_text(sentence_df)
        expected_params = {
            'dumpingFactor': '0.85',
            'inputTableName': CORPUS_TABLE,
            'sentenceCol': 'content',
            'epsilon': '0.000001',
            'k': '2',
            'topN': '3',
            'outputTableName': TEXT_SUMMARIZED_TABLE,
            'docIdCol': 'id',
            'maxIter': '100',
            'similarityType': 'lcs_sim',
            'lambda': '0.5',
        }
        summarized._add_case(self.gen_check_params_case(expected_params))
        summarized.persist(TEXT_SUMMARIZED_TABLE)

    def test_count_ngram(self):
        self.create_word_triple(WORD_TRIPLE_TABLE)
        word_triple_df = DataFrame(self.odps.get_table(WORD_TRIPLE_TABLE)).select_features('word')
        counted = count_ngram(word_triple_df)
        counted._add_case(self.gen_check_params_case({
            'outputTableName': COUNT_NGRAM_TABLE, 'inputSelectedColNames': 'word', 'order': '3',
            'inputTableName': WORD_TRIPLE_TABLE}))
#.........这里部分代码省略.........
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:103,代码来源:test_text_algo.py


注:本文中的odps.df.DataFrame.roles方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。