Python pandas.isnull方法代码示例

本文整理汇总了Python中pandas.isnull方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.isnull方法的具体用法？Python pandas.isnull怎么用？Python pandas.isnull使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。


示例1: compute_mAP_N

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def compute_mAP_N(result,this_cls_pred,this_cls_gt):
    """Return the average precision of one class, averaged over tIoU thresholds.

    Parameters
    ----------
    result : object
        Must expose ``tiou_thresholds``, ``matched_gt_id_cols`` (one column
        name per threshold) and ``average_num_instance_per_class``.
    this_cls_pred : table-like
        Predictions of a single class. For each threshold the column named
        ``result.matched_gt_id_cols[tidx]`` is read; a null entry marks the
        prediction as a false positive, anything else as a true positive.
        NOTE(review): presumably rows are already sorted by descending score,
        since cumulative sums are taken in row order -- confirm with caller.
    this_cls_gt : table-like
        Ground truth of the same class; only the ``'gt-id'`` column is used,
        to count the distinct ground-truth instances.

    Returns
    -------
    float
        Mean over all thresholds of the interpolated average precision.
    """
    n_thresholds = len(result.tiou_thresholds)
    n_pred = len(this_cls_pred)

    ap = np.zeros(n_thresholds)
    tp = np.zeros((n_thresholds, n_pred))
    fp = np.zeros((n_thresholds, n_pred))

    for tidx in range(n_thresholds):
        # A prediction without a matched ground-truth id is a false positive.
        unmatched = pd.isnull(this_cls_pred[result.matched_gt_id_cols[tidx]]).values
        fp[tidx, unmatched] = 1
        tp[tidx, ~unmatched] = 1

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same dtype.
    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / len(np.unique(this_cls_gt['gt-id']))

    # Precision is expressed through recall scaled by the average number of
    # instances per class rather than through the raw true-positive count.
    scaled_recall = recall_cumsum * result.average_num_instance_per_class
    precision_cumsum = scaled_recall / (scaled_recall + fp_cumsum)

    for tidx in range(n_thresholds):
        ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx,:], recall_cumsum[tidx,:])
    return ap.mean()

# Initialize true positive and false positive vectors. 

示例2: SetDistribution

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def SetDistribution(self, distinct_values):
    """Record every distinct value this column will ever see.

    The values are de-duplicated and sorted. If the input contains any null
    (``pd.isnull`` is true for both ``np.nan`` and ``np.datetime64('NaT')``),
    a single null marker is placed at position 0: ``NaT`` for datetime64
    input, ``np.nan`` otherwise.

    Asserts that the distribution has not been set before and, when
    ``self.distribution_size`` is already known, that it matches the number
    of values found. Returns ``self``.
    """
    assert self.all_distinct_values is None

    null_mask = pd.isnull(distinct_values)
    non_null = distinct_values[~null_mask]
    ordered = np.sort(np.unique(non_null))

    # np.sort would put NaT first but NaN last; nulls are stripped above and
    # re-inserted here so that any null always ends up at the front.
    if np.any(null_mask):
        if np.issubdtype(distinct_values.dtype, np.datetime64):
            null_marker = np.datetime64('NaT')
        else:
            null_marker = np.nan
        ordered = np.insert(ordered, 0, null_marker)

    if self.distribution_size is not None:
        assert len(ordered) == self.distribution_size

    self.all_distinct_values = ordered
    self.distribution_size = len(ordered)
    return self

示例3: _compute_vectorized

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def _compute_vectorized(self, s_left, s_right):

        # Values or agree/disagree
        if self.agree_value == 'value':
            compare = s_left.copy()
            compare[s_left != s_right] = self.disagree_value

            compare = pandas.Series(self.disagree_value, index=s_left.index)
            compare[s_left == s_right] = self.agree_value

        # Only when disagree value is not identical with the missing value
        if self.disagree_value != self.missing_value:
            compare[(s_left.isnull() | s_right.isnull())] = self.missing_value

        return compare 

示例4: _compute_frequency

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def _compute_frequency(self, col):

        # https://github.com/pydata/pandas/issues/3729
        na_value = 'NAN'
        value_count = col.fillna(na_value)

        c = value_count.groupby(by=value_count).transform('count')
        c = c.astype(numpy.float64)

        if self.normalise:
            c = c / len(col)

        # replace missing values
        c[col.isnull()] = self.missing_value

        return c 

示例5: jarowinkler_similarity

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def jarowinkler_similarity(s1, s2):
    """Compute the Jaro-Winkler similarity of two sequences pair by pair.

    Parameters
    ----------
    s1, s2 : iterable
        Values to compare; they are zipped, so the shorter one decides how
        many pairs are scored.

    Returns
    -------
    pandas.Series
        One score per pair. A pair for which the scorer fails yields
        ``np.nan`` when either member is null; any other failure is re-raised.
    """
    conc = pandas.Series(list(zip(s1, s2)))

    # Newer jellyfish releases expose the scorer as ``jaro_winkler_similarity``;
    # fall back to the older ``jaro_winkler`` name when it is not available.
    try:
        from jellyfish import jaro_winkler_similarity as jaro_winkler
    except ImportError:
        from jellyfish import jaro_winkler

    def jaro_winkler_apply(x):
        try:
            return jaro_winkler(x[0], x[1])
        except Exception:
            # The scorer cannot handle nulls: report them as missing, but do
            # not hide errors caused by anything else.
            if pandas.isnull(x[0]) or pandas.isnull(x[1]):
                return np.nan
            raise

    return conc.apply(jaro_winkler_apply)

示例6: levenshtein_similarity

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def levenshtein_similarity(s1, s2):
    """Compute a normalised Levenshtein similarity of two sequences pair by pair.

    The score of a pair is ``1 - distance / max(len(a), len(b))``.

    Parameters
    ----------
    s1, s2 : iterable
        Values to compare; they are zipped, so the shorter one decides how
        many pairs are scored.

    Returns
    -------
    pandas.Series
        One score per pair. A pair for which scoring fails yields ``np.nan``
        when either member is null; any other failure is re-raised.
    """
    conc = pandas.Series(list(zip(s1, s2)))

    from jellyfish import levenshtein_distance

    def levenshtein_apply(x):
        try:
            return 1 - levenshtein_distance(x[0], x[1]) \
                / np.max([len(x[0]), len(x[1])])
        except Exception:
            # Nulls have no length/distance: report them as missing, but do
            # not hide errors caused by anything else.
            if pandas.isnull(x[0]) or pandas.isnull(x[1]):
                return np.nan
            raise

    return conc.apply(levenshtein_apply)

示例7: damerau_levenshtein_similarity

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def damerau_levenshtein_similarity(s1, s2):
    """Compute a normalised Damerau-Levenshtein similarity pair by pair.

    The score of a pair is ``1 - distance / max(len(a), len(b))``.

    Parameters
    ----------
    s1, s2 : iterable
        Values to compare; they are zipped, so the shorter one decides how
        many pairs are scored.

    Returns
    -------
    pandas.Series
        One score per pair. A pair for which scoring fails yields ``np.nan``
        when either member is null; any other failure is re-raised.
    """
    conc = pandas.Series(list(zip(s1, s2)))

    from jellyfish import damerau_levenshtein_distance

    def damerau_levenshtein_apply(x):
        try:
            return 1 - damerau_levenshtein_distance(x[0], x[1]) \
                / np.max([len(x[0]), len(x[1])])
        except Exception:
            # Nulls have no length/distance: report them as missing, but do
            # not hide errors caused by anything else.
            if pandas.isnull(x[0]) or pandas.isnull(x[1]):
                return np.nan
            raise

    return conc.apply(damerau_levenshtein_apply)

示例8: detect_integer

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def detect_integer(e):
    """Return True when ``e`` looks like an integer, False otherwise.

    Empty strings and nulls are never integers. Otherwise ``e`` is matched
    against the module-level ``integer_regex``. Only if that match cannot be
    attempted (``e`` is not a string) is ``float(e).is_integer()`` tried, and
    only if that conversion fails as well is ``e`` parsed with ``locale.atoi``
    under each entry of the module-level ``locales``.

    The process-wide locale is restored before returning.
    """
    if e == '' or pd.isnull(e):
        return False

    try:
        if integer_regex.match(e):
            return True
    except TypeError:
        # Not a string: fall back to numeric conversion.
        try:
            if float(e).is_integer():
                return True
        except (TypeError, ValueError, OverflowError):
            # Last resort: locale-aware parsing. setlocale changes global
            # state, so remember the current setting and put it back.
            saved = locale.setlocale(locale.LC_ALL)
            try:
                for l in locales:
                    try:
                        locale.setlocale(locale.LC_ALL, l)
                        if float(locale.atoi(e)).is_integer():
                            return True
                    except (locale.Error, TypeError, ValueError, AttributeError):
                        # Unsupported locale or unparsable value: try the next one.
                        continue
            finally:
                locale.setlocale(locale.LC_ALL, saved)
    return False

示例9: detect_decimal

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def detect_decimal(e):
    """Return True when ``e`` can be read as a decimal number, False otherwise.

    Empty strings and nulls are never decimals. Otherwise ``e`` is accepted
    when it matches the module-level ``decimal_regex``, when ``Decimal(e)``
    succeeds, or when ``locale.atof`` can parse it under any entry of the
    module-level ``locales``.

    The process-wide locale is restored before returning.
    """
    if e == '' or pd.isnull(e):
        return False

    if decimal_regex.match(e):
        return True

    try:
        Decimal(e)
        return True
    except (ArithmeticError, TypeError, ValueError):
        # decimal.InvalidOperation derives from ArithmeticError.
        pass

    # Last resort: locale-aware parsing (e.g. a comma as decimal separator).
    # setlocale changes global state, so remember the current setting and
    # put it back afterwards.
    saved = locale.setlocale(locale.LC_ALL)
    try:
        for l in locales:
            try:
                locale.setlocale(locale.LC_ALL, l)
                locale.atof(e)
                return True
            except (locale.Error, TypeError, ValueError, AttributeError):
                # Unsupported locale or unparsable value: try the next one.
                continue
    finally:
        locale.setlocale(locale.LC_ALL, saved)
    return False

示例10: _recmat_exact

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def _recmat_exact(presented, recalled, features):
    lists = presented.index.get_values()
    cols = max(presented.shape[1], recalled.shape[1])
    result = np.empty((presented.shape[0], cols))*np.nan
    for li, l in enumerate(lists):
        p_list = presented.loc[l]
        r_list = recalled.loc[l]
        for i, feature in enumerate(features):
            get_feature = lambda x: np.array(x[feature]) if not np.array(pd.isnull(x['item'])).any() else np.nan
            p = np.vstack(p_list.apply(get_feature).get_values())
            r = r_list.dropna().apply(get_feature).get_values()
            r = np.vstack(list(filter(lambda x: x is not np.nan, r)))
                m = [np.where((p==x).all(axis=1))[0] for x in r]
            except AttributeError:
                m = []
            result[li, :len(m)] = [x[0]+1 if len(x)>0 else np.nan for x in m]
    return result 

示例11: test_conversions

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def test_conversions(data_missing):
    """Check that casting ``data_missing`` to object yields ``[nan, 1]``.

    ``data_missing`` is supplied by the caller (a fixture); the expectations
    below require it to hold one missing value followed by the integer 1.
    """
    # astype to object series
    df = pd.DataFrame({'A': data_missing})
    result = df['A'].astype('object')
    expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A')
    tm.assert_series_equal(result, expected)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = df['A'].astype('object').values
    expected = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for r, e in zip(result, expected):
        if pd.isnull(r):
            assert pd.isnull(e)
        elif is_integer(r):
            # PY2 can be int or long, so only require that both are integers
            # rather than comparing their exact types.
            assert r == e
            assert is_integer(e)
        else:
            assert r == e
            assert type(r) == type(e)

示例12: solve

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def solve(self, solver='glpk', verbose=False, keepfiles=False, resolve=False, **kwargs):
    """Solve the model and store the results on the instance.

    Parameters
    ----------
    solver : str
        Solver name passed to ``solve_model``. ``'xpress'`` always forces
        the second (re-solve) pass.
    verbose, keepfiles : bool
        Forwarded unchanged to ``solve_model``.
    resolve : bool
        When true, every non-null unit-commitment value from the first solve
        is fixed on ``self._model.UnitOn`` and the model is solved again with
        ``is_mip=False``.
    **kwargs
        Forwarded unchanged to ``solve_model``.

    Side effects: sets ``self._results`` (after each solve) and sets
    ``self._status`` to ``'solved'``.
    """
    if solver == 'xpress':
        resolve = True

    solve_model(self._model, solver=solver, verbose=verbose, keepfiles=keepfiles, **kwargs)
    self._results = PSSTResults(self)

    if resolve:
        for t, row in self.results.unit_commitment.iterrows():
            # ``Series.iteritems`` was removed from pandas; ``items`` is the
            # equivalent that works across versions.
            for g, v in row.items():
                if not pd.isnull(v):
                    self._model.UnitOn[g, t].fixed = True
                    self._model.UnitOn[g, t] = int(float(v))

        solve_model(self._model, solver=solver, verbose=verbose, keepfiles=keepfiles, is_mip=False, **kwargs)
        self._results = PSSTResults(self)

    self._status = 'solved'

示例13: test_gene

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def test_gene(self):
        # Checks that every entry of ``gene_entries`` has at least one row in
        # the gene table obtained from the cellphonedb database manager, and
        # fails if any entry has none.
        # NOTE(review): ``gene_entries`` is not defined in this block --
        # presumably a sequence of dicts mapping column name -> expected
        # value; confirm against the defining module.

        # NOTE(review): the call below is never closed in this listing (its
        # arguments and closing parenthesis are missing), so the block does
        # not parse as shown. Restore it from the upstream source.
        dataframe = cellphonedb_app.cellphonedb.database_manager.get_repository(

        data_not_match = False

        for gene in gene_entries:
            # Start from the full table and narrow it one column at a time.
            db_gene = dataframe

            for column_name in gene:
                # NOTE(review): as written, the equality filter below also
                # runs after the null filter, which leaves no rows whenever
                # the expected value is None. An ``else:`` before the second
                # assignment looks to be missing -- confirm upstream.
                if gene[column_name] == None:
                    db_gene = db_gene[pd.isnull(db_gene[column_name])]
                    db_gene = db_gene[db_gene[column_name] == gene[column_name]]

            # No surviving row means the expected entry is absent.
            if (len(db_gene) < 1):
                app_logger.warning('Failed cheking Gene:')
                app_logger.warning('Expected data:')
                data_not_match = True

        self.assertFalse(data_not_match, 'Some Gene doesnt match') 

示例14: test_calc_stats

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def test_calc_stats():
    """Check that ``ffn.calc_stats`` yields null for stats that divide by zero.

    Uses column ``C`` of the module-level ``df`` over several date windows.
    """
    # twelve_month_win_perc: null on the shorter window (divide by zero) ...
    short_window = df.C['2010-10-01':'2011-08-01']
    short_stats = ffn.calc_stats(short_window).stats
    assert pd.isnull(short_stats['twelve_month_win_perc'])

    # ... and a real number once the window starts a year earlier.
    long_window = df.C['2009-10-01':'2011-08-01']
    long_stats = ffn.calc_stats(long_window).stats
    assert not pd.isnull(long_stats['twelve_month_win_perc'])

    # yearly_sharpe: present for ordinary prices ...
    prices = df.C['2009-01-01':'2012-01-01']
    baseline_stats = ffn.calc_stats(prices).stats
    assert 'yearly_sharpe' in baseline_stats.index

    # ... and null once every positive price is flattened to 1.0
    # (divide by zero; this call throws warnings).
    prices[prices > 0.0] = 1.0
    flat_stats = ffn.calc_stats(prices).stats
    assert pd.isnull(flat_stats['yearly_sharpe'])

示例15: _mode

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import isnull [as 别名]
def _mode(x, def_fill=ImputerMixin._def_fill):
    """Get the most common value in a 1d H2OFrame.

    Ties will be handled in a non-specified manner.

    Parameters
    ----------
    x : ``H2OFrame``, shape=(n_samples, 1)
        The 1d frame from which to derive the mode

    def_fill : object, optional
        Value returned when the frame holds no non-null value.

    Returns
    -------
    The most frequent non-null value of the first column of ``x``, or
    ``def_fill`` if there is none.
    """
    idx = x.as_data_frame(use_pandas=True)[x.columns[0]].value_counts().index

    # The index is ordered by frequency. Take the first non-null entry; if
    # the most common is null this is the next most common, and if nothing
    # is left (e.g. 100% null, which leaves the index empty) fall back to
    # def_fill instead of indexing past the end.
    return next((value for value in idx if not pd.isnull(value)), def_fill)
