本文整理汇总了Python中pandas.notnull方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.notnull方法的具体用法?Python pandas.notnull怎么用?Python pandas.notnull使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas
的用法示例。
在下文中一共展示了pandas.notnull方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _create_observation_variable
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def _create_observation_variable(individual_selections, choices, partsworth):
    """Create the PyMC3 observed variable for one individual's selections.

    Gracefully skips questions the individual left unanswered (NaN).

    `individual_selections` is a Series of the selections made (0-based);
    NaN entries mean the answer was not provided.
    `choices` is a DataFrame with a hierarchical column index: level=0
    enumerates the questions, level=1 the profiles offered at that question.
    Its size is (n_questions, n_choices_per_question).
    `partsworth` is a slice of a PyMC3 matrix holding this individual's
    partsworth variables. Size is (n_profiles,).

    The likelihood is the softmax over utilities:
    exp(partsworth . profile_j) / sum_k exp(partsworth . profile_k).
    """
    answered = pd.notnull(individual_selections)
    # One utility (dot product) per question, over answered rows only.
    utilities = [
        tt.dot(question.values, partsworth)
        for _, question in choices[answered.values].groupby(axis=1, level=0)
    ]
    probabilities = tt.nnet.softmax(tt.stack(utilities, axis=0).T)
    return pm.Categorical(
        "Obs_%s" % individual_selections.name,
        probabilities,
        observed=individual_selections[answered.values].values,
    )
示例2: fillna
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def fillna(series_or_arr, missing_value=0.0):
    """Replace missing values in a pandas Series or numpy array in place.

    Arguments
    ---------
    series_or_arr : pandas.Series, numpy.ndarray
        The object whose missing values need to be replaced.
    missing_value : float, int, str
        The replacement value. Default 0.0. If this value is itself
        null, the input is returned unchanged.

    Returns
    -------
    pandas.Series, numpy.ndarray
        The same object, with its missing values filled.
    """
    # A null missing_value means "leave the data untouched".
    if pandas.notnull(missing_value):
        if isinstance(series_or_arr, numpy.ndarray):
            nan_locations = numpy.isnan(series_or_arr)
            series_or_arr[nan_locations] = missing_value
        else:
            series_or_arr.fillna(missing_value, inplace=True)
    return series_or_arr
示例3: load_metadata
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def load_metadata(self):
    """Load tasks and metadata from disk.

    Tries the current on-disk format first (a JSON task list plus a
    gzip-compressed metadata CSV). If that fails for any reason, falls back
    to the obsolete joblib format and, when found, re-saves it in the new
    format before returning.

    Returns:
        tuple: (tasks, metadata_df) where tasks is the task list and
        metadata_df is a pandas DataFrame with NaNs replaced by None.

    Raises:
        ValueError: if neither format is present on disk.
    """
    try:
        tasks_filename, metadata_filename = self._get_metadata_filename()
        with open(tasks_filename) as fin:
            tasks = json.load(fin)
        metadata_df = pd.read_csv(metadata_filename, compression='gzip')
        # Normalize missing values to None for downstream consumers.
        metadata_df = metadata_df.where((pd.notnull(metadata_df)), None)
        return tasks, metadata_df
    except Exception:
        # Best-effort: any failure here falls through to the legacy path.
        pass

    # Load obsolete format -> save in new format
    metadata_filename = os.path.join(self.data_dir, "metadata.joblib")
    if os.path.exists(metadata_filename):
        tasks, metadata_df = load_from_disk(metadata_filename)
        # Columns not present in the new format.
        del metadata_df['task_names']
        del metadata_df['basename']
        save_metadata(tasks, metadata_df, self.data_dir)
        return tasks, metadata_df
    raise ValueError("No Metadata Found On Disk")
示例4: _make_json_dataset
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def _make_json_dataset(self, edges, nodes, name):
    """Build the v1 ETL JSON payload from the edge and node frames."""
    elist, nlist = self._bind_attributes_v1(edges, nodes)
    # Replace NaNs with None so they serialize as JSON null.
    edge_records = elist.where(pandas.notnull(elist), None).to_dict(orient='records')
    bindings = {
        'idField': self._node or Plotter._defaultNodeId,
        'destinationField': self._destination,
        'sourceField': self._source,
    }
    dataset = {
        'name': PyGraphistry._config['dataset_prefix'] + name,
        'bindings': bindings,
        'type': 'edgelist',
        'graph': edge_records,
    }
    if nlist is not None:
        dataset['labels'] = nlist.where(pandas.notnull(nlist), None).to_dict(orient='records')
    return dataset
# Main helper for creating ETL2 payload
示例5: objectEncoder
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def objectEncoder(vg, series, dtype):
    """Encode an object-dtype series as a UTF-8 string vector on `vg`.

    Missing values are replaced in place by the NUL character. Returns the
    string-vector submessage and its ctype metadata.
    """
    series.where(pandas.notnull(series), '\0', inplace=True)
    # vec is a string[] submessage within a repeated field.
    vec = vg.string_vectors.add()
    try:
        encoded = series.astype('unicode')
    except UnicodeDecodeError:
        warnings.warn("Warning: escaping unicode")
        encoded = series.apply(lambda v: v.decode('utf-8'))
    for value in encoded:
        vec.values.append(value)
    return (vec, {'ctype': 'utf8'})
# NaN (as well as Infinity and undefined) are not valid JSON. Use this guard to filter
# them out when creating the json metadata.
示例6: test_pertmeth
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def test_pertmeth(self):
    # Test with specified perturbation method.
    df = gendat()
    original = df.copy()
    observed_mask = pd.notnull(df)
    nrow, ncol = df.shape
    for method in ("gaussian", "boot"):
        imp_data = mice.MICEData(df, perturbation_method=method)
        for _ in range(2):
            imp_data.update_all()
            # Imputation must preserve the frame's shape...
            assert_equal(imp_data.data.shape[0], nrow)
            assert_equal(imp_data.data.shape[1], ncol)
            # ...and must leave the observed values untouched.
            assert_allclose(original[observed_mask], imp_data.data[observed_mask])
    assert_equal(imp_data._cycle_order, ['x5', 'x3', 'x4', 'y', 'x2', 'x1'])
示例7: uniprot_reviewed_checker
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def uniprot_reviewed_checker(uniprot_id):
    """Check if a single UniProt ID is reviewed or not.

    Args:
        uniprot_id: UniProt accession to look up.

    Returns:
        bool: If the entry is reviewed
    """
    raw_result = StringIO(bsup.search('id:' + uniprot_id, columns='id,reviewed', frmt='tab'))
    status_df = pd.read_table(raw_result, sep='\t', index_col=0)
    status_df = status_df.fillna(False)
    # Drop rows whose Status field is missing, then map the text to booleans.
    status_df = status_df[pd.notnull(status_df.Status)]
    status_df = status_df.replace(to_replace="reviewed", value=True)
    status_df = status_df.replace(to_replace="unreviewed", value=False)
    status_by_id = status_df.to_dict()['Status']
    return status_by_id[uniprot_id]
示例8: parse_psqs
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def parse_psqs(psqs_results_file):
    """Parse a PSQS result file and return a Pandas DataFrame of the results.

    Args:
        psqs_results_file: Path to psqs results file (tab-separated, no
            header; column 0 is the PDB path, columns 1-4 the PSQS scores).

    Returns:
        Pandas DataFrame: Summary of PSQS results, one row per structure
        that has a non-null total score.
    """
    # TODO: generalize column names for all results, save as dict instead

    def _clean_name(x):
        # BUG FIX: the original used str(x).strip('./').strip('.pdb'), but
        # str.strip removes a *character set* from both ends, corrupting any
        # name that starts/ends with '.', '/', 'p', 'd' or 'b'
        # (e.g. './1dpb.pdb' -> '1'). Remove the prefix/suffix explicitly.
        s = str(x)
        if s.startswith('./'):
            s = s[2:]
        if s.endswith('.pdb'):
            s = s[:-4]
        return s

    psqs_results = pd.read_csv(psqs_results_file, sep='\t', header=None)
    psqs_results['pdb_file'] = psqs_results[0].apply(_clean_name)
    psqs_results = psqs_results.rename(
        columns={1: 'psqs_local', 2: 'psqs_burial', 3: 'psqs_contact', 4: 'psqs_total'}).drop(0, axis=1)
    # 4-character names are PDB IDs; longer names are I-TASSER model files.
    psqs_results['u_pdb'] = psqs_results['pdb_file'].apply(lambda x: x.upper() if len(x) == 4 else np.nan)
    psqs_results['i_entry_name'] = psqs_results['pdb_file'].apply(lambda x: x.split('_model1')[0] if len(x) > 4 else np.nan)
    # Keep only rows that actually have a total score.
    psqs_results = psqs_results[pd.notnull(psqs_results.psqs_total)]
    return psqs_results
示例9: execute_internal
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def execute_internal(self, context, **kwargs):
    """Run the task: load the movie dataset and return the cleaned frame.

    :param context: execution context (unused here)
    :param kwargs: extra keyword arguments (unused here)
    :return: filtered pandas DataFrame with a derived 'genres_root' column
    """
    df = pd.read_csv('https://raw.githubusercontent.com/bailaohe/parade/master/assets/movie_metadata.csv')
    # Project the dataset down to the attributes we are interested in.
    interesting = ['movie_title', 'genres', 'title_year', 'content_rating',
                   'budget', 'num_voted_users', 'imdb_score']
    df = df[interesting]
    # Filter out records with *NAN* title_year or a non-positive budget.
    df = df[pd.notnull(df['title_year'])]
    df = df[df['budget'] > 0]
    # Extract the genres ROOT (first '|'-separated token).
    df['genres_root'] = df['genres'].apply(lambda g: g.split('|')[0])
    return df
示例10: test_df_equivalent_after_sql
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def test_df_equivalent_after_sql(self):
    """Round-trip a CSV-sourced frame through the DB and compare."""
    # Parse the CSV
    df_source = services.load_df_from_csvfile(
        io.StringIO(self.csv1),
        0,
        0)
    # Store the DF in the DB, then load it back
    pandas.store_table(df_source, self.table_name)
    df_dst = pandas.load_table(self.table_name)
    # NaN in boolean columns are now None, so normalize before comparing
    for column in ('bool1', 'bool2'):
        df_source[column] = df_source[column].where(
            pd.notnull(df_source[column]),
            None)
    # Data frames must be identical
    assert df_source.equals(df_dst)
示例11: clean_and_write_dataframe_to_csv
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def clean_and_write_dataframe_to_csv(data, filename):
"""
Cleans a dataframe of np.NaNs and saves to file via pandas.to_csv
:param data: data to write to CSV
:type data: :class:`pandas.DataFrame`
:param filename: Path to file to write CSV to. if None, string of data
will be returned
:type filename: str | None
:return: If the filename is None, returns the string of data. Otherwise
returns None.
:rtype: str | None
"""
# cleans np.NaN values
data = data.where((pd.notnull(data)), None)
# If filename=None, to_csv will return a string
result = data.to_csv(path_or_buf=filename, encoding='utf-8', dtype=str, index=False, na_rep=None,
skipinitialspace=True, quoting=csv.QUOTE_ALL)
logging.info("Dataframe of shape %s has been stored." % str(data.shape))
return result
示例12: pre_processing
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def pre_processing(self, labels):
    """Drop rows of self.raw_data with missing coordinates or labels.

    Removes every row whose 'lat' or 'lon' is NaN, and every row with a
    NaN in any of the given label columns.

    :param labels: iterable of column names that must be non-null
    :return: None (self.raw_data is filtered in place)
    """
    # Single dropna pass instead of one boolean-mask filter per column:
    # dropna(subset=...) drops a row when any listed column is null, which
    # matches the old chain of pd.notnull filters exactly.
    # (Dead commented-out median-smoothing code removed.)
    required = ['lat', 'lon'] + list(labels)
    self.raw_data = self.raw_data.dropna(subset=required)
    return None
示例13: generate_avro
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def generate_avro(src_file: Text, output_file: Text):
    """Generates avro file based on src file.

    Args:
        src_file: path to Chicago taxi dataset.
        output_file: output path for avro file.
    """
    frame = pd.read_csv(src_file)
    # fastavro writes None as avro null, so swap NaN's for None's first.
    frame = frame.where(pd.notnull(frame), None)
    parsed_schema = fastavro.parse_schema(get_schema())
    with open(output_file, 'wb') as out:
        fastavro.writer(out, parsed_schema, frame.to_dict(orient='records'))
示例14: load_geodataframe
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def load_geodataframe(geo_filename):
    """
    Load input GeoDataFrame

    Parameters
    ----------
    geo_filename : string
        input GeoDataFrame filename

    Returns
    ----------
    geopandas.GeoDataFrame
        loaded data, projected to UTM coordinates
    """
    df_osm_data = gpd.read_file(geo_filename)
    # Normalize missing data: None -> NaN, and empty strings
    # (JSON NULL is sometimes read as '') -> NaN
    df_osm_data.fillna(value=np.nan, inplace=True)
    df_osm_data.replace('', np.nan, inplace=True)

    def _ints_from_string(value):
        # "1,2,3" -> [1, 2, 3]
        return [int(token) for token in value.split(",")]

    def _strs_from_string(value):
        # "a,b" -> ["a", "b"]
        return value.split(",")

    # Recover list-valued columns serialized as comma-separated strings
    if "activity_category" in df_osm_data.columns:
        df_osm_data["activity_category"] = df_osm_data.activity_category.apply(
            lambda x: _strs_from_string(x) if pd.notnull(x) else np.nan)
    if "containing_parts" in df_osm_data.columns:
        df_osm_data["containing_parts"] = df_osm_data.containing_parts.apply(
            lambda x: _ints_from_string(x) if pd.notnull(x) else np.nan)
    if "containing_poi" in df_osm_data.columns:
        df_osm_data["containing_poi"] = df_osm_data.containing_poi.apply(
            lambda x: _ints_from_string(x) if pd.notnull(x) else np.nan)
    # To UTM coordinates
    return ox.project_gdf(df_osm_data)
示例15: _make_index_names
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import notnull [as 别名]
def _make_index_names(self, name1, name2):
    """Return the pair of index names, suffixed when they collide.

    If both names are non-null and equal, disambiguate them with the
    configured suffixes; otherwise return them unchanged.
    """
    both_present = pandas.notnull(name1) and pandas.notnull(name2)
    if both_present and name1 == name2:
        return [
            "{}{}".format(name1, self.suffixes[0]),
            "{}{}".format(name1, self.suffixes[1]),
        ]
    return [name1, name2]