本文整理汇总了Python中pandas.read_json方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_json方法的具体用法?Python pandas.read_json怎么用?Python pandas.read_json使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.read_json方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_mnli_pandas_df
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def load_mnli_pandas_df(local_cache_path=".", file_split="train"):
    """Run the download/extract helper, then load one MultiNLI split.

    Args:
        local_cache_path (str, optional): Directory handed to
            ``download_file_and_extract`` and joined with the split's
            file name. Defaults to the current working directory.
        file_split (str, optional): The subset to load; used as the key
            into ``DATA_FILES``.
            One of: {"train", "dev_matched", "dev_mismatched"}
            Defaults to "train".

    Returns:
        pd.DataFrame: pandas DataFrame containing the specified
            MultiNLI subset.

    Raises:
        Exception: Anything raised by ``download_file_and_extract`` is
            propagated to the caller unchanged.
    """
    # No try/except here: catching an exception only to re-raise the very
    # same object adds nothing, so failures simply propagate.
    download_file_and_extract(local_cache_path, file_split)

    data_path = os.path.join(local_cache_path, DATA_FILES[file_split])
    # lines=True: the file is parsed as one JSON record per line.
    return pd.read_json(data_path, lines=True)
示例2: test_read_jsonl_unicode_chars
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_read_jsonl_unicode_chars():
    """Check ``read_json(..., lines=True)`` on text with a non-ASCII char.

    The same two-record payload is parsed twice, once wrapped in a
    ``StringIO`` and once as a plain string; each result is compared with
    the same expected two-column frame.
    """
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK
    payload = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    rows = [[u"foo\u201d", "bar"], ["foo", "bar"]]

    # simulate file handle
    from_handle = read_json(StringIO(payload), lines=True)
    assert_frame_equal(from_handle, DataFrame(rows, columns=['a', 'b']))

    # simulate string
    from_text = read_json(payload, lines=True)
    assert_frame_equal(from_text, DataFrame(rows, columns=['a', 'b']))
示例3: test_to_jsonl
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_to_jsonl():
    """Check ``to_json(orient="records", lines=True)`` output and round trip.

    Three frames are serialised to line-delimited JSON and compared with
    the expected text; the two frames containing quote / backslash
    characters are additionally read back with ``read_json`` and compared
    with the original frame.

    Raises:
        AssertionError: If the serialised text or a round-tripped frame
            does not match.
    """
    # Local import keeps the block self-contained.
    from io import StringIO

    def _check(frame, expected, round_trip):
        result = frame.to_json(orient="records", lines=True)
        # Some pandas releases terminate the final record with "\n" and
        # others do not; compare without it so both outputs are accepted.
        assert result.rstrip("\n") == expected
        if round_trip:
            # Feed the text through a file-like object: passing literal
            # JSON text straight to read_json is deprecated in recent pandas.
            assert_frame_equal(read_json(StringIO(result), lines=True), frame)

    # GH9180
    _check(
        DataFrame([[1, 2], [1, 2]], columns=['a', 'b']),
        '{"a":1,"b":2}\n{"a":1,"b":2}',
        round_trip=False,
    )

    _check(
        DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']),
        '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}',
        round_trip=True,
    )

    # GH15096: escaped characters in columns and data
    _check(
        DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                  columns=["a\\", 'b']),
        ('{"a\\\\":"foo\\\\","b":"bar"}\n'
         '{"a\\\\":"foo\\"","b":"bar"}'),
        round_trip=True,
    )
示例4: test_readjson_chunks_multiple_empty_lines
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_readjson_chunks_multiple_empty_lines(chunksize):
j = """
{"A":1,"B":4}
{"A":2,"B":5}
{"A":3,"B":6}
"""
orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
test = pd.read_json(j, lines=True, chunksize=chunksize)
if chunksize is not None:
test = pd.concat(test)
tm.assert_frame_equal(
orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize))
示例5: test_v12_compat
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_v12_compat(self):
    """Compare two stored JSON fixtures with a frame rebuilt in code.

    ``tsframe_v012.json`` is compared with the full frame (including the
    'date' and 'modified' columns); ``tsframe_iso_v012.json`` is compared
    with the same frame minus 'modified'. Both files are read from
    ``self.dirpath``.
    """
    values = [
        [1.56808523, 0.65727391, 1.81021139, -0.17251653],
        [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
        [1.51493992, 0.11805825, 1.629455, -1.31506612],
        [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
        [0.05951614, -2.69652057, 1.28163262, 0.34703478],
    ]
    expected = DataFrame(
        values,
        columns=['A', 'B', 'C', 'D'],
        index=pd.date_range('2000-01-03', '2000-01-07'))

    # One constant timestamp column with a single differing row.
    expected['date'] = pd.Timestamp('19920106 18:21:32.12')
    date_col = expected.columns.get_loc('date')
    expected.iloc[3, date_col] = pd.Timestamp('20130101')

    # A copy of 'date' with one missing value.
    expected['modified'] = expected['date']
    modified_col = expected.columns.get_loc('modified')
    expected.iloc[1, modified_col] = pd.NaT

    loaded = pd.read_json(os.path.join(self.dirpath, 'tsframe_v012.json'))
    assert_frame_equal(expected, loaded)

    expected_iso = expected.drop(['modified'], axis=1)
    loaded_iso = pd.read_json(
        os.path.join(self.dirpath, 'tsframe_iso_v012.json'))
    assert_frame_equal(expected_iso, loaded_iso)
示例6: test_date_format_frame
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_date_format_frame(self):
    """Round-trip a copy of ``self.tsframe`` through ISO-formatted JSON.

    For several timestamp strings / ``date_unit`` values the frame gets a
    'date' column, is serialised with ``date_format='iso'``, read back
    with ``read_json`` and compared with the frame. Finally an invalid
    ``date_unit`` is expected to raise ``ValueError``.
    """
    df = self.tsframe.copy()

    def test_w_date(date, date_unit=None):
        # Works on the shared ``df`` copy: (re)sets the 'date' column and
        # puts NaT into rows 1 and 5 before the round trip.
        df['date'] = Timestamp(date)
        date_col = df.columns.get_loc('date')
        for row in (1, 5):
            df.iloc[row, date_col] = pd.NaT

        # Only forward date_unit when one was actually requested.
        to_json_kwargs = {'date_format': 'iso'}
        if date_unit:
            to_json_kwargs['date_unit'] = date_unit
        json = df.to_json(**to_json_kwargs)

        assert_frame_equal(read_json(json), df)

    cases = (
        ('20130101 20:43:42.123', None),
        ('20130101 20:43:42', 's'),
        ('20130101 20:43:42.123', 'ms'),
        ('20130101 20:43:42.123456', 'us'),
        ('20130101 20:43:42.123456789', 'ns'),
    )
    for date, unit in cases:
        test_w_date(date, date_unit=unit)

    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        df.to_json(date_format='iso', date_unit='foo')
示例7: test_date_format_series
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_date_format_series(self):
    """Round-trip timestamp Series through ISO-formatted JSON.

    For several timestamp strings / ``date_unit`` values a Series indexed
    like ``self.ts`` is serialised with ``date_format='iso'``, read back
    with ``read_json(..., typ='series')`` and compared with the original.
    Finally an invalid ``date_unit`` is expected to raise ``ValueError``.
    """
    def test_w_date(date, date_unit=None):
        ts = Series(Timestamp(date), index=self.ts.index)
        # Put missing values at two positions before serialising.
        for position in (1, 5):
            ts.iloc[position] = pd.NaT

        # Only forward date_unit when one was actually requested.
        to_json_kwargs = {'date_format': 'iso'}
        if date_unit:
            to_json_kwargs['date_unit'] = date_unit
        json = ts.to_json(**to_json_kwargs)

        assert_series_equal(read_json(json, typ='series'), ts)

    cases = (
        ('20130101 20:43:42.123', None),
        ('20130101 20:43:42', 's'),
        ('20130101 20:43:42.123', 'ms'),
        ('20130101 20:43:42.123456', 'us'),
        ('20130101 20:43:42.123456789', 'ns'),
    )
    for date, unit in cases:
        test_w_date(date, date_unit=unit)

    ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        ts.to_json(date_format='iso', date_unit='foo')
示例8: test_date_unit
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_date_unit(self):
    """Round-trip epoch-formatted dates for each supported ``date_unit``.

    A copy of ``self.tsframe`` gets a 'date' column with three distinct
    timestamps and one NaT. For every unit the frame is written with
    ``date_format='epoch'`` and read back twice: once with the same unit
    passed explicitly and once with ``date_unit=None``.
    """
    frame = self.tsframe.copy()
    frame['date'] = Timestamp('20130101 20:43:42')
    date_col = frame.columns.get_loc('date')

    overrides = (
        (1, Timestamp('19710101 20:43:42')),
        (2, Timestamp('21460101 20:43:42')),
        (4, pd.NaT),
    )
    for row, value in overrides:
        frame.iloc[row, date_col] = value

    for unit in ('s', 'ms', 'us', 'ns'):
        json = frame.to_json(date_format='epoch', date_unit=unit)

        # First pass forces the date unit, second pass (None) lets
        # read_json detect it.
        for requested_unit in (unit, None):
            result = read_json(json, date_unit=requested_unit)
            assert_frame_equal(result, frame)
示例9: test_weird_nested_json
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_weird_nested_json(self):
# this used to core dump the parser
s = r'''{
"status": "success",
"data": {
"posts": [
{
"id": 1,
"title": "A blog post",
"body": "Some useful content"
},
{
"id": 2,
"title": "Another blog post",
"body": "More content"
}
]
}
}'''
read_json(s)
示例10: test_misc_example
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_misc_example(self):
    """Parse records whose keys appear in different orders.

    With ``numpy=True`` the parsed frame is expected to differ from the
    reference frame (the comparison must raise ``AssertionError`` with the
    index-mismatch message); with the default parser the frames must be
    equal.
    """
    records = '[{"a": 1, "b": 2}, {"b":2, "a" :1}]'

    def make_expected():
        return DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

    # parsing unordered input fails
    # NOTE(review): the numpy= keyword is not accepted by recent pandas
    # releases -- confirm the pandas version this test targets.
    numpy_result = read_json(records, numpy=True)
    expected = make_expected()
    error_msg = """DataFrame\\.index are different
DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
    with pytest.raises(AssertionError, match=error_msg):
        assert_frame_equal(numpy_result, expected, check_index_type=False)

    default_result = read_json(records)
    assert_frame_equal(default_result, make_expected())
示例11: test_read_jsonl_unicode_chars
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_read_jsonl_unicode_chars(self):
# GH15132: non-ascii unicode characters
# \u201d == RIGHT DOUBLE QUOTATION MARK
# simulate file handle
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
json = StringIO(json)
result = read_json(json, lines=True)
expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
columns=['a', 'b'])
assert_frame_equal(result, expected)
# simulate string
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
result = read_json(json, lines=True)
expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
columns=['a', 'b'])
assert_frame_equal(result, expected)
示例12: test_comprehensive
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def test_comprehensive(self):
df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
# 'D': pd.timedelta_range('1H', periods=4, freq='T'),
'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True)),
'G': [1.1, 2.2, 3.3, 4.4],
# 'H': pd.date_range('2016-01-01', freq='d', periods=4,
# tz='US/Central'),
'I': [True, False, False, True],
},
index=pd.Index(range(4), name='idx'))
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
示例13: dff_to_table
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def dff_to_table(dff_json, dropdown_x, dropdown_y):
    """Build a bar-chart figure dict from JSON-encoded tabular data.

    Args:
        dff_json: Input handed to ``pd.read_json``.
        dropdown_x: Column label whose values become the trace's ``'x'``.
        dropdown_y: Column label whose values become the trace's ``'y'``.

    Returns:
        dict: ``{'data': [trace], 'layout': {'margin': {...}}}`` where the
        single trace has type ``'bar'`` and the margins are fixed.
    """
    frame = pd.read_json(dff_json)

    bar_trace = {
        'x': frame[dropdown_x],
        'y': frame[dropdown_y],
        'type': 'bar',
    }
    margins = {'l': 20, 'r': 10, 'b': 60, 't': 10}

    return {'data': [bar_trace], 'layout': {'margin': margins}}
示例14: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def main():
    """
    A small tutorial for the HAN module: load the news dataset, build the
    network, change two hyperparameters, then train.
    """
    dataset_path = './News_Category_Dataset/News_Category_Dataset.json'
    # The dataset file is read as one JSON record per line.
    news = pd.read_json(dataset_path, lines=True).reset_index()
    news = preprocessing(news)

    network = HAN.HAN(
        text=news.text,
        labels=news.category,
        num_categories=30,
        pretrained_embedded_vector_path='./glove.6B/glove.6B.100d.txt',
        max_features=200000,
        max_senten_len=150,
        max_senten_num=4,
        embedding_size=100,
        validation_split=0.2,
        verbose=1,
    )
    print(network.get_model().summary())
    network.show_hyperparameters()

    ## How to change hyperparameters
    # Let's add regularizers
    # To replace a hyperparameter, set its key to the new value in the
    # dict passed to set_hyperparameters
    new_values = {'l2_regulizer': 1e-13, 'dropout_regulizer': 0.5}
    network.set_hyperparameters(new_values)
    network.show_hyperparameters()
    print(network.get_model().summary())

    network.train_model(
        epochs=3,
        batch_size=16,
        best_model_path='./best_model.h5',
    )
示例15: load_and_format
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_json [as 别名]
def load_and_format(in_path):
    """Load a JSON dataset and build an image array from its two bands.

    Args:
        in_path: Input handed to ``pd.read_json``.

    Returns:
        tuple: ``(out_df, out_images)`` -- the loaded DataFrame and a
        NumPy array stacked from one ``(75, 75, 2)`` image per row, with
        length-1 axes squeezed out.
    """
    out_df = pd.read_json(in_path)

    def _row_to_image(c_row):
        # Pair the two bands along a new last axis, then shape the result
        # as a 75x75 two-channel image. The one-element list wrapper is
        # kept so the per-row result stays a list.
        bands = np.stack([c_row['band_1'], c_row['band_2']], -1)
        return [bands.reshape((75, 75, 2))]

    per_row_images = out_df.apply(_row_to_image, axis=1)
    out_images = np.stack(per_row_images).squeeze()
    return out_df, out_images