df = pd.DataFrame([['A', 'A1', 'label1', 1],
['A', 'A2', 'label2', 2],
['B', 'A1', 'label1', 3],
['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
df = df.set_index(['index_1', 'index_2'])
pivoted_df = df.pivot(index=None,
columns='label',
values = 'value')
Fungsi pivot memberikan kesalahan NotImplementedError: isna is not defined for MultiIndex
. Saat indeks disetel ke None
.
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-84-54426dadf31d> in <module>()
2 pivoted_df = df.pivot(index=None,
3 columns='label',
----> 4 values = 'value')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
5192 """
5193 from pandas.core.reshape.reshape import pivot
-> 5194 return pivot(self, index=index, columns=columns, values=values)
5195
5196 _shared_docs['pivot_table'] = """
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values)
404 else:
405 index = self[index]
--> 406 index = MultiIndex.from_arrays([index, self[columns]])
407
408 if is_list_like(values) and not isinstance(values, tuple):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names)
1272 from pandas.core.arrays.categorical import _factorize_from_iterables
1273
-> 1274 labels, levels = _factorize_from_iterables(arrays)
1275 if names is None:
1276 names = [getattr(arr, "name", None) for arr in arrays]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values)
2513 codes = values.codes
2514 else:
-> 2515 cat = Categorical(values, ordered=True)
2516 categories = cat.categories
2517 codes = cat.codes
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
359
360 # we're inferring from values
--> 361 dtype = CategoricalDtype(categories, dtype.ordered)
362
363 elif is_categorical_dtype(values):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered)
136
137 def __init__(self, categories=None, ordered=None):
--> 138 self._finalize(categories, ordered, fastpath=False)
139
140 <strong i="12">@classmethod</strong>
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath)
161 if categories is not None:
162 categories = self.validate_categories(categories,
--> 163 fastpath=fastpath)
164
165 self._categories = categories
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath)
318 if not fastpath:
319
--> 320 if categories.hasnans:
321 raise ValueError('Categorial categories cannot be null')
322
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self)
2237 """ return if I have any nans; enables various perf speedups """
2238 if self._can_hold_na:
-> 2239 return self._isnan.any()
2240 else:
2241 return False
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self)
2218 """ return if each value is nan"""
2219 if self._can_hold_na:
-> 2220 return isna(self)
2221 else:
2222 # shouldn't reach to this condition by checking hasnans beforehand
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj)
104 Name: 1, dtype: bool
105 """
--> 106 return _isna(obj)
107
108
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj)
115 # hack (for now) because MI registers as ndarray
116 elif isinstance(obj, ABCMultiIndex):
--> 117 raise NotImplementedError("isna is not defined for MultiIndex")
118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
119 ABCExtensionArray)):
NotImplementedError: isna is not defined for MultiIndex
index_1 | index_2 | label1 | label2
- | - | - | -
A | A1 | 1.0 | NaN
|| A2 | NaN | 2.0
B | A1 | 3.0 | NaN
|| A2 | NaN | 4.0
pd.show_versions()
commit: Tidak ada
python: 3.6.5.final.0
python-bits: 64
OS: Windows
Rilis OS: 10
mesin: AMD64
prosesor: Intel64 Family 6 Model 85 Stepping 4, GenuineIntel
byteorder: sedikit
LC_ALL: Tidak ada
LANG: Tidak ada
LOCALE: Tidak ada. Tidak ada
panda: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: Tidak ada
xarray: Tidak ada
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: Tidak ada
kemacetan: 1.2.1
tabel: 3.4.3
numexpr: 2.6.5
bulu: Tidak ada
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: Tidak ada
psycopg2: Tidak ada
jinja2: 2.10
s3fs: Tidak ada
fastparquet: Tidak ada
pandas_gbq: Tidak ada
pandas_datareader: Tidak ada
Ada pembaruan tentang ini? Seperti yang saya pahami, saat ini metode pivot()
tidak berfungsi dengan banyak pengindeks, argumen index
tidak menerima daftar, dan ketika None
itu benar-benar gagal karena mencoba untuk gunakan MultiIndex yang ada.
Sampai sekarang, saya menyelesaikan ini dengan cara hacky dengan menghasilkan indeks tunggal sebagai rangkaian dari beberapa level indeks asli, pivot dan kemudian merekonstruksi level MultiIndex yang berbeda dengan memisahkan Indeks tunggal yang digabungkan. Mengikuti contoh @srajanpaliwal :
(df.reset_index()
.assign(new_index=lambda dd: dd['index_1'].str.cat(dd['index_2'], sep='_'))
.pivot(index='new_index', columns='label', values='value')
.assign(index_1=lambda dd: dd.index.str.split('_').str.get(0),
index_2=lambda dd: dd.index.str.split('_').str.get(1))
.set_index(['index_1', 'index_2']))
Keluaran:
| | label | label1 | label2 |
| --------- | --------- | -------- | -------- |
| index_1 | index_1 | | |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 | |
| B | A1 | 3.0 | NaN |
|| A2 | NaN | 4.0 | |
Apa pun alasannya, apakah ada alasan mengapa MultiIndex tidak diterima dengan operasi pivot()
?
Terima kasih atas solusinya https://github.com/pandas-dev/pandas/issues/23955#issuecomment -480804068. Jika itu menyelamatkan seseorang dari masalah, berikut adalah generalisasi
def multiindex_pivot(df, columns=None, values=None):
#https://github.com/pandas-dev/pandas/issues/23955
names = list(df.index.names)
df = df.reset_index()
list_index = df[names].values
tuples_index = [tuple(i) for i in list_index] # hashable
df = df.assign(tuples_index=tuples_index)
df = df.pivot(index="tuples_index", columns=columns, values=values)
tuples_index = df.index # reduced
index = pd.MultiIndex.from_tuples(tuples_index, names=names)
df.index = index
return df
sedikit penyesuaian komentar @gmacario demi keseragaman dengan pivot api
def multiindex_pivot(df, index=None, columns=None, values=None):
#https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
list_index = df[names].values
tuples_index = [tuple(i) for i in list_index] # hashable
df = df.assign(tuples_index=tuples_index)
df = df.pivot(index="tuples_index", columns=columns, values=values)
tuples_index = df.index # reduced
index = pd.MultiIndex.from_tuples(tuples_index, names=names)
df.index = index
return df
pemakaian:
df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar')
peningkatan kecil lainnya yang memungkinkan beberapa columns=
juga (tidak diuji secara menyeluruh, tetapi berfungsi dalam contoh saya):
def multiindex_pivot(df, index=None, columns=None, values=None):
# https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
df = df.assign(tuples_index=[tuple(i) for i in df[names].values]) # hashable
df = df.assign(tuples_columns=[tuple(i) for i in df[columns].values]) # hashable
df = df.pivot(index='tuples_index', columns='tuples_columns', values=values)
df.index = pd.MultiIndex.from_tuples(df.index, names=names) # reduced
df.columns = pd.MultiIndex.from_tuples(df.columns, names=columns) # reduced
return df
pemakaian:
df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar')
Namun versi lain yang sedikit lebih baik:
def multiIndex_pivot(df, index = None, columns = None, values = None):
# https://github.com/pandas-dev/pandas/issues/23955
output_df = df.copy(deep = True)
if index is None:
names = list(output_df.index.names)
output_df = output_df.reset_index()
else:
names = index
output_df = output_df.assign(tuples_index = [tuple(i) for i in output_df[names].values])
if isinstance(columns, list):
output_df = output_df.assign(tuples_columns = [tuple(i) for i in output_df[columns].values]) # hashable
output_df = output_df.pivot(index = 'tuples_index', columns = 'tuples_columns', values = values)
output_df.columns = pd.MultiIndex.from_tuples(output_df.columns, names = columns) # reduced
else:
output_df = output_df.pivot(index = 'tuples_index', columns = columns, values = values)
output_df.index = pd.MultiIndex.from_tuples(output_df.index, names = names)
return output_df
Pemakaian:
df.pipe(multiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
Komentar yang paling membantu
Terima kasih atas solusinya https://github.com/pandas-dev/pandas/issues/23955#issuecomment -480804068. Jika itu menyelamatkan seseorang dari masalah, berikut adalah generalisasi