df = pd.DataFrame([['A', 'A1', 'label1', 1],
['A', 'A2', 'label2', 2],
['B', 'A1', 'label1', 3],
['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
df = df.set_index(['index_1', 'index_2'])
pivoted_df = df.pivot(index=None,
columns='label',
values = 'value')
Pivot function give an error NotImplementedError: isna is not defined for MultiIndex
. When index is set to None
.
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-84-54426dadf31d> in <module>()
2 pivoted_df = df.pivot(index=None,
3 columns='label',
----> 4 values = 'value')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
5192 """
5193 from pandas.core.reshape.reshape import pivot
-> 5194 return pivot(self, index=index, columns=columns, values=values)
5195
5196 _shared_docs['pivot_table'] = """
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values)
404 else:
405 index = self[index]
--> 406 index = MultiIndex.from_arrays([index, self[columns]])
407
408 if is_list_like(values) and not isinstance(values, tuple):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names)
1272 from pandas.core.arrays.categorical import _factorize_from_iterables
1273
-> 1274 labels, levels = _factorize_from_iterables(arrays)
1275 if names is None:
1276 names = [getattr(arr, "name", None) for arr in arrays]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values)
2513 codes = values.codes
2514 else:
-> 2515 cat = Categorical(values, ordered=True)
2516 categories = cat.categories
2517 codes = cat.codes
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
359
360 # we're inferring from values
--> 361 dtype = CategoricalDtype(categories, dtype.ordered)
362
363 elif is_categorical_dtype(values):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered)
136
137 def __init__(self, categories=None, ordered=None):
--> 138 self._finalize(categories, ordered, fastpath=False)
139
140 @classmethod
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath)
161 if categories is not None:
162 categories = self.validate_categories(categories,
--> 163 fastpath=fastpath)
164
165 self._categories = categories
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath)
318 if not fastpath:
319
--> 320 if categories.hasnans:
321 raise ValueError('Categorial categories cannot be null')
322
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self)
2237 """ return if I have any nans; enables various perf speedups """
2238 if self._can_hold_na:
-> 2239 return self._isnan.any()
2240 else:
2241 return False
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self)
2218 """ return if each value is nan"""
2219 if self._can_hold_na:
-> 2220 return isna(self)
2221 else:
2222 # shouldn't reach to this condition by checking hasnans beforehand
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj)
104 Name: 1, dtype: bool
105 """
--> 106 return _isna(obj)
107
108
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj)
115 # hack (for now) because MI registers as ndarray
116 elif isinstance(obj, ABCMultiIndex):
--> 117 raise NotImplementedError("isna is not defined for MultiIndex")
118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
119 ABCExtensionArray)):
NotImplementedError: isna is not defined for MultiIndex
index_1 | index_2 | label1 | label2
-- | -- | -- | --
A|A1|1.0 | NaN
||A2|NaN | 2.0
B|A1|3.0 | NaN
||A2|NaN | 4.0
pd.show_versions()
commit: None
python: 3.6.5.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 85 Stepping 4, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: None
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
Any updates on this? As I understand, currently the pivot()
method just does not work with multiple indexers, the index
argument does not accept a list, and when None
it indeed fails since it attempts to use the existing MultiIndex.
As of now, I solve this in a hacky way by generating a single index as a concatenation of the multiple levels of the original indices, pivot and then reconstructing the different levels of the MultiIndex by splitting the concatenated single Index. Following on @srajanpaliwal example:
(df.reset_index()
.assign(new_index=lambda dd: dd['index_1'].str.cat(dd['index_2'], sep='_'))
.pivot(index='new_index', columns='label', values='value')
.assign(index_1=lambda dd: dd.index.str.split('_').str.get(0),
index_2=lambda dd: dd.index.str.split('_').str.get(1))
.set_index(['index_1', 'index_2']))
Output:
| | label | label1 | label2 |
|---------|---------|--------|--------|
| index_1 | index_1 | | |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 | |
| B | A1 | 3.0 | NaN |
|| A2 | NaN | 4.0 | |
Either way, is there a reason why MultiIndex is not accepted with the pivot()
operation?
Thanks for the solution https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. If it saves someone the trouble, here is a generalization
def multiindex_pivot(df, columns=None, values=None):
#https://github.com/pandas-dev/pandas/issues/23955
names = list(df.index.names)
df = df.reset_index()
list_index = df[names].values
tuples_index = [tuple(i) for i in list_index] # hashable
df = df.assign(tuples_index=tuples_index)
df = df.pivot(index="tuples_index", columns=columns, values=values)
tuples_index = df.index # reduced
index = pd.MultiIndex.from_tuples(tuples_index, names=names)
df.index = index
return df
slight adjustment of @gmacario comment for the sake of uniformity with the pivot api
def multiindex_pivot(df, index=None, columns=None, values=None):
#https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
list_index = df[names].values
tuples_index = [tuple(i) for i in list_index] # hashable
df = df.assign(tuples_index=tuples_index)
df = df.pivot(index="tuples_index", columns=columns, values=values)
tuples_index = df.index # reduced
index = pd.MultiIndex.from_tuples(tuples_index, names=names)
df.index = index
return df
usage:
df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar')
another slight enhancement that allows multiple columns=
too (not thoroughly tested, but works in my examples):
def multiindex_pivot(df, index=None, columns=None, values=None):
# https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
df = df.assign(tuples_index=[tuple(i) for i in df[names].values]) # hashable
df = df.assign(tuples_columns=[tuple(i) for i in df[columns].values]) # hashable
df = df.pivot(index='tuples_index', columns='tuples_columns', values=values)
df.index = pd.MultiIndex.from_tuples(df.index, names=names) # reduced
df.columns = pd.MultiIndex.from_tuples(df.columns, names=columns) # reduced
return df
usage:
df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar')
Yet another slightly improved version:
def multiIndex_pivot(df, index = None, columns = None, values = None):
# https://github.com/pandas-dev/pandas/issues/23955
output_df = df.copy(deep = True)
if index is None:
names = list(output_df.index.names)
output_df = output_df.reset_index()
else:
names = index
output_df = output_df.assign(tuples_index = [tuple(i) for i in output_df[names].values])
if isinstance(columns, list):
output_df = output_df.assign(tuples_columns = [tuple(i) for i in output_df[columns].values]) # hashable
output_df = output_df.pivot(index = 'tuples_index', columns = 'tuples_columns', values = values)
output_df.columns = pd.MultiIndex.from_tuples(output_df.columns, names = columns) # reduced
else:
output_df = output_df.pivot(index = 'tuples_index', columns = columns, values = values)
output_df.index = pd.MultiIndex.from_tuples(output_df.index, names = names)
return output_df
Usage:
df.pipe(multiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
Most helpful comment
Thanks for the solution https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. If it saves someone the trouble, here is a generalization