# Minimal reproduction: four rows, each identified by the pair (index_1, index_2).
df = pd.DataFrame([['A', 'A1', 'label1', 1],
['A', 'A2', 'label2', 2],
['B', 'A1', 'label1', 3],
['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
# Move both key columns into the index, so the frame now carries a two-level MultiIndex.
df = df.set_index(['index_1', 'index_2'])
# index=None asks pivot to use the frame's existing index. With the MultiIndex built
# above, this is the call that the traceback below reports as raising
# NotImplementedError (reported against pandas 0.23.4).
pivoted_df = df.pivot(index=None,
columns='label',
values = 'value')
La méthode `pivot` lève l'erreur `NotImplementedError: isna is not defined for MultiIndex` lorsque l'argument `index` vaut `None`.
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-84-54426dadf31d> in <module>()
2 pivoted_df = df.pivot(index=None,
3 columns='label',
----> 4 values = 'value')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
5192 """
5193 from pandas.core.reshape.reshape import pivot
-> 5194 return pivot(self, index=index, columns=columns, values=values)
5195
5196 _shared_docs['pivot_table'] = """
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values)
404 else:
405 index = self[index]
--> 406 index = MultiIndex.from_arrays([index, self[columns]])
407
408 if is_list_like(values) and not isinstance(values, tuple):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names)
1272 from pandas.core.arrays.categorical import _factorize_from_iterables
1273
-> 1274 labels, levels = _factorize_from_iterables(arrays)
1275 if names is None:
1276 names = [getattr(arr, "name", None) for arr in arrays]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values)
2513 codes = values.codes
2514 else:
-> 2515 cat = Categorical(values, ordered=True)
2516 categories = cat.categories
2517 codes = cat.codes
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
359
360 # we're inferring from values
--> 361 dtype = CategoricalDtype(categories, dtype.ordered)
362
363 elif is_categorical_dtype(values):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered)
136
137 def __init__(self, categories=None, ordered=None):
--> 138 self._finalize(categories, ordered, fastpath=False)
139
140 <strong i="12">@classmethod</strong>
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath)
161 if categories is not None:
162 categories = self.validate_categories(categories,
--> 163 fastpath=fastpath)
164
165 self._categories = categories
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath)
318 if not fastpath:
319
--> 320 if categories.hasnans:
321 raise ValueError('Categorial categories cannot be null')
322
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self)
2237 """ return if I have any nans; enables various perf speedups """
2238 if self._can_hold_na:
-> 2239 return self._isnan.any()
2240 else:
2241 return False
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self)
2218 """ return if each value is nan"""
2219 if self._can_hold_na:
-> 2220 return isna(self)
2221 else:
2222 # shouldn't reach to this condition by checking hasnans beforehand
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj)
104 Name: 1, dtype: bool
105 """
--> 106 return _isna(obj)
107
108
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj)
115 # hack (for now) because MI registers as ndarray
116 elif isinstance(obj, ABCMultiIndex):
--> 117 raise NotImplementedError("isna is not defined for MultiIndex")
118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
119 ABCExtensionArray)):
NotImplementedError: isna is not defined for MultiIndex
| index_1 | index_2 | label1 | label2 |
| - | - | - | - |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
pd.show_versions()
commit: aucun
python: 3.6.5.final.0
bits python: 64
OS: Windows
Version du système d'exploitation: 10
machine: AMD64
Processeur: Intel64 Family 6 Model 85 Stepping 4, Genuine Intel
byteorder: petit
LC_ALL: Aucun
LANG: Aucun
LOCALE: Aucune, aucune
pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: Aucun
xarray: Aucun
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: Aucun
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: Aucun
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: Aucun
psycopg2: Aucun
jinja2: 2.10
s3fs: Aucun
fastparquet: Aucun
pandas_gbq: Aucun
pandas_datareader: Aucun
Y a-t-il du nouveau à ce sujet ? Si je comprends bien, la méthode pivot() ne fonctionne actuellement tout simplement pas avec un index à plusieurs niveaux : l'argument index n'accepte pas de liste et, lorsqu'il vaut None, l'appel échoue car il tente d'utiliser le MultiIndex existant.
Pour l'instant, je contourne le problème de façon peu élégante : je génère un index unique en concaténant les différents niveaux de l'index d'origine, je pivote, puis je reconstruis les niveaux du MultiIndex en découpant cet index concaténé. En reprenant l'exemple de @srajanpaliwal :
# Workaround: join the two index levels into a single '_'-separated string key
# ('new_index'), pivot on that key, then split the key back into 'index_1' and
# 'index_2' and restore them as a two-level index.
# NOTE: splitting on '_' only round-trips when neither level value contains '_',
# and str.cat requires both levels to hold strings.
(df.reset_index()
.assign(new_index=lambda dd: dd['index_1'].str.cat(dd['index_2'], sep='_'))
.pivot(index='new_index', columns='label', values='value')
.assign(index_1=lambda dd: dd.index.str.split('_').str.get(0),
index_2=lambda dd: dd.index.str.split('_').str.get(1))
.set_index(['index_1', 'index_2']))
Sortie :
| | label | label1 | label2 |
| --- | --- | --- | --- |
| index_1 | index_2 | | |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
Dans tous les cas, y a-t-il une raison pour laquelle un MultiIndex n'est pas accepté par l'opération pivot() ?
Merci pour la solution https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Si cela peut aider quelqu'un, voici une généralisation :
def multiindex_pivot(df, columns=None, values=None):
    """Pivot ``df`` using its existing (Multi)Index as the row keys.

    Works around ``DataFrame.pivot(index=None, ...)`` failing on a frame that
    already carries a MultiIndex.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose current index levels identify the output rows.
    columns : label or list of labels
        Column(s) whose values become the output column labels. Required in
        practice; leaving it as ``None`` makes pandas raise ``KeyError``.
    values : label or list of labels, optional
        Column(s) used to fill the result. When omitted, every remaining
        column is pivoted, as ``DataFrame.pivot`` does.

    Returns
    -------
    pandas.DataFrame
        Pivoted frame. The rows always carry a ``MultiIndex`` with the level
        names of ``df.index``, even when ``df.index`` has a single level.

    Raises
    ------
    KeyError
        If ``columns`` or ``values`` names a column that does not exist.
    ValueError
        If a (row key, column key) combination occurs more than once.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    names = list(df.index.names)
    column_keys = columns if isinstance(columns, list) else [columns]

    # Append the pivot key(s) to the existing index levels instead of packing
    # the levels into one tuple column: each level keeps its own dtype (a
    # 2-D ``.values`` array would coerce them to a common one), unnamed levels
    # work, and no helper column can clash with a real one.
    keyed = df.set_index(column_keys, append=True)
    if values is not None:
        # Only narrow on request; recent pandas versions treat an explicit
        # ``None`` passed to ``pivot`` as a column label rather than "all".
        keyed = keyed[values]
    wide = keyed.unstack(column_keys)

    # A single-level input index comes back as a plain Index; wrap it so the
    # return type is the same whatever the number of levels.
    if not isinstance(wide.index, pd.MultiIndex):
        wide.index = pd.MultiIndex.from_arrays([wide.index], names=names)
    return wide
Léger ajustement de la version de @gmacario, par souci de cohérence avec l'API de pivot :
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` on several row keys, mirroring the ``DataFrame.pivot`` API.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to reshape.
    index : label or list of labels, optional
        Column(s) identifying the output rows. When omitted, the existing
        index levels of ``df`` are used.
    columns : label or list of labels
        Column(s) whose values become the output column labels. Required in
        practice; leaving it as ``None`` makes pandas raise ``KeyError``.
    values : label or list of labels, optional
        Column(s) used to fill the result. When omitted, every remaining
        column is pivoted, as ``DataFrame.pivot`` does.

    Returns
    -------
    pandas.DataFrame
        Pivoted frame whose rows always carry a ``MultiIndex`` named after the
        row keys, even when there is a single key.

    Raises
    ------
    KeyError
        If ``index``, ``columns`` or ``values`` names a missing column.
    ValueError
        If a (row key, column key) combination occurs more than once.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    column_keys = columns if isinstance(columns, list) else [columns]
    if index is None:
        names = list(df.index.names)
        # Keep the current levels and add the pivot key(s) behind them.
        keyed = df.set_index(column_keys, append=True)
    else:
        # Accept a single label as well as a list, like DataFrame.pivot.
        names = index if isinstance(index, list) else [index]
        keyed = df.set_index(names + column_keys)

    # Index levels keep their individual dtypes here; packing the keys into
    # tuples through a 2-D ``.values`` array would coerce them to a common one.
    if values is not None:
        # Only narrow on request; recent pandas versions treat an explicit
        # ``None`` passed to ``pivot`` as a column label rather than "all".
        keyed = keyed[values]
    wide = keyed.unstack(column_keys)

    # With a single row key unstack returns a plain Index; wrap it so the
    # return type does not depend on the number of keys.
    if not isinstance(wide.index, pd.MultiIndex):
        wide.index = pd.MultiIndex.from_arrays([wide.index], names=names)
    return wide
usage:
# Example call through DataFrame.pipe, which passes df as the first argument.
# 'idx_column1', 'idx_column2', 'foo' and 'bar' are presumably placeholder column names.
df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar')
Une autre légère amélioration, qui autorise également plusieurs colonnes dans columns= (pas complètement testée, mais elle fonctionne sur mes exemples) :
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` on several row keys and, optionally, several column keys.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to reshape.
    index : label or list of labels, optional
        Column(s) identifying the output rows. When omitted, the existing
        index levels of ``df`` are used.
    columns : label or list of labels
        Column(s) whose values become the output column labels. A list yields
        ``MultiIndex`` columns with one level per entry; a single label yields
        a flat column index. Leaving it as ``None`` makes pandas raise
        ``KeyError``.
    values : label or list of labels, optional
        Column(s) used to fill the result. When omitted, every remaining
        column is pivoted, as ``DataFrame.pivot`` does.

    Returns
    -------
    pandas.DataFrame
        Pivoted frame. Rows always carry a ``MultiIndex`` named after the row
        keys.

    Raises
    ------
    KeyError
        If ``index``, ``columns`` or ``values`` names a missing column.
    ValueError
        If a (row key, column key) combination occurs more than once.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    many_columns = isinstance(columns, list)
    # A bare label must not be iterated as if it were a list of labels.
    column_keys = columns if many_columns else [columns]
    if index is None:
        names = list(df.index.names)
        # Keep the current levels and add the pivot key(s) behind them.
        keyed = df.set_index(column_keys, append=True)
    else:
        names = index if isinstance(index, list) else [index]
        keyed = df.set_index(names + column_keys)

    # Working on index levels (rather than tuples built from ``.values``)
    # keeps each key's dtype and needs no helper columns.
    if values is not None:
        # Only narrow on request; recent pandas versions treat an explicit
        # ``None`` passed to ``pivot`` as a column label rather than "all".
        keyed = keyed[values]
    wide = keyed.unstack(column_keys)

    # unstack collapses a single key to a plain Index; wrap it back so that
    # list-style arguments always produce a MultiIndex.
    if not isinstance(wide.index, pd.MultiIndex):
        wide.index = pd.MultiIndex.from_arrays([wide.index], names=names)
    if many_columns and not isinstance(wide.columns, pd.MultiIndex):
        wide.columns = pd.MultiIndex.from_arrays([wide.columns], names=columns)
    return wide
usage:
# Example call with two row keys and two column keys, through DataFrame.pipe.
# The column names are presumably placeholders.
df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar')
Encore une autre version légèrement améliorée:
def multiIndex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` on several row keys and one or several column keys.

    ``df`` itself is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to reshape.
    index : label or list of labels, optional
        Column(s) identifying the output rows. When omitted, the existing
        index levels of ``df`` are used.
    columns : label or list of labels
        Column(s) whose values become the output column labels. A list yields
        ``MultiIndex`` columns with one level per entry; a single label yields
        a flat column index. Leaving it as ``None`` makes pandas raise
        ``KeyError``.
    values : label or list of labels, optional
        Column(s) used to fill the result. When omitted, every remaining
        column is pivoted, as ``DataFrame.pivot`` does.

    Returns
    -------
    pandas.DataFrame
        New pivoted frame. Rows always carry a ``MultiIndex`` named after the
        row keys.

    Raises
    ------
    KeyError
        If ``index``, ``columns`` or ``values`` names a missing column.
    ValueError
        If a (row key, column key) combination occurs more than once.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    multi_column = isinstance(columns, list)
    column_keys = columns if multi_column else [columns]

    # set_index returns a new frame, so no defensive deep copy of the input
    # is needed before reshaping.
    if index is None:
        names = list(df.index.names)
        output_df = df.set_index(column_keys, append=True)
    else:
        # Accept a single label as well as a list, like DataFrame.pivot.
        names = index if isinstance(index, list) else [index]
        output_df = df.set_index(names + column_keys)

    # Keys stay real index levels (no tuple helper columns), so every level
    # keeps its own dtype instead of being coerced through a 2-D array.
    if values is not None:
        # Only narrow on request; recent pandas versions treat an explicit
        # ``None`` passed to ``pivot`` as a column label rather than "all".
        output_df = output_df[values]
    output_df = output_df.unstack(column_keys)

    # unstack collapses a single key to a plain Index; wrap it back so that
    # the result type does not depend on how many keys were given.
    if not isinstance(output_df.index, pd.MultiIndex):
        output_df.index = pd.MultiIndex.from_arrays([output_df.index], names=names)
    if multi_column and not isinstance(output_df.columns, pd.MultiIndex):
        output_df.columns = pd.MultiIndex.from_arrays([output_df.columns], names=columns)
    return output_df
Usage:
# Example call through DataFrame.pipe; the column names are presumably placeholders.
df.pipe(multiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
Commentaire le plus utile
Merci pour la solution https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Si cela peut aider quelqu'un, voici une généralisation :