# Minimal reproduction: build a four-row frame with two key columns, a label
# column and a value column.
df = pd.DataFrame([['A', 'A1', 'label1', 1],
['A', 'A2', 'label2', 2],
['B', 'A1', 'label1', 3],
['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
# Move both key columns into the row index, which makes it a two-level MultiIndex.
df = df.set_index(['index_1', 'index_2'])
# Pivot on 'label' with index=None. On the pandas version reported further
# down (0.23.4) this call raises
# "NotImplementedError: isna is not defined for MultiIndex".
pivoted_df = df.pivot(index=None,
columns='label',
values = 'value')
Die Pivot-Funktion löst den Fehler `NotImplementedError: isna is not defined for MultiIndex` aus, wenn `index` auf `None` gesetzt ist.
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-84-54426dadf31d> in <module>()
2 pivoted_df = df.pivot(index=None,
3 columns='label',
----> 4 values = 'value')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
5192 """
5193 from pandas.core.reshape.reshape import pivot
-> 5194 return pivot(self, index=index, columns=columns, values=values)
5195
5196 _shared_docs['pivot_table'] = """
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values)
404 else:
405 index = self[index]
--> 406 index = MultiIndex.from_arrays([index, self[columns]])
407
408 if is_list_like(values) and not isinstance(values, tuple):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names)
1272 from pandas.core.arrays.categorical import _factorize_from_iterables
1273
-> 1274 labels, levels = _factorize_from_iterables(arrays)
1275 if names is None:
1276 names = [getattr(arr, "name", None) for arr in arrays]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values)
2513 codes = values.codes
2514 else:
-> 2515 cat = Categorical(values, ordered=True)
2516 categories = cat.categories
2517 codes = cat.codes
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
359
360 # we're inferring from values
--> 361 dtype = CategoricalDtype(categories, dtype.ordered)
362
363 elif is_categorical_dtype(values):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered)
136
137 def __init__(self, categories=None, ordered=None):
--> 138 self._finalize(categories, ordered, fastpath=False)
139
140 @classmethod
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath)
161 if categories is not None:
162 categories = self.validate_categories(categories,
--> 163 fastpath=fastpath)
164
165 self._categories = categories
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath)
318 if not fastpath:
319
--> 320 if categories.hasnans:
321 raise ValueError('Categorial categories cannot be null')
322
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self)
2237 """ return if I have any nans; enables various perf speedups """
2238 if self._can_hold_na:
-> 2239 return self._isnan.any()
2240 else:
2241 return False
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self)
2218 """ return if each value is nan"""
2219 if self._can_hold_na:
-> 2220 return isna(self)
2221 else:
2222 # shouldn't reach to this condition by checking hasnans beforehand
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj)
104 Name: 1, dtype: bool
105 """
--> 106 return _isna(obj)
107
108
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj)
115 # hack (for now) because MI registers as ndarray
116 elif isinstance(obj, ABCMultiIndex):
--> 117 raise NotImplementedError("isna is not defined for MultiIndex")
118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
119 ABCExtensionArray)):
NotImplementedError: isna is not defined for MultiIndex
| index_1 | index_2 | label1 | label2 |
| --- | --- | --- | --- |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
pd.show_versions()
commit: None
python: 3.6.5.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 85 Stepping 4, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None
pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: None
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
Gibt es hierzu Neuigkeiten? Soweit ich weiß, funktioniert die Methode `pivot()` derzeit nicht mit mehreren Indexspalten: Das Argument `index` akzeptiert keine Liste, und `index=None` schlägt tatsächlich fehl, weil dabei versucht wird, den vorhandenen MultiIndex zu verwenden.
Derzeit löse ich das auf etwas unsaubere Weise: Ich erzeuge einen einzelnen Index als Verkettung der Ebenen des ursprünglichen Index, pivotiere und stelle anschließend die Ebenen des MultiIndex wieder her, indem ich den verketteten Index wieder aufteile. Siehe das folgende Beispiel:
# Workaround: fold both index levels into one '_'-joined string key, pivot on
# that key, then split the key again to rebuild the two-level index.
# This only round-trips when the level values themselves contain no '_'.
(
    df.reset_index()
    # Single string key per row, e.g. 'A' + '_' + 'A1'.
    .assign(new_index=lambda frame: frame['index_1'].str.cat(frame['index_2'], sep='_'))
    .pivot(index='new_index', columns='label', values='value')
    # Recover the two original level values from the joined key.
    .assign(
        index_1=lambda frame: frame.index.str.split('_').str[0],
        index_2=lambda frame: frame.index.str.split('_').str[1],
    )
    .set_index(['index_1', 'index_2'])
)
Ausgabe:
| | label | label1 | label2 |
| --- | --- | --- | --- |
| index_1 | index_2 | | |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
Gibt es einen Grund, warum MultiIndex bei der Operation pivot()
nicht akzeptiert wird?
Vielen Dank für die Lösung in https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Falls es jemandem Mühe erspart, hier eine Verallgemeinerung:
def multiindex_pivot(df, columns=None, values=None):
    """Pivot ``df`` while keeping its existing index levels as the row index.

    The index levels are turned back into columns, collapsed into one
    tuple-valued helper column named ``"tuples_index"``, and that column is
    used as the pivot index. Afterwards the tuples are expanded into a
    ``MultiIndex`` carrying the original level names.

    Parameters
    ----------
    df : DataFrame
        Frame whose index levels identify the rows.
    columns, values
        Passed unchanged to ``DataFrame.pivot``.

    Returns
    -------
    DataFrame
        The pivoted frame, indexed by a ``MultiIndex`` named after the index
        levels of the input.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    level_names = list(df.index.names)
    flat = df.reset_index()
    # One hashable tuple per row, built from the former index levels.
    row_keys = list(map(tuple, flat[level_names].values))
    flat = flat.assign(tuples_index=row_keys)
    pivoted = flat.pivot(index="tuples_index", columns=columns, values=values)
    # After pivoting, the index holds the tuples as single labels; expand
    # them back into one level per original index level.
    pivoted.index = pd.MultiIndex.from_tuples(pivoted.index, names=level_names)
    return pivoted
Eine leichte Anpassung des Kommentars von @gmacario, um die Funktion an die pivot-API anzugleichen:
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` using several columns (or the existing index levels) as row keys.

    The row keys are collapsed into one tuple-valued helper column named
    ``"tuples_index"``, which is used as the pivot index; afterwards the
    tuples are expanded into a ``MultiIndex``.

    Parameters
    ----------
    df : DataFrame
        Input frame.
    index : None, column label, or list-like of column labels
        Columns that identify a row. ``None`` uses the existing index levels
        (they are moved into columns with ``reset_index``). A single label is
        accepted as well, mirroring ``DataFrame.pivot``; a tuple is treated as
        one label.
    columns, values
        Passed unchanged to ``DataFrame.pivot``.

    Returns
    -------
    DataFrame
        The pivoted frame, indexed by a ``MultiIndex`` with one level per row
        key, named after the keys.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    if index is None:
        # NOTE(review): unnamed index levels are presumably not supported,
        # because the level names are used as column labels below.
        names = list(df.index.names)
        df = df.reset_index()
    elif pd.api.types.is_list_like(index) and not isinstance(index, tuple):
        names = list(index)
    else:
        # A single label: wrap it so that df[names] stays two-dimensional and
        # tuple(row) does not split a string value into characters.
        names = [index]
    tuples_index = [tuple(row) for row in df[names].values]  # hashable
    df = df.assign(tuples_index=tuples_index)
    df = df.pivot(index="tuples_index", columns=columns, values=values)
    # The pivoted index holds the tuples as single labels; expand them.
    df.index = pd.MultiIndex.from_tuples(df.index, names=names)
    return df
Verwendung:
# Example call through DataFrame.pipe: two columns serve as row keys, 'foo'
# supplies the new column labels and 'bar' the cell values (the column names
# look like placeholders).
df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar')
Eine weitere kleine Verbesserung, die auch mehrere Spalten in `columns=` erlaubt (nicht gründlich getestet, funktioniert aber in meinen Beispielen):
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` with several row-key columns and, optionally, several column keys.

    Row keys (and column keys, when a list is given) are collapsed into
    tuple-valued helper columns named ``"tuples_index"`` and
    ``"tuples_columns"``. The frame is pivoted on those, and the tuples are
    then expanded into ``MultiIndex`` objects.

    Parameters
    ----------
    df : DataFrame
        Input frame.
    index : None, column label, or list-like of column labels
        Columns that identify a row. ``None`` uses the existing index levels
        (moved into columns with ``reset_index``).
    columns : column label or list-like of column labels
        A list-like yields ``MultiIndex`` columns named after its entries. A
        single label is forwarded to ``DataFrame.pivot`` as is and yields flat
        columns.
    values
        Passed unchanged to ``DataFrame.pivot``.

    Returns
    -------
    DataFrame
        The pivoted frame, indexed by a ``MultiIndex`` with one level per row key.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    def _is_label_list(labels):
        # A tuple counts as one label, not as a list of labels.
        return pd.api.types.is_list_like(labels) and not isinstance(labels, tuple)

    if index is None:
        names = list(df.index.names)
        df = df.reset_index()
    elif _is_label_list(index):
        names = list(index)
    else:
        # Wrap a single label so tuple(row) below receives a row, not a scalar.
        names = [index]
    df = df.assign(tuples_index=[tuple(row) for row in df[names].values])  # hashable

    if _is_label_list(columns):
        column_names = list(columns)
        df = df.assign(
            tuples_columns=[tuple(row) for row in df[column_names].values]  # hashable
        )
        df = df.pivot(index='tuples_index', columns='tuples_columns', values=values)
        # Expand the tuple labels into one column level per key.
        df.columns = pd.MultiIndex.from_tuples(df.columns, names=column_names)
    else:
        # A scalar label would be split into characters by tuple(); let pivot
        # handle it directly.
        df = df.pivot(index='tuples_index', columns=columns, values=values)

    df.index = pd.MultiIndex.from_tuples(df.index, names=names)
    return df
Verwendung:
# Example call through DataFrame.pipe with two row-key columns and two
# column-key columns (the column names look like placeholders).
df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar')
Noch eine leicht verbesserte Version:
def multiIndex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` with multi-column row keys and, optionally, multi-column column keys.

    Row keys are collapsed into a tuple-valued helper column named
    ``"tuples_index"``. When ``columns`` is a list, the column keys are
    collapsed into ``"tuples_columns"`` in the same way. After pivoting, the
    tuples are expanded into ``MultiIndex`` objects. The input frame is not
    modified.

    Parameters
    ----------
    df : DataFrame
        Input frame.
    index : None, column label, or list-like of column labels
        Columns that identify a row. ``None`` uses the existing index levels
        (moved into columns with ``reset_index``). A single label is accepted
        too; a tuple is treated as one label.
    columns : column label or list of column labels
        A ``list`` yields ``MultiIndex`` columns named after its entries; any
        other value is forwarded to ``DataFrame.pivot`` unchanged.
    values
        Passed unchanged to ``DataFrame.pivot``.

    Returns
    -------
    DataFrame
        The pivoted frame, indexed by a ``MultiIndex`` with one level per row key.
    """
    # https://github.com/pandas-dev/pandas/issues/23955
    # No defensive copy is needed: reset_index, assign and pivot each return
    # a new frame, so the caller's frame is never written to.
    if index is None:
        names = list(df.index.names)
        output_df = df.reset_index()
    else:
        if pd.api.types.is_list_like(index) and not isinstance(index, tuple):
            names = list(index)
        else:
            # Wrap a single label so tuple(row) below receives a row, not a scalar.
            names = [index]
        output_df = df
    output_df = output_df.assign(
        tuples_index=[tuple(row) for row in output_df[names].values]  # hashable
    )

    if isinstance(columns, list):
        output_df = output_df.assign(
            tuples_columns=[tuple(row) for row in output_df[columns].values]  # hashable
        )
        output_df = output_df.pivot(index='tuples_index', columns='tuples_columns', values=values)
        # Expand the tuple labels into one column level per key.
        output_df.columns = pd.MultiIndex.from_tuples(output_df.columns, names=columns)
    else:
        output_df = output_df.pivot(index='tuples_index', columns=columns, values=values)

    output_df.index = pd.MultiIndex.from_tuples(output_df.index, names=names)
    return output_df
Verwendung:
# Example call through DataFrame.pipe with two row-key columns and two
# column-key columns (the column names look like placeholders).
df.pipe(multiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
Hilfreichster Kommentar
Vielen Dank für die Lösung in https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Falls es jemandem Mühe erspart, hier eine Verallgemeinerung: