# Minimal reproducer for the reported failure.
# Build a four-row frame with two key columns, a label column and a value column.
df = pd.DataFrame([['A', 'A1', 'label1', 1],
['A', 'A2', 'label2', 2],
['B', 'A1', 'label1', 3],
['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
# Move both key columns into the index, producing a two-level MultiIndex.
df = df.set_index(['index_1', 'index_2'])
# Pivot on 'label' while passing index=None. On pandas 0.23.4 (the version in the
# report below) this call raises
# "NotImplementedError: isna is not defined for MultiIndex".
pivoted_df = df.pivot(index=None,
columns='label',
values = 'value')
Функция pivot выдает ошибку `NotImplementedError: isna is not defined for MultiIndex`, когда index установлен в None.
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-84-54426dadf31d> in <module>()
2 pivoted_df = df.pivot(index=None,
3 columns='label',
----> 4 values = 'value')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
5192 """
5193 from pandas.core.reshape.reshape import pivot
-> 5194 return pivot(self, index=index, columns=columns, values=values)
5195
5196 _shared_docs['pivot_table'] = """
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values)
404 else:
405 index = self[index]
--> 406 index = MultiIndex.from_arrays([index, self[columns]])
407
408 if is_list_like(values) and not isinstance(values, tuple):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names)
1272 from pandas.core.arrays.categorical import _factorize_from_iterables
1273
-> 1274 labels, levels = _factorize_from_iterables(arrays)
1275 if names is None:
1276 names = [getattr(arr, "name", None) for arr in arrays]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0)
2541 # For consistency, it should return a list of 2 lists.
2542 return [[], []]
-> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values)
2513 codes = values.codes
2514 else:
-> 2515 cat = Categorical(values, ordered=True)
2516 categories = cat.categories
2517 codes = cat.codes
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
359
360 # we're inferring from values
--> 361 dtype = CategoricalDtype(categories, dtype.ordered)
362
363 elif is_categorical_dtype(values):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered)
136
137 def __init__(self, categories=None, ordered=None):
--> 138 self._finalize(categories, ordered, fastpath=False)
139
140 @classmethod
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath)
161 if categories is not None:
162 categories = self.validate_categories(categories,
--> 163 fastpath=fastpath)
164
165 self._categories = categories
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath)
318 if not fastpath:
319
--> 320 if categories.hasnans:
321 raise ValueError('Categorial categories cannot be null')
322
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self)
2237 """ return if I have any nans; enables various perf speedups """
2238 if self._can_hold_na:
-> 2239 return self._isnan.any()
2240 else:
2241 return False
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self)
2218 """ return if each value is nan"""
2219 if self._can_hold_na:
-> 2220 return isna(self)
2221 else:
2222 # shouldn't reach to this condition by checking hasnans beforehand
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj)
104 Name: 1, dtype: bool
105 """
--> 106 return _isna(obj)
107
108
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj)
115 # hack (for now) because MI registers as ndarray
116 elif isinstance(obj, ABCMultiIndex):
--> 117 raise NotImplementedError("isna is not defined for MultiIndex")
118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
119 ABCExtensionArray)):
NotImplementedError: isna is not defined for MultiIndex
| index_1 | index_2 | label1 | label2 |
| - | - | - | - |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
pd.show_versions()
commit: None
python: 3.6.5.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 85 Stepping 4, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: None
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
Есть обновления по этому поводу? Насколько я понимаю, в настоящее время метод pivot() просто не работает с несколькими индексами: аргумент index не принимает список, а значение None на самом деле не работает, поскольку метод пытается использовать существующий MultiIndex.
На данный момент я решаю эту проблему хакерским способом: генерирую один индекс как объединение нескольких уровней исходного индекса, выполняю pivot, а затем восстанавливаю отдельные уровни MultiIndex, разделяя объединенный индекс. По примеру @srajanpaliwal:
# Workaround: glue both index levels into one '_'-separated string key, pivot
# on that key, then split the key again to rebuild the two-level index.
# NOTE: the split assumes neither level value contains '_' itself.
(
    df.reset_index()
    .assign(
        new_index=lambda frame: frame['index_1'].str.cat(frame['index_2'], sep='_')
    )
    .pivot(index='new_index', columns='label', values='value')
    .assign(
        index_1=lambda frame: frame.index.str.split('_').str.get(0),
        index_2=lambda frame: frame.index.str.split('_').str.get(1),
    )
    .set_index(['index_1', 'index_2'])
)
Вывод:
| | label | label1 | label2 |
| --------- | --------- | -------- | -------- |
| index_1 | index_2 | | |
| A | A1 | 1.0 | NaN |
| | A2 | NaN | 2.0 |
| B | A1 | 3.0 | NaN |
| | A2 | NaN | 4.0 |
В любом случае, есть ли причина, по которой MultiIndex не принимается операцией pivot()?
Спасибо за решение https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Если это кого-то избавит от неприятностей, вот обобщение:
def multiindex_pivot(df, columns=None, values=None):
    """Pivot ``df`` while keeping every level of its existing index as rows.

    Workaround for https://github.com/pandas-dev/pandas/issues/23955: the
    index levels are folded into a single column of tuples, the frame is
    pivoted on that column, and the tuples are expanded back into a
    MultiIndex afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index (one or more levels) identifies the output rows.
        A column named ``tuples_index`` would be overwritten by the
        temporary key column.
    columns : label, optional
        Forwarded to ``DataFrame.pivot`` as ``columns``.
    values : label or list of labels, optional
        Forwarded to ``DataFrame.pivot`` as ``values``. Left out of the call
        when None so that pivot applies its own default.

    Returns
    -------
    pandas.DataFrame
        The pivoted frame, indexed by a MultiIndex that carries the level
        names of ``df.index`` (including unnamed levels).
    """
    names = list(df.index.names)
    flat = df.reset_index()
    # reset_index() inserts the former index levels as the leading columns,
    # so pick them by position; this also works for unnamed levels, whose
    # name is None and therefore cannot be used as a column label.
    key_cols = list(flat.columns[:len(names)])
    # Build the row keys column by column: ``.values`` on the sub-frame
    # would upcast mixed numeric levels (e.g. int + float) to one dtype.
    keys = list(flat[key_cols].itertuples(index=False, name=None))
    flat = flat.assign(tuples_index=keys)

    pivot_kwargs = {"index": "tuples_index", "columns": columns}
    if values is not None:
        pivot_kwargs["values"] = values
    pivoted = flat.pivot(**pivot_kwargs)

    # The pivoted index holds the unique key tuples; expand them into levels.
    pivoted.index = pd.MultiIndex.from_tuples(pivoted.index, names=names)
    return pivoted
Небольшая корректировка комментария @gmacario для единообразия с API pivot:
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` using several columns (or the existing index) as rows.

    Workaround for https://github.com/pandas-dev/pandas/issues/23955: the
    row keys are folded into a single column of tuples, the frame is pivoted
    on that column, and the tuples are expanded back into a MultiIndex.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. A column named ``tuples_index`` would be overwritten by
        the temporary key column.
    index : label or list of labels, optional
        Columns that identify the output rows. When None, the levels of
        ``df.index`` are used. A single label (a tuple counts as one label)
        is handed to ``DataFrame.pivot`` unchanged.
    columns : label, optional
        Forwarded to ``DataFrame.pivot`` as ``columns``.
    values : label or list of labels, optional
        Forwarded to ``DataFrame.pivot`` as ``values``. Left out of the call
        when None so that pivot applies its own default.

    Returns
    -------
    pandas.DataFrame
        The pivoted frame. Its index is a MultiIndex named after the key
        columns, except for a single-label ``index`` where it is whatever
        ``DataFrame.pivot`` returns.
    """
    pivot_kwargs = {"columns": columns}
    if values is not None:
        pivot_kwargs["values"] = values

    if index is None:
        names = list(df.index.names)
        flat = df.reset_index()
        # reset_index() inserts the former index levels as the leading
        # columns; selecting by position also covers unnamed levels.
        key_cols = list(flat.columns[:len(names)])
    elif pd.api.types.is_list_like(index) and not isinstance(index, tuple):
        names = list(index)
        flat = df
        key_cols = names
    else:
        # One key column needs no tuple detour; iterating a scalar label's
        # cells with tuple() would split strings into characters.
        return df.pivot(index=index, **pivot_kwargs)

    # Build the row keys column by column: ``.values`` on the sub-frame
    # would upcast mixed numeric columns (e.g. int + float) to one dtype.
    keys = list(flat[key_cols].itertuples(index=False, name=None))
    flat = flat.assign(tuples_index=keys)
    pivoted = flat.pivot(index="tuples_index", **pivot_kwargs)

    # The pivoted index holds the unique key tuples; expand them into levels.
    pivoted.index = pd.MultiIndex.from_tuples(pivoted.index, names=names)
    return pivoted
Применение:
# Illustrative call: 'idx_column1', 'idx_column2', 'foo' and 'bar' are not columns of
# the example frame built at the top of this thread; substitute your own column names.
df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar')
Еще одно небольшое улучшение, которое также позволяет использовать несколько columns= (не проверено полностью, но работает в моих примерах):
def multiindex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` with several row-key columns and/or several column keys.

    Workaround for https://github.com/pandas-dev/pandas/issues/23955: each
    group of key columns is folded into one column of tuples, the frame is
    pivoted on those tuple columns, and the tuples are expanded back into
    MultiIndex levels on the corresponding axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Columns named ``tuples_index`` / ``tuples_columns``
        would be overwritten by the temporary key columns.
    index : label or list of labels, optional
        Columns that identify the output rows. When None, the levels of
        ``df.index`` are used. A single label (a tuple counts as one label)
        is handed to ``DataFrame.pivot`` unchanged.
    columns : label or list of labels, optional
        Columns whose values become the output columns. A list produces
        MultiIndex columns; a single label is handed to ``DataFrame.pivot``
        unchanged.
    values : label, optional
        Forwarded to ``DataFrame.pivot`` as ``values``. Left out of the call
        when None so that pivot applies its own default.

    Returns
    -------
    pandas.DataFrame
        The pivoted frame, with a MultiIndex on each axis whose keys were
        given as a list (or taken from ``df.index``).
    """
    def is_label_list(labels):
        # A tuple is treated as one (possibly multi-level) column label.
        return pd.api.types.is_list_like(labels) and not isinstance(labels, tuple)

    def row_tuples(frame, labels):
        # Built column by column so every key keeps its own dtype;
        # ``.values`` on the sub-frame would upcast mixed numeric columns.
        return list(frame[labels].itertuples(index=False, name=None))

    pivot_kwargs = {}
    if values is not None:
        pivot_kwargs["values"] = values

    if index is None:
        index_names = list(df.index.names)
        df = df.reset_index()
        # reset_index() inserts the former index levels as the leading
        # columns; selecting by position also covers unnamed levels.
        index_cols = list(df.columns[:len(index_names)])
    elif is_label_list(index):
        index_names = index_cols = list(index)
    else:
        index_names = index_cols = None  # single label: no tuple detour

    if index_cols is not None:
        df = df.assign(tuples_index=row_tuples(df, index_cols))
        pivot_kwargs["index"] = "tuples_index"
    else:
        pivot_kwargs["index"] = index

    if is_label_list(columns):
        column_names = list(columns)
        df = df.assign(tuples_columns=row_tuples(df, column_names))
        pivot_kwargs["columns"] = "tuples_columns"
    else:
        column_names = None  # single label: no tuple detour
        pivot_kwargs["columns"] = columns

    df = df.pivot(**pivot_kwargs)

    # Each tuple-keyed axis holds the unique key tuples; expand into levels.
    if index_names is not None:
        df.index = pd.MultiIndex.from_tuples(df.index, names=index_names)
    if column_names is not None:
        df.columns = pd.MultiIndex.from_tuples(df.columns, names=column_names)
    return df
Применение:
# Illustrative call with two row-key columns and two column-key columns; none of these
# names exist in the example frame built at the top of this thread, so substitute your own.
df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar')
Еще одна немного улучшенная версия:
def multiIndex_pivot(df, index=None, columns=None, values=None):
    """Pivot ``df`` with several row-key columns and, optionally, several column keys.

    Workaround for https://github.com/pandas-dev/pandas/issues/23955: each
    group of key columns is folded into one column of tuples, the frame is
    pivoted on those tuple columns, and the tuples are expanded back into
    MultiIndex levels on the corresponding axis. ``df`` itself is never
    modified; every step below returns a new frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Columns named ``tuples_index`` / ``tuples_columns``
        would be overwritten by the temporary key columns.
    index : label or list of labels, optional
        Columns that identify the output rows. When None, the levels of
        ``df.index`` are used. A single label (a tuple counts as one label)
        is handed to ``DataFrame.pivot`` unchanged.
    columns : label or list of labels, optional
        Columns whose values become the output columns. A list-like produces
        MultiIndex columns; a single label is handed to ``DataFrame.pivot``
        unchanged.
    values : label, optional
        Forwarded to ``DataFrame.pivot`` as ``values``. Left out of the call
        when None so that pivot applies its own default.

    Returns
    -------
    pandas.DataFrame
        The pivoted frame, with a MultiIndex on each axis whose keys were
        given as a list (or taken from ``df.index``).
    """
    def is_label_list(labels):
        # A tuple is treated as one (possibly multi-level) column label.
        return pd.api.types.is_list_like(labels) and not isinstance(labels, tuple)

    def row_tuples(frame, labels):
        # Built column by column so every key keeps its own dtype;
        # ``.values`` on the sub-frame would upcast mixed numeric columns.
        return list(frame[labels].itertuples(index=False, name=None))

    pivot_kwargs = {}
    if values is not None:
        pivot_kwargs["values"] = values

    output_df = df
    if index is None:
        names = list(output_df.index.names)
        output_df = output_df.reset_index()
        # reset_index() inserts the former index levels as the leading
        # columns; selecting by position also covers unnamed levels.
        key_cols = list(output_df.columns[:len(names)])
    elif is_label_list(index):
        names = key_cols = list(index)
    else:
        names = key_cols = None  # single label: no tuple detour

    if key_cols is not None:
        output_df = output_df.assign(tuples_index=row_tuples(output_df, key_cols))
        pivot_kwargs["index"] = "tuples_index"
    else:
        pivot_kwargs["index"] = index

    if is_label_list(columns):
        column_names = list(columns)
        output_df = output_df.assign(
            tuples_columns=row_tuples(output_df, column_names)
        )
        pivot_kwargs["columns"] = "tuples_columns"
    else:
        column_names = None  # single label: no tuple detour
        pivot_kwargs["columns"] = columns

    output_df = output_df.pivot(**pivot_kwargs)

    # Each tuple-keyed axis holds the unique key tuples; expand into levels.
    if column_names is not None:
        output_df.columns = pd.MultiIndex.from_tuples(
            output_df.columns, names=column_names
        )
    if names is not None:
        output_df.index = pd.MultiIndex.from_tuples(output_df.index, names=names)
    return output_df
Применение:
# Illustrative call with two row-key columns and two column-key columns; none of these
# names exist in the example frame built at the top of this thread, so substitute your own.
df.pipe(multiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
Самый полезный комментарий
Спасибо за решение https://github.com/pandas-dev/pandas/issues/23955#issuecomment-480804068. Если это кого-то избавит от неприятностей, вот обобщение: