Pandas version checks
- [X] I have checked that this issue has not already been reported.
- [X] I have confirmed this bug exists on the latest version of pandas.
- [X] I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
import numpy as np
data = pd.DataFrame({
'categ': ['B', 'T', 'A'],
'Int': [np.nan,1,2,]
}
).astype({"categ":"category"})
filler = data['categ'].apply(lambda x: {'B': 42, 'T': 51, 'A': 666}.get(x))
data['Int'].fillna(filler)
---------------------------------------------------------------------------
LossySetitemError Traceback (most recent call last)
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/blocks.py:1302, in Block.where(self, other, cond, _downcast, using_cow)
1298 try:
1299 # try/except here is equivalent to a self._can_hold_element check,
1300 # but this gets us back 'casted' which we will re-use below;
1301 # without using 'casted', expressions.where may do unwanted upcasts.
-> 1302 casted = np_can_hold_element(values.dtype, other)
1303 except (ValueError, TypeError, LossySetitemError):
1304 # we cannot coerce, return a compat dtype
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/dtypes/cast.py:1816, in np_can_hold_element(dtype, element)
1814 if tipo.kind not in "iuf":
1815 # Anything other than float/integer we cannot hold
-> 1816 raise LossySetitemError
1817 if not isinstance(tipo, np.dtype):
1818 # i.e. nullable IntegerDtype or FloatingDtype;
1819 # we can put this into an ndarray losslessly iff it has no NAs
LossySetitemError:
During handling of the above exception, another exception occurred:
AssertionError Traceback (most recent call last)
Cell In[9], line 10
3 num_rows = 5
4 data = pd.DataFrame({
5 'categ': ['B', 'T', 'A'],
6 'Int': [np.nan,1,2,]
7 }
8 ).astype({"categ":"category"})
---> 10 data['Int'].fillna(data['categ'].apply(lambda x:
11 {'B': 42, 'T': 51, 'A': 666}.get(x))
12 )
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/generic.py:7212, in NDFrame.fillna(self, value, method, axis, inplace, limit, downcast)
7205 else:
7206 raise TypeError(
7207 '"value" parameter must be a scalar, dict '
7208 "or Series, but you passed a "
7209 f'"{type(value).__name__}"'
7210 )
-> 7212 new_data = self._mgr.fillna(
7213 value=value, limit=limit, inplace=inplace, downcast=downcast
7214 )
7216 elif isinstance(value, (dict, ABCSeries)):
7217 if axis == 1:
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/base.py:173, in DataManager.fillna(self, value, limit, inplace, downcast)
169 if limit is not None:
170 # Do this validation even if we go through one of the no-op paths
171 limit = libalgos.validate_limit(None, limit=limit)
--> 173 return self.apply_with_block(
174 "fillna",
175 value=value,
176 limit=limit,
177 inplace=inplace,
178 downcast=downcast,
179 using_cow=using_copy_on_write(),
180 )
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/managers.py:354, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
352 applied = b.apply(f, **kwargs)
353 else:
--> 354 applied = getattr(b, f)(**kwargs)
355 result_blocks = extend_blocks(applied, result_blocks)
357 out = type(self).from_blocks(result_blocks, self.axes)
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/blocks.py:1419, in Block.fillna(self, value, limit, inplace, downcast, using_cow)
1415 nbs = self.putmask(mask.T, value, using_cow=using_cow)
1416 else:
1417 # without _downcast, we would break
1418 # test_fillna_dtype_conversion_equiv_replace
-> 1419 nbs = self.where(value, ~mask.T, _downcast=False)
1421 # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
1422 # makes a difference bc blk may have object dtype, which has
1423 # different behavior in _maybe_downcast.
1424 return extend_blocks(
1425 [
1426 blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
1427 for blk in nbs
1428 ]
1429 )
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/blocks.py:1309, in Block.where(self, other, cond, _downcast, using_cow)
1303 except (ValueError, TypeError, LossySetitemError):
1304 # we cannot coerce, return a compat dtype
1306 if self.ndim == 1 or self.shape[0] == 1:
1307 # no need to split columns
-> 1309 block = self.coerce_to_target_dtype(other)
1310 blocks = block.where(orig_other, cond, using_cow=using_cow)
1311 return self._maybe_downcast(
1312 blocks, downcast=_downcast, using_cow=using_cow
1313 )
File ~/opt/anaconda3/envs/python_3_12/lib/python3.12/site-packages/pandas/core/internals/blocks.py:490, in Block.coerce_to_target_dtype(self, other, warn_on_upcast)
481 warnings.warn(
482 f"Setting an item of incompatible dtype is deprecated "
483 "and will raise in a future error of pandas. "
(...)
487 stacklevel=find_stack_level(),
488 )
489 if self.values.dtype == new_dtype:
--> 490 raise AssertionError(
491 f"Did not expect new dtype {new_dtype} to equal self.dtype "
492 f"{self.values.dtype}. Please report a bug at "
493 "https://github.com/pandas-dev/pandas/issues."
494 )
495 return self.astype(new_dtype, copy=False)
AssertionError: Did not expect new dtype float64 to equal self.dtype float64. Please report a bug at https://github.com/pandas-dev/pandas/issues.
Issue Description
When the `categ` column is cast to categorical, the code does not work: `data['Int'].fillna(filler)` raises an `AssertionError` (see the traceback above).
Expected Behavior
The code should behave the same whether the `categ` column has object dtype or categorical dtype.
Installed Versions
Tested with multiple versions of Python (3.10, 3.12, 3.8). The bug did not exist in older versions of pandas (1.2.1 is OK).
Comment From: lithomas1
cc @MarcoGorelli.
Comment From: MarcoGorelli
thanks for the report! will take a look
Comment From: MarcoGorelli
this came from https://github.com/pandas-dev/pandas/pull/55201/files
so, not sure it's pdep6-related - going to ping @jbrockmendel on this one
Comment From: lithomas1
Huh, the bisecting doesn't look right.
That PR is merged for 2.2, but this bug is already there in 2.1.
Can you try bisecting further back?
Comment From: adrienpacifico
I had the bug in pandas 1.5.x, and I did not have it in 1.2.1. The issue was likely introduced between these two versions.
Comment From: jbrockmendel
Looks like in np_can_hold_element we should be casting the Categorical to float64
Comment From: jbrockmendel
I think this was fixed by #62282. Could use a test.