Skip to content

Commit 49b390b

Browse files
committed
Fix handling of C(pandas_categorical_object)
1 parent 1f047e6 commit 49b390b

1 file changed

Lines changed: 25 additions & 31 deletions

File tree

patsy/categorical.py

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -177,18 +177,18 @@ def sniff(self, data):
177177
if hasattr(data, "contrast"):
178178
self._contrast = data.contrast
179179
# returns a bool: are we confident that we found all the levels?
180-
if safe_is_pandas_categorical(data):
181-
# pandas.Categorical has its own NA detection, so don't try to
182-
# second-guess it.
183-
self._levels = tuple(pandas_Categorical_categories(data))
184-
return True
185180
if isinstance(data, _CategoricalBox):
186181
if data.levels is not None:
187182
self._levels = tuple(data.levels)
188183
return True
189184
else:
190185
# unbox and fall through
191186
data = data.data
187+
if safe_is_pandas_categorical(data):
188+
# pandas.Categorical has its own NA detection, so don't try to
189+
# second-guess it.
190+
self._levels = tuple(pandas_Categorical_categories(data))
191+
return True
192192
# fastpath to avoid doing an item-by-item iteration over boolean
193193
# arrays, as requested by #44
194194
if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_):
@@ -227,32 +227,26 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
227227
assert sniffer.levels_contrast() == (exp_levels, exp_contrast)
228228

229229
if have_pandas_categorical:
230-
t([], [pandas.Categorical.from_array([1, 2, None])],
231-
True, (1, 2))
232-
# check order preservation
233-
t([], [pandas_Categorical_from_codes([1, 0], ["a", "b"])],
234-
True, ("a", "b"))
235-
t([], [pandas_Categorical_from_codes([1, 0], ["b", "a"])],
236-
True, ("b", "a"))
237-
# check that if someone sticks a .contrast field onto a Categorical
238-
# object, we pick it up:
239-
c = pandas.Categorical.from_array(["a", "b"])
240-
c.contrast = "CONTRAST"
241-
t([], [c], True, ("a", "b"), "CONTRAST")
242-
243-
if have_pandas_categorical_dtype:
244-
t([], [pandas.Series([1, 2, None], dtype="category")],
245-
True, (1, 2))
246-
# check order preservation
247-
t([], [pandas.Series(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
248-
True, ("a", "b"))
249-
t([], [pandas.Series(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
250-
True, ("b", "a"))
251-
# check that if someone sticks a .contrast field onto a categorical
252-
# Series, then we pick it up.
253-
s = pandas.Series(["a", "b"], dtype="category")
254-
s.contrast = "CONTRAST"
255-
t([], [s], True, ("a", "b"), "CONTRAST")
230+
# We make sure to test with both boxed and unboxed pandas objects,
231+
# because we used to have a bug where boxed pandas objects would be
232+
# treated as categorical, but their levels would be lost...
233+
preps = [lambda x: x,
234+
C]
235+
if have_pandas_categorical_dtype:
236+
preps += [pandas.Series,
237+
lambda x: C(pandas.Series(x))]
238+
for prep in preps:
239+
t([], [prep(pandas.Categorical.from_array([1, 2, None]))],
240+
True, (1, 2))
241+
# check order preservation
242+
t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
243+
True, ("a", "b"))
244+
t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
245+
True, ("b", "a"))
246+
# check that if someone sticks a .contrast field onto our object
247+
obj = prep(pandas.Categorical.from_array(["a", "b"]))
248+
obj.contrast = "CONTRAST"
249+
t([], [obj], True, ("a", "b"), "CONTRAST")
256250

257251
t([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
258252
# check order preservation

0 commit comments

Comments
 (0)