Hi,
When I run the following code, there is an error:
ValueError: For numerical factors, num_columns must be an int
================
import numpy as np
import pandas as pd
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression
X = [0.5,0.75,1.0,1.25,1.5,1.75,1.75,2.0,2.25,2.5,2.75,3.0,3.25,
3.5,4.0,4.25,4.5,4.75,5.0,5.5]
y = [0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1]
zipped = list(zip(X,y))
df = pd.DataFrame(zipped,columns = ['study_hrs','p_or_f'])
y, X = dmatrices('p_or_f ~ study_hrs', df, return_type="dataframe")
=======================
I have checked that 'df' is of this type:
=============
type(df)
Out[25]: pandas.core.frame.DataFrame
=============
I cannot figure out where the problem is. Can you help me?
Thanks.
Error message:
..........
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\rj\pyprj\stackoverflow_logisticregression0.py in <module>()
17 df = pd.DataFrame(zipped,columns = ['study_hrs','p_or_f'])
18
---> 19 y, X = dmatrices('p_or_f ~ study_hrs', df, return_type="dataframe")
20
21 y = np.ravel(y)
C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc
in dmatrices(formula_like, data, eval_env, NA_action, return_type)
295 eval_env = EvalEnvironment.capture(eval_env, reference=1)
296 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 297 NA_action, return_type)
298 if lhs.shape[1] == 0:
299 raise PatsyError("model is missing required outcome variables")
C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc
in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
150 return iter([data])
151 design_infos = _try_incr_builders(formula_like, data_iter_maker,
eval_env,
--> 152 NA_action)
153 if design_infos is not None:
154 return build_design_matrices(design_infos, data,
C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc
in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
55 data_iter_maker,
56 eval_env,
---> 57 NA_action)
58 else:
59 return None
C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\build.pyc
in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
704 factor_states[factor],
705 num_columns=num_column_counts[factor],
--> 706 categories=None)
707 else:
708 assert factor in cat_levels_contrasts
C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\design_info.pyc
in __init__(self, factor, type, state, num_columns, categories)
86 if self.type == "numerical":
87 if not isinstance(num_columns, int):
---> 88 raise ValueError("For numerical factors, num_columns "
89 "must be an int")
90 if categories is not None:
ValueError: For numerical factors, num_columns must be an int