plotnine
plotnine copied to clipboard
stat_smooth reports a strange error when called with method='loess'
The code below will cause the issue:
ggplot(mtcars, aes('wt', 'mpg', color='gear')) + geom_point() + stat_smooth(aes('wt', 'mpg'), method='loess') + facet_wrap(('gear', 'am'))
I installed scikit-misc because plotnine complained about a missing module.
Below is the traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
/usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in __repr__(self)
86 # in the jupyter notebook.
87 if not self.figure:
---> 88 self.draw()
89 plt.show()
90 return '<ggplot: (%d)>' % self.__hash__()
/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in draw(self, return_ggplot)
179 # new frames knowing that they are separate from the original.
180 with pd.option_context('mode.chained_assignment', None):
--> 181 return self._draw(return_ggplot)
182
183 def _draw(self, return_ggplot=False):
/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in _draw(self, return_ggplot)
186 # assign a default theme
187 self = deepcopy(self)
--> 188 self._build()
189
190 # If no theme we use the default
/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in _build(self)
297
298 # Apply and map statistics
--> 299 layers.compute_statistic(layout)
300 layers.map_statistic(self)
301
/usr/local/lib/python3.7/site-packages/plotnine/layer.py in compute_statistic(self, layout)
84 def compute_statistic(self, layout):
85 for l in self:
---> 86 l.compute_statistic(layout)
87
88 def map_statistic(self, plot):
/usr/local/lib/python3.7/site-packages/plotnine/layer.py in compute_statistic(self, layout)
358 data = self.stat.use_defaults(data)
359 data = self.stat.setup_data(data)
--> 360 data = self.stat.compute_layer(data, params, layout)
361 self.data = data
362
/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in compute_layer(cls, data, params, layout)
266 return cls.compute_panel(pdata, pscales, **params)
267
--> 268 return groupby_apply(data, 'PANEL', fn)
269
270 @classmethod
/usr/local/lib/python3.7/site-packages/plotnine/utils.py in groupby_apply(df, cols, func, *args, **kwargs)
631 # function fn should be free to modify dataframe d, therefore
632 # do not mark d as a slice of df i.e no SettingWithCopyWarning
--> 633 lst.append(func(d, *args, **kwargs))
634 return pd.concat(lst, axis=axis, ignore_index=True)
635
/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in fn(pdata)
264 return pdata
265 pscales = layout.get_scales(pdata['PANEL'].iat[0])
--> 266 return cls.compute_panel(pdata, pscales, **params)
267
268 return groupby_apply(data, 'PANEL', fn)
/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in compute_panel(cls, data, scales, **params)
297 stats = []
298 for _, old in data.groupby('group'):
--> 299 new = cls.compute_group(old, scales, **params)
300 unique = uniquecols(old)
301 missing = unique.columns.difference(new.columns)
/usr/local/lib/python3.7/site-packages/plotnine/stats/stat_smooth.py in compute_group(cls, data, scales, **params)
194 xseq = np.linspace(rangee[0], rangee[1], n)
195
--> 196 return predictdf(data, xseq, **params)
/usr/local/lib/python3.7/site-packages/plotnine/stats/smoothers.py in predictdf(data, xseq, **params)
41 raise PlotnineError()
42
---> 43 return method(data, xseq, **params)
44
45
/usr/local/lib/python3.7/site-packages/plotnine/stats/smoothers.py in loess(data, xseq, **params)
200
201 lo = loess_klass(data['x'], data['y'], weights, **kwargs)
--> 202 lo.fit()
203
204 data = pd.DataFrame({'x': xseq})
_loess.pyx in _loess.loess.fit()
ValueError: b'There are other near singularities as well. 0.063227\n'
It looks like there are not enough distinct data points in some or all of the local groups to run a loess regression. Try increasing the value of the span.