Move the weights= keyword argument to the end of the function signatures

Uses dropna(subset=...) to drop the rows that contain NaNs in the supplied columns. This is also done at the beginning, so that we do not have to duplicate this logic.
pandas-dev · Twizzledrizzle · Oct 27, 2015 · Oct 27, 2015 · Oct 27, 2015 · Oct 27, 2015
commit cbe68ecb97cb68c632f65f0e788e2fa30095a3b1
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
@@ -2770,9 +2770,9 @@ def plot_group(group, ax):
     return fig
 
 
-def hist_frame(data, column=None, weights=None, by=None, grid=True,
-               xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None,
-               sharex=False, sharey=False, figsize=None, layout=None, bins=10,
+def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
+               xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
+               sharey=False, figsize=None, layout=None, bins=10, weights=None,
                **kwds):
     """
     Draw histogram of the DataFrame's series using matplotlib / pylab.
@@ -2782,8 +2782,6 @@ def hist_frame(data, column=None, weights=None, by=None, grid=True,
     data : DataFrame
     column : string or sequence
         If passed, will be used to limit data to a subset of columns
-    weights : string or sequence
-        If passed, will be used to weight the data
     by : object, optional
         If passed, then used to form histograms for separate groups
     grid : boolean, default True
@@ -2810,38 +2808,42 @@ def hist_frame(data, column=None, weights=None, by=None, grid=True,
     layout: (optional) a tuple (rows, columns) for the layout of the histograms
     bins: integer, default 10
         Number of histogram bins to be used
+    weights : string or sequence
+        If passed, will be used to weight the data
     kwds : other plotting keyword arguments
         To be passed to hist function
     """
+    subset_cols_drop_nan = []
+    if weights is not None:
+        if isinstance(weights, np.ndarray):
+            # weights supplied as an array instead of a part of the dataframe
+            if 'weights' in data.columns:
+                raise NameError('weights already in data.columns. Could not ' +
+                                'add dummy column')
+            data = data.copy()
+            data['weights'] = weights
+            weights = 'weights'
+        subset_cols_drop_nan.append(weights)
+    if column is not None:
+        subset_cols_drop_nan.append(column)
+    data = data.dropna(subset=subset_cols_drop_nan)
+
     if by is not None:
-        axes = grouped_hist(data, column=column, weights=weights, by=by, ax=ax, grid=grid, figsize=figsize,
+        axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, figsize=figsize,
                             sharex=sharex, sharey=sharey, layout=layout, bins=bins,
                             xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot,
-                            **kwds)
+                            weights=weights, **kwds)
         return axes
 
-    inx_na = np.zeros(len(data), dtype=bool)
     if weights is not None:
-        # first figure out if given my column name, or by an array
-        if isinstance(weights, str):
-            weights = data[weights]
-        if isinstance(weights, np.ndarray) == False:
-            weights = weights.values
-        # remove fields where we have nan in weights OR in group
-        # for both data sets
-        inx_na = (np.isnan(weights))
+        weights = data[weights]
+        weights = weights._get_numeric_data()
 
     if column is not None:
         if not isinstance(column, (list, np.ndarray, Index)):
             column = [column]
         data = data[column]
     data = data._get_numeric_data()
-    inx_na |= np.isnan(data.T.values)[0]
-
-    data = data.ix[~inx_na]
-    if weights is not None:
-        weights = weights[~inx_na]
-
     naxes = len(data.columns)
 
     fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False,
@@ -2935,18 +2937,17 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
     return axes
 
 
-def grouped_hist(data, column=None, weights=None, by=None, ax=None, bins=50,
+def grouped_hist(data, column=None, by=None, ax=None, bins=50,
                  figsize=None, layout=None, sharex=False, sharey=False, rot=90,
                  grid=True, xlabelsize=None, xrot=None, ylabelsize=None,
-                 yrot=None, **kwargs):
+                 yrot=None, weights=None, **kwargs):
     """
     Grouped histogram
 
     Parameters
     ----------
     data: Series/DataFrame
     column: object, optional
-    weights: object, optional
     by: object, optional
     ax: axes, optional
     bins: int, default 50
@@ -2956,6 +2957,7 @@ def grouped_hist(data, column=None, weights=None, by=None, ax=None, bins=50,
     sharey: boolean, default False
     rot: int, default 90
     grid: bool, default True
+    weights: object, optional
     kwargs: dict, keyword arguments passed to matplotlib.Axes.hist
 
     Returns
@@ -2965,25 +2967,20 @@ def grouped_hist(data, column=None, weights=None, by=None, ax=None, bins=50,
     def plot_group(group, ax, weights=None):
         if isinstance(group, np.ndarray) == False:
             group = group.values
-        inx_na = np.isnan(group)
         if weights is not None:
-            # remove fields where we have nan in weights OR in group
-            # for both data sets
             if isinstance(weights, np.ndarray) == False:
                 weights = weights.values
-            inx_na |= (np.isnan(weights))
-            weights = weights[~inx_na]
-        group = group[~inx_na]
         if len(group) > 0:
             # if length is less than 0, we had only NaN's for this group
             # nothing to print!
             ax.hist(group, weights=weights, bins=bins, **kwargs)
 
     xrot = xrot or rot
 
-    fig, axes = _grouped_plot(plot_group, data, column=column, weights=weights,
-                              by=by, sharex=sharex, sharey=sharey, ax=ax,
-                              figsize=figsize, layout=layout, rot=rot)
+    fig, axes = _grouped_plot(plot_group, data, column=column, by=by,
+                              sharex=sharex, sharey=sharey, ax=ax,
+                              figsize=figsize, layout=layout, rot=rot,
+                              weights=weights)
 
     _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
              ylabelsize=ylabelsize, yrot=yrot)
@@ -3068,9 +3065,9 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
     return ret
 
 
-def _grouped_plot(plotf, data, column=None, weights=None, by=None,
+def _grouped_plot(plotf, data, column=None, by=None,
                   numeric_only=True, figsize=None, sharex=True, sharey=True,
-                  layout=None, rot=0, ax=None, **kwargs):
+                  layout=None, rot=0, ax=None, weights=None, **kwargs):
     from pandas import DataFrame
 
     if figsize == 'default':
@@ -3079,22 +3076,13 @@ def _grouped_plot(plotf, data, column=None, weights=None, by=None,
                       "size by tuple instead", FutureWarning, stacklevel=4)
         figsize = None
 
-    added_weights_dummy_column = False
-    if isinstance(weights, np.ndarray):
-        # weights supplied as an array instead of a part of the dataframe
-        data['weights'] = weights
-        weights = 'weights'
-        added_weights_dummy_column = True
-
     grouped = data.groupby(by)
+
+    if weights is not None:
+        weights = grouped[weights]
     if column is not None:
-        if weights is not None:
-            weights = grouped[weights]
         grouped = grouped[column]
 
-    if added_weights_dummy_column:
-        data = data.drop('weights', axis=1)
-
     naxes = len(grouped)
     fig, axes = _subplots(naxes=naxes, figsize=figsize,
                           sharex=sharex, sharey=sharey, ax=ax,