diff --git a/examples/plot_grid_search.py b/examples/plot_grid_search.py
new file mode 100644
index 0000000000000..dff81b743c6d8
--- /dev/null
+++ b/examples/plot_grid_search.py
@@ -0,0 +1,46 @@
+"""
+=====================================================
+Visualizing results of high dimensional grid searches
+=====================================================
+
+Often one is faced with combining feature extraction, feature selection
+and classification into a complex pipeline.
+Each individual step usually has many tunable parameters. Finding the
+important parameters for a given task and picking robust settings is often
+hard.
+
+This example shows how to visualize the results of a grid search with
+many interacting parameters.
+The ``DecisionTreeClassifier`` is a good model for a complex pipeline as there
+are many parameters to tweak, but often only a few have significant influence.
+"""
+print __doc__
+
+import pylab as pl
+
+from sklearn.datasets import make_classification
+from sklearn.grid_search import GridSearchCV
+from sklearn.tree import DecisionTreeClassifier
+
+X, y = make_classification(n_samples=100, n_features=10, random_state=0)
+
+param_grid = {'max_depth': range(1, 8), 'min_samples_split': [1, 2, 3, 4],
+              'max_features': [1, 3, 5, 8, 10]}
+
+grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid=param_grid,
+                           cv=5)
+grid_search.fit(X, y)
+
+cv_scores = grid_search.scores_
+
+fig, axes = pl.subplots(1, 3)
+axes = axes.ravel()
+for ax, param in zip(axes, cv_scores.params):
+    means, errors = cv_scores.accumulate(param, 'max')
+    ax.errorbar(cv_scores.values[param], means, yerr=errors)
+    ax.set_xlabel(param)
+    ax.set_ylabel("accuracy")
+    ax.set_ylim(0.6, 0.95)
+fig.set_size_inches((12, 4), forward=True)
+pl.subplots_adjust(left=0.07, right=0.95, bottom=0.15, wspace=0.26)
+pl.show()
diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py
index 465098199be2c..5246f17017b0b 100644
--- a/examples/svm/plot_rbf_parameters.py
+++ b/examples/svm/plot_rbf_parameters.py
@@ -105,21 +105,24 @@
 pl.axis('tight')
 
 # plot the scores of the grid
-# grid_scores_ contains parameter settings and scores
-score_dict = grid.grid_scores_
-
-# We extract just the scores
-scores = [x[1] for x in score_dict]
-scores = np.array(scores).reshape(len(C_range), len(gamma_range))
+cv_scores = grid.scores_
 
 # draw heatmap of accuracy as a function of gamma and C
 pl.figure(figsize=(8, 6))
 pl.subplots_adjust(left=0.05, right=0.95, bottom=0.15, top=0.95)
-pl.imshow(scores, interpolation='nearest', cmap=pl.cm.spectral)
+pl.imshow(cv_scores.mean(), interpolation='nearest', cmap=pl.cm.spectral)
 pl.xlabel('gamma')
 pl.ylabel('C')
-pl.colorbar()
+cb = pl.colorbar()
+cb.set_label("Accuracy")
 pl.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
 pl.yticks(np.arange(len(C_range)), C_range)
 
+fig, axes = pl.subplots(2, 1)
+for ax, param in zip(axes, cv_scores.params):
+    maxs, errors = cv_scores.accumulate(param, 'max')
+    ax.errorbar(np.arange(len(cv_scores.values[param])), maxs,
+                yerr=errors)
+    ax.set_title(param)
+
 pl.show()
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index eccf0ac471e2e..48e5c10c0f2af 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -19,6 +19,86 @@
 from .utils import check_arrays, safe_mask
 
 
+class ResultGrid(object):
+    """Provides easy access to grid search results.
+
+    This object is constructed by GridSearchCV and
+    provides an easy interface to evaluate the grid search
+    results.
+
+    Attributes
+    ----------
+    params: list of string
+        Lists the parameters adjusted during the grid search.
+        This is an alphabetical sorting of the keys
+        of the ``param_grid`` used in the GridSearchCV.
+    values: dict
+        This contains the values of the parameters
+        that were used during the grid search.
+    scores: ndarray
+        Contains all the scores of all runs.
+        Each axis corresponds to the setting of one
+        parameter, in the order given in ``params``.
+        The last axis corresponds to the folds.
+    """
+
+    def __init__(self, params, values, scores):
+        self.scores = scores
+        self.params = params
+        self.values = values
+
+    def mean(self):
+        """Returns mean scores over folds for the whole parameter grid."""
+        return np.mean(self.scores, axis=-1)
+
+    def std(self):
+        """Returns standard deviation of scores over folds for the whole
+        parameter grid."""
+        return np.std(self.scores, axis=-1)
+
+    def accumulate(self, param, kind="max"):
+        """Accumulates scores over all but one parameter.
+
+        Useful for grid searches over many parameters, where
+        the whole grid cannot easily be visualized.
+
+        Parameters
+        ----------
+        param: string
+            Name of the parameter not to accumulate over.
+        kind: string, 'mean' or 'max'
+            Operation that is used to accumulate over all parameters
+            except ``param``.
+
+        Returns
+        -------
+        scores: ndarray
+            1d array of scores corresponding to the different settings
+            of ``param``.
+        errors: ndarray
+            1d array of standard deviations of scores.
+        """
+        index = self.params.index(param)
+        # move the axis belonging to ``param`` to the front
+        n_values = len(self.values[param])
+        accumulated_mean = np.rollaxis(self.mean(), index, 0)
+        accumulated_mean = accumulated_mean.reshape(n_values, -1)
+        accumulated_std = np.rollaxis(self.std(), index, 0)
+        accumulated_std = accumulated_std.reshape(n_values, -1)
+        if kind == "mean":
+            accumulated_mean = np.mean(accumulated_mean, axis=-1)
+            accumulated_std = np.mean(accumulated_std, axis=-1)
+        elif kind == "max":
+            max_inds = np.argmax(accumulated_mean, axis=-1)
+            inds = np.indices(max_inds.shape)
+            accumulated_mean = accumulated_mean[inds, max_inds].ravel()
+            accumulated_std = accumulated_std[inds, max_inds].ravel()
+        else:
+            raise ValueError("kind must be 'mean' or 'max', got %s." %
+                             str(kind))
+        return accumulated_mean, accumulated_std
+
+
 class IterGrid(object):
     """Generators on the combination of the various parameter lists given
 
@@ -97,7 +177,6 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, loss_func,
     else:
         X_train = X[safe_mask(X, train)]
         X_test = X[safe_mask(X, test)]
-
     if y is not None:
         y_test = y[safe_mask(y, test)]
        y_train = y[safe_mask(y, train)]
@@ -150,8 +229,8 @@ def _check_param_grid(param_grid):
             raise ValueError("Parameter values should be a list.")
 
         if len(v) == 0:
-            raise ValueError("Parameter values should be a non-empty "
-                             "list.")
+            raise ValueError("Parameter values should be "
+                             "a non-empty list.")
 
 
 def _has_one_grid_point(param_grid):
@@ -268,6 +347,10 @@ class GridSearchCV(BaseEstimator, MetaEstimatorMixin):
     `best_params_` : dict
         Parameter setting that gave the best results on the hold out data.
 
+    `scores_` : list of ResultGrid
+        For each dict in ``param_grid`` this holds a ``ResultGrid`` for easy
+        analysis of the scores (a single dict yields a single ``ResultGrid``).
+
     Notes
     ------
     The parameters selected are those that maximize the score of the left out
@@ -435,9 +518,38 @@ def _fit(self, X, y):
         self._best_estimator_ = best_estimator
         self._set_methods()
 
-        # Store the computed scores
-        # XXX: the name is too specific, it shouldn't have
-        # 'grid' in it. Also, we should be retrieving/storing variance
+        # param_grid can be a dict or a list of dicts;
+        # wrap a single dict in a list for unified treatment
+        if hasattr(self.param_grid, 'items'):
+            # wrap dictionary in a singleton list
+            param_grid = [self.param_grid]
+        else:
+            param_grid = self.param_grid
+        # for each entry in the param_grid list, we build
+        # an array of scores. The entries can contribute different
+        # numbers of grid points, so we keep track of the offset
+        # into cv_scores by hand.
+        start = 0
+        self.scores_ = []
+        for one_grid in param_grid:
+            sorted_params = sorted(one_grid.keys())
+            # get the number of values for each parameter
+            grid_shape = [len(one_grid[k]) for k in sorted_params]
+            n_entries = np.prod(grid_shape)
+            grid_shape.append(n_folds)
+            # get scores
+            score_array = np.array(cv_scores[start:start + n_entries])
+            # reshape to fit the sequence of values
+            score_array = score_array.reshape(grid_shape)
+            self.scores_.append(ResultGrid(sorted_params, one_grid,
+                                           score_array))
+            start += n_entries
+
+        # often the list is just one grid. Make access easier
+        if len(self.scores_) == 1:
+            self.scores_ = self.scores_[0]
+
+        # old interface
         self.grid_scores_ = [
             (clf_params, score, all_scores)
             for clf_params, (score, _), all_scores
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index 37a45142bc54d..d6f8812b3bc61 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -12,6 +12,7 @@
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets.samples_generator import make_classification
 from sklearn.svm import LinearSVC, SVC
+from sklearn.tree import DecisionTreeClassifier
 from sklearn.metrics import f1_score, precision_score
 from sklearn.cross_validation import KFold
 
@@ -49,7 +50,7 @@ def test_grid_search():
     assert_equal(grid_search.best_estimator_.foo_param, 2)
 
     for i, foo_i in enumerate([1, 2, 3]):
-        assert_true(grid_search.grid_scores_[i][0] == {'foo_param': foo_i})
+        assert_equal(grid_search.grid_scores_[i][0], {'foo_param': foo_i})
     # Smoke test the score:
     grid_search.score(X, y)
 
@@ -225,3 +226,33 @@ def test_X_as_list():
     cv = KFold(n=len(X), k=3)
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
     grid_search.fit(X.tolist(), y).score(X, y)
+
+
+def test_result_grid():
+    # make a small grid search and test the ResultGrid on it
+    clf = DecisionTreeClassifier()
+    X, y = make_classification()
+    param_grid = {'max_depth': np.arange(1, 5),
+                  'max_features': np.arange(1, 3)}
+    grid_search = GridSearchCV(clf, param_grid=param_grid)
+    grid_search.fit(X, y)
+    result = grid_search.scores_
+    assert_equal(result.mean().shape, (4, 2))
+    assert_equal(result.std().shape, (4, 2))
+    assert_equal(result.scores.shape, (4, 2, 3))
+    means, errs = result.accumulate('max_depth')
+    assert_equal(len(means), 4)
+    assert_equal(len(errs), 4)
+    assert_equal(len(result.values['max_depth']), 4)
+
+
+def test_list():
+    # test that grid search can handle a list of dicts as param_grid
+    # smoke test!
+    clf = DecisionTreeClassifier()
+    X, y = make_classification()
+    param_grid = [{'max_depth': np.arange(1, 5)},
+                  {'max_features': np.arange(1, 3)}]
+    grid_search = GridSearchCV(clf, param_grid=param_grid)
+    grid_search.fit(X, y)
+    assert_equal(len(grid_search.scores_), 2)
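
For reviewers, a minimal usage sketch of the ``scores_`` attribute introduced by this patch. It is illustrative only: the estimator, dataset and grid values below are arbitrary choices, not part of the change; only the ``ResultGrid`` API (``params``, ``values``, ``scores``, ``mean()``, ``std()``, ``accumulate()``) comes from the patch.

    from sklearn.datasets import make_classification
    from sklearn.grid_search import GridSearchCV
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    param_grid = {'max_depth': [1, 2, 3, 4], 'max_features': [1, 3, 5]}
    grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid=param_grid,
                               cv=5)
    grid_search.fit(X, y)

    # param_grid is a single dict, so scores_ is a single ResultGrid
    cv_scores = grid_search.scores_
    print cv_scores.params        # ['max_depth', 'max_features'] (sorted keys)
    print cv_scores.scores.shape  # (4, 3, 5): one axis per parameter, last axis folds
    print cv_scores.mean().shape  # (4, 3): mean score over folds per grid point

    # best score over all settings of the other parameters, for each value
    # of max_depth, together with the matching standard deviations over folds
    means, errors = cv_scores.accumulate('max_depth', kind='max')
    print means.shape             # (4,)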