diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
index 22e4a930bb25b..18e0d8a58a281 100644
--- a/doc/modules/preprocessing.rst
+++ b/doc/modules/preprocessing.rst
@@ -397,31 +397,37 @@ only one active. Continuing the example above::

     >>> enc = preprocessing.OneHotEncoder()
-    >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]])  # doctest: +ELLIPSIS
+    >>> enc.fit([['female', 'from US', 'uses Chrome'],
+    ...          ['male', 'from Asia', 'uses Firefox']])  # doctest: +ELLIPSIS
     OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
-           handle_unknown='error', n_values='auto', sparse=True)
-    >>> enc.transform([[0, 1, 3]]).toarray()
-    array([[ 1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.]])
+           handle_unknown='error', n_values=None, sparse=True, values='auto')
+    >>> enc.transform([['female', 'from Asia', 'uses Firefox']]).toarray()
+    array([[ 1.,  0.,  1.,  0.,  0.,  1.]])

 By default, how many values each feature can take is inferred automatically
 from the dataset.
-It is possible to specify this explicitly using the parameter ``n_values``.
+It is possible to specify this explicitly using the parameter ``values``.
 There are two genders, three possible continents and four web browsers in our
 dataset.
 Then we fit the estimator, and transform a data point.
-In the result, the first two numbers encode the gender, the next set of three
-numbers the continent and the last four the web browser.
+In the result, the first two values encode the gender, the next two the
+continent and the last two the web browser: by default, only the values
+actually seen during ``fit`` are encoded.

-Note that, if there is a possibilty that the training data might have missing
-categorical features, one has to explicitly set ``n_values``. For example,
+Note that, if the training data might not contain all possible values of a
+categorical feature, one has to explicitly set ``values``. For example,

-    >>> enc = preprocessing.OneHotEncoder(n_values=[2, 3, 4])
+    >>> browsers = ['uses Internet Explorer', 'uses Chrome', 'uses Safari', 'uses Firefox']
+    >>> genders = ['male', 'female']
+    >>> locations = ['from Europe', 'from Asia', 'from US']
+    >>> enc = preprocessing.OneHotEncoder(values=[genders, locations, browsers])
     >>> # Note that there are missing categorical values for the 2nd and 3rd
     >>> # features
-    >>> enc.fit([[1, 2, 3], [0, 2, 0]])  # doctest: +ELLIPSIS
+    >>> enc.fit([['female', 'from US', 'uses Chrome'],
+    ...          ['male', 'from Asia', 'uses Internet Explorer']])  # doctest: +ELLIPSIS
     OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
-           handle_unknown='error', n_values=[2, 3, 4], sparse=True)
-    >>> enc.transform([[1, 0, 0]]).toarray()
-    array([[ 0.,  1.,  1.,  0.,  0.,  1.,  0.,  0.,  0.]])
+           handle_unknown='error', n_values=None, sparse=True,
+           values=[...])
+    >>> enc.transform([['male', 'from Europe', 'uses Safari']]).toarray()
+    array([[ 0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  1.]])

 See :ref:`dict_feature_extraction` for categorical features that are
 represented as a dict, not as integers.
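To see the documented behaviour end to end, here is a minimal sketch of the
example above, assuming the ``values`` parameter and string support that this
patch introduces (neither is available in released scikit-learn at this
point)::

    from sklearn.preprocessing import OneHotEncoder

    genders = ['male', 'female']
    locations = ['from Europe', 'from Asia', 'from US']
    browsers = ['uses Internet Explorer', 'uses Chrome', 'uses Safari',
                'uses Firefox']

    # Declaring the full set of values per feature gives categories that are
    # missing from the training data their own output column.
    enc = OneHotEncoder(values=[genders, locations, browsers])
    enc.fit([['female', 'from US', 'uses Chrome'],
             ['male', 'from Asia', 'uses Internet Explorer']])

    # 2 + 3 + 4 = 9 columns, one per declared value (sorted within a feature).
    print(enc.transform([['male', 'from Europe', 'uses Safari']]).toarray())
    # [[ 0.  1.  0.  1.  0.  0.  0.  0.  1.]]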
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 1c3d8db580272..208419f5d354c 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -27,6 +27,8 @@
     mean_variance_axis, incr_mean_variance_axis, min_max_axis)
 from ..utils.validation import check_is_fitted, FLOAT_DTYPES
+from .label import LabelEncoder
+from ..utils.fixes import in1d, setdiff1d

 zip = six.moves.zip

@@ -1673,28 +1675,29 @@ def add_dummy_feature(X, value=1.0):
         return np.hstack((np.ones((n_samples, 1)) * value, X))


-def _transform_selected(X, transform, selected="all", copy=True):
-    """Apply a transform function to portion of selected features
-
+def _apply_selected(X, transform, selected="all", dtype=np.float, copy=True,
+                    return_val=True):
+    """Apply a function to the selected features of X.
+
     Parameters
     ----------
-    X : {array-like, sparse matrix}, shape [n_samples, n_features]
+    X : {array, sparse matrix}, shape [n_samples, n_features]
         Dense array or sparse matrix.
-
     transform : callable
         A callable transform(X) -> X_transformed
-
     copy : boolean, optional
         Copy X even if it could be avoided.
-
     selected: "all" or array of indices or mask
         Specify which features to apply the transform to.
+    return_val : boolean, optional
+        Whether to return the transformed matrix. If False, ``None`` is
+        returned.
+
     Returns
     -------
-    X : array or sparse matrix, shape=(n_samples, n_features_new)
+    X : array or sparse matrix, shape=(n_samples, n_features_new)
     """
-    X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)
+
+    if copy:
+        X = X.copy()

     if isinstance(selected, six.string_types) and selected == "all":
         return transform(X)

@@ -1717,22 +1720,22 @@ def _transform_selected(X, transform, selected="all", copy=True):
         return transform(X)
     else:
         X_sel = transform(X[:, ind[sel]])
-        X_not_sel = X[:, ind[not_sel]]
+        X_not_sel = X[:, ind[not_sel]].astype(dtype)

-        if sparse.issparse(X_sel) or sparse.issparse(X_not_sel):
-            return sparse.hstack((X_sel, X_not_sel))
-        else:
-            return np.hstack((X_sel, X_not_sel))
+        if return_val:
+            if sparse.issparse(X_sel) or sparse.issparse(X_not_sel):
+                return sparse.hstack((X_sel, X_not_sel))
+            else:
+                return np.hstack((X_sel, X_not_sel))


 class OneHotEncoder(BaseEstimator, TransformerMixin):
-    """Encode categorical integer features using a one-hot aka one-of-K scheme.
+    """Encode categorical features using a one-hot aka one-of-K scheme.

-    The input to this transformer should be a matrix of integers, denoting
-    the values taken on by categorical (discrete) features. The output will be
-    a sparse matrix where each column corresponds to one possible value of one
-    feature. It is assumed that input features take on values in the range
-    [0, n_values).
+    The input to this transformer should be a matrix of integers or strings,
+    denoting the values taken on by categorical (discrete) features. The
+    output will be a sparse matrix where each column corresponds to one
+    possible value of one feature.

     This encoding is needed for feeding categorical data to many scikit-learn
     estimators, notably linear models and SVMs with the standard kernels.

@@ -1741,15 +1744,13 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):

     Parameters
     ----------
-    n_values : 'auto', int or array of ints
-        Number of values per feature.
-
-        - 'auto' : determine value range from training data.
-        - int : number of categorical values per feature.
-                Each feature value should be in ``range(n_values)``
-        - array : ``n_values[i]`` is the number of categorical values in
-                  ``X[:, i]``. Each feature value should be
-                  in ``range(n_values[i])``
+    values : 'auto', int, list of ints, or list of lists of objects
+        - 'auto' : determine the set of values from the training data. See
+          the documentation of ``handle_unknown`` for which values are
+          considered acceptable.
+        - int : values are in ``range(values)`` for all features
+        - list of ints : values for feature ``i`` are in ``range(values[i])``
+        - list of lists : values for feature ``i`` are in ``values[i]``

     categorical_features: "all" or array of indices or mask
         Specify what features are treated as categorical.
@@ -1767,23 +1768,23 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
         Will return sparse matrix if set True else will return an array.

-    handle_unknown : str, 'error' or 'ignore'
-        Whether to raise an error or ignore if a unknown categorical feature is
-        present during transform.
+    handle_unknown : str, 'error', 'error-strict' or 'ignore'

-    Attributes
-    ----------
-    active_features_ : array
-        Indices for active features, meaning values that actually occur
-        in the training set. Only available when n_values is ``'auto'``.
+        - 'ignore': Ignore all unknown feature values.
+        - 'error': Raise an error when the value of a feature is both unseen
+          during ``fit`` and out of the range of values seen during ``fit``.
+        - 'error-strict': Raise an error when the value of a feature is unseen
+          during ``fit``.

-    feature_indices_ : array of shape (n_features,)
-        Indices to feature ranges.
-        Feature ``i`` in the original data is mapped to features
-        from ``feature_indices_[i]`` to ``feature_indices_[i+1]``
-        (and then potentially masked by `active_features_` afterwards)
+    copy : bool, default=True
+        If False, ``X`` may be modified in place.

-    n_values_ : array of shape (n_features,)
-        Maximum number of values per feature.
+    Attributes
+    ----------
+    label_encoders_ : list of size n_features
+        The :class:`sklearn.preprocessing.LabelEncoder` objects used to encode
+        the features. ``self.label_encoders_[i]`` is the LabelEncoder object
+        used to encode the ith column. The unique values found in column
+        ``i`` can be accessed using ``self.label_encoders_[i].classes_``.

     Examples
     --------
@@ -1793,16 +1794,13 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
     >>> from sklearn.preprocessing import OneHotEncoder
     >>> enc = OneHotEncoder()
-    >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], \
-[1, 0, 2]])  # doctest: +ELLIPSIS
+    >>> enc.fit([['cat', 4], ['mouse', 15], ['dog', 17]])  # doctest: +ELLIPSIS
     OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
-           handle_unknown='error', n_values='auto', sparse=True)
-    >>> enc.n_values_
-    array([2, 3, 4])
-    >>> enc.feature_indices_
-    array([0, 2, 5, 9])
-    >>> enc.transform([[0, 1, 1]]).toarray()
-    array([[ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])
+           handle_unknown='error', n_values=None, sparse=True, values='auto')
+    >>> list(enc.label_encoders_[0].classes_)
+    ['cat', 'dog', 'mouse']
+    >>> enc.transform([['dog', 4]]).toarray()
+    array([[ 0.,  1.,  0.,  1.,  0.,  0.]])

     See also
     --------
@@ -1811,138 +1809,208 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
     sklearn.feature_extraction.FeatureHasher : performs an approximate one-hot
       encoding of dictionary items or strings.
""" - def __init__(self, n_values="auto", categorical_features="all", - dtype=np.float64, sparse=True, handle_unknown='error'): - self.n_values = n_values + + def __init__(self, values='auto', categorical_features="all", + n_values=None, dtype=np.float64, sparse=True, + handle_unknown='error', copy=True): + self.values = values self.categorical_features = categorical_features self.dtype = dtype self.sparse = sparse self.handle_unknown = handle_unknown + self.n_values = n_values + self.copy = copy def fit(self, X, y=None): - """Fit OneHotEncoder to X. + """Fit the CategoricalEncoder to X. Parameters ---------- X : array-like, shape [n_samples, n_feature] - Input array of type int. + Array of ints or strings or both. Returns ------- self """ - self.fit_transform(X) - return self - def _fit_transform(self, X): - """Assumes X contains only categorical features.""" - X = check_array(X, dtype=np.int) - if np.any(X < 0): - raise ValueError("X needs to contain only non-negative integers.") + X = check_array(X, dtype=np.object, accept_sparse='csc', + copy=self.copy) n_samples, n_features = X.shape - if (isinstance(self.n_values, six.string_types) and - self.n_values == 'auto'): - n_values = np.max(X, axis=0) + 1 - elif isinstance(self.n_values, numbers.Integral): - if (np.max(X, axis=0) >= self.n_values).any(): - raise ValueError("Feature out of bounds for n_values=%d" - % self.n_values) - n_values = np.empty(n_features, dtype=np.int) - n_values.fill(self.n_values) - else: - try: - n_values = np.asarray(self.n_values, dtype=int) - except (ValueError, TypeError): - raise TypeError("Wrong type for parameter `n_values`. Expected" - " 'auto', int or array of ints, got %r" - % type(X)) - if n_values.ndim < 1 or n_values.shape[0] != X.shape[1]: - raise ValueError("Shape mismatch: if n_values is an array," - " it has to be of shape (n_features,).") - - self.n_values_ = n_values - n_values = np.hstack([[0], n_values]) - indices = np.cumsum(n_values) - self.feature_indices_ = indices - column_indices = (X + indices[:-1]).ravel() - row_indices = np.repeat(np.arange(n_samples, dtype=np.int32), - n_features) - data = np.ones(n_samples * n_features) - out = sparse.coo_matrix((data, (row_indices, column_indices)), - shape=(n_samples, indices[-1]), - dtype=self.dtype).tocsr() + _apply_selected(X, self._fit, dtype=self.dtype, + selected=self.categorical_features, copy=True, + return_val=False) + return self - if (isinstance(self.n_values, six.string_types) and - self.n_values == 'auto'): - mask = np.array(out.sum(axis=0)).ravel() != 0 - active_features = np.where(mask)[0] - out = out[:, active_features] - self.active_features_ = active_features + def _fit(self, X): + "Assumes `X` contains only catergorical features." 
-        return out if self.sparse else out.toarray()
+
+        X = check_array(X, dtype=np.object)
+        n_samples, n_features = X.shape
+
+        self._n_features = n_features
+        self.label_encoders_ = [LabelEncoder() for i in range(n_features)]
+        # Maximum value seen for each feature
+        self._max_values = [None for i in range(n_features)]
+
+        if self.n_values is not None:
+            warnings.warn('The parameter `n_values` is deprecated, use the '
+                          'parameter `values` instead and specify the '
+                          'expected values for each feature')
+
+            if isinstance(self.n_values, numbers.Integral):
+                if (np.max(X, axis=0) >= self.n_values).any():
+                    raise ValueError("Feature out of bounds for n_values=%d"
+                                     % self.n_values)
+                self.values = self.n_values
+            else:
+                try:
+                    n_values = np.asarray(self.n_values, dtype=int)
+                except (ValueError, TypeError):
+                    raise TypeError("Wrong type for parameter `n_values`."
+                                    " Expected 'auto', int or array of ints,"
+                                    " got %r" % type(X))
+                if n_values.ndim < 1 or n_values.shape[0] != X.shape[1]:
+                    raise ValueError("Shape mismatch: if n_values is an"
+                                     " array, it has to be of shape"
+                                     " (n_features,).")
+                self.values = list(self.n_values)
+
+        error_msg = ("`values` should be 'auto', an integer, a list of"
+                     " integers or a list of lists")
+
+        for i in range(n_features):
+            le = self.label_encoders_[i]
+
+            self._max_values[i] = np.max(X[:, i])
+            if self.values == 'auto':
+                le.fit(X[:, i])
+            elif isinstance(self.values, numbers.Integral):
+                if (np.max(X, axis=0) >= self.values).any():
+                    raise ValueError("Feature out of bounds for values=%d"
+                                     % self.values)
+                le.fit(np.arange(self.values, dtype=np.int))
+            elif isinstance(self.values, list):
+                if len(self.values) != X.shape[1]:
+                    raise ValueError("Shape mismatch: if values is a list,"
+                                     " it has to be of length n_features.")
+                if isinstance(self.values[i], list):
+                    le.fit(self.values[i])
+                elif isinstance(self.values[i], numbers.Integral):
+                    le.fit(np.arange(self.values[i], dtype=np.int))
+                else:
+                    raise ValueError(error_msg)
+            else:
+                raise ValueError(error_msg)
+
+    def transform(self, X, y=None):
+        """Encode the selected categorical features using the one-hot scheme.

-    def fit_transform(self, X, y=None):
-        """Fit OneHotEncoder to X, then transform X.
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+            Array of ints or strings or both.

-        Equivalent to self.fit(X).transform(X), but more convenient and more
-        efficient. See fit for the parameters, transform for the return value.
+        Returns
+        -------
+        out : array, shape [n_samples, n_features_new]
+            `X` encoded using the one-hot scheme.
         """
-        return _transform_selected(X, self._fit_transform,
-                                   self.categorical_features, copy=True)
+        X = check_array(X, dtype=np.object)
+
+        return _apply_selected(X, self._transform, copy=True,
+                               selected=self.categorical_features)

     def _transform(self, X):
-        """Assumes X contains only categorical features."""
-        X = check_array(X, dtype=np.int)
-        if np.any(X < 0):
-            raise ValueError("X needs to contain only non-negative integers.")
+        "Assumes `X` contains only categorical features."
+
+        X = check_array(X, accept_sparse='csc', dtype=np.object)
         n_samples, n_features = X.shape
+        X_int = np.zeros_like(X, dtype=np.int)
+        X_mask = np.ones_like(X, dtype=np.bool)
+
+        for i in range(n_features):
+
+            valid_mask = in1d(X[:, i], self.label_encoders_[i].classes_)
+
+            if not np.all(valid_mask):
+                if self.handle_unknown in ['error', 'error-strict']:
+                    diff = setdiff1d(X[:, i],
+                                     self.label_encoders_[i].classes_)
+                    if self.handle_unknown == 'error-strict':
+                        msg = 'Unknown feature(s) %s in column %d' % (diff, i)
+                        raise ValueError(msg)
+                    else:
+                        if np.all(diff <= self._max_values[i]):
+                            msg = ('Values %s for feature %d are unknown but '
+                                   'in range. This will raise an error in '
+                                   'future versions.' % (str(diff), i))
+                            warnings.warn(FutureWarning(msg))
+                            X_mask[:, i] = valid_mask
+                            le = self.label_encoders_[i]
+                            X[:, i][~valid_mask] = le.classes_[0]
+                        else:
+                            msg = ('Unknown feature(s) %s in column %d' %
+                                   (diff, i))
+                            raise ValueError(msg)
+                elif self.handle_unknown == 'ignore':
+                    # Set the problematic rows to an acceptable value and
+                    # continue. The rows are marked in `X_mask` and will be
+                    # removed later.
+                    X_mask[:, i] = valid_mask
+                    X[:, i][~valid_mask] = self.label_encoders_[i].classes_[0]
+                else:
+                    template = ("handle_unknown should be either 'error', "
+                                "'error-strict' or 'ignore', got %s")
+                    raise ValueError(template % self.handle_unknown)
+
+            X_int[:, i] = self.label_encoders_[i].transform(X[:, i])

-        indices = self.feature_indices_
-        if n_features != indices.shape[0] - 1:
-            raise ValueError("X has different shape than during fitting."
-                             " Expected %d, got %d."
-                             % (indices.shape[0] - 1, n_features))
-
-        # We use only those categorical features of X that are known using fit.
-        # i.e lesser than n_values_ using mask.
-        # This means, if self.handle_unknown is "ignore", the row_indices and
-        # col_indices corresponding to the unknown categorical feature are
-        # ignored.
-        mask = (X < self.n_values_).ravel()
-        if np.any(~mask):
-            if self.handle_unknown not in ['error', 'ignore']:
-                raise ValueError("handle_unknown should be either error or "
-                                 "unknown got %s" % self.handle_unknown)
-            if self.handle_unknown == 'error':
-                raise ValueError("unknown categorical feature present %s "
-                                 "during transform." % X.ravel()[~mask])
-
-        column_indices = (X + indices[:-1]).ravel()[mask]
+        mask = X_mask.ravel()
+        n_values = [le.classes_.shape[0] for le in self.label_encoders_]
+        n_values = np.hstack([[0], n_values])
+        indices = np.cumsum(n_values)
+
+        column_indices = (X_int + indices[:-1]).ravel()[mask]
         row_indices = np.repeat(np.arange(n_samples, dtype=np.int32),
                                 n_features)[mask]
-        data = np.ones(np.sum(mask))
+        data = np.ones(n_samples * n_features)[mask]
+
         out = sparse.coo_matrix((data, (row_indices, column_indices)),
                                 shape=(n_samples, indices[-1]),
                                 dtype=self.dtype).tocsr()
+
         if (isinstance(self.n_values, six.string_types) and
                 self.n_values == 'auto'):
             out = out[:, self.active_features_]

         return out if self.sparse else out.toarray()

-    def transform(self, X):
-        """Transform X using one-hot encoding.
+    @property
+    def active_features_(self):
+        warnings.warn('The property `active_features_` is deprecated and'
+                      ' will be removed in version 0.20')
+        if self.n_values is None:
+            # TODO: what to do when classes are strings?
+            classes = [le.classes_ for le in self.label_encoders_]
+            classes_max = [np.max(cls) + 1 for cls in classes]
+            cum_idx = np.cumsum([0] + classes_max)
+            active_idx = [self.label_encoders_[i].classes_.astype(np.int) +
+                          cum_idx[i]
+                          for i in range(self._n_features)]
+
+            return np.concatenate(active_idx, axis=0).astype(np.int)
+        else:
+            raise AttributeError()

-        Parameters
-        ----------
-        X : array-like, shape [n_samples, n_features]
-            Input array of type int.
+    @property
+    def feature_indices_(self):
+        warnings.warn('The property `feature_indices_` is deprecated and'
+                      ' will be removed in version 0.20')
+        classes_max = [np.max(le.classes_) + 1 for le in self.label_encoders_]
+        return np.cumsum([0] + classes_max)

-        Returns
-        -------
-        X_out : sparse matrix if sparse=True else a 2-d array, dtype=int
-            Transformed input.
-        """
-        return _transform_selected(X, self._transform,
-                                   self.categorical_features, copy=True)
+    @property
+    def n_values_(self):
+        warnings.warn('The property `n_values_` is deprecated and'
+                      ' will be removed in version 0.20')
+        return np.array([le.classes_.shape[0] for le in self.label_encoders_])
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 39360ebdd779f..5944e4ab1f748 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -6,6 +6,7 @@
 # License: BSD 3 clause

 import warnings
+import re
 import numpy as np
 import numpy.linalg as la
 from scipy import sparse
@@ -32,7 +33,7 @@
 from sklearn.utils.testing import skip_if_32bit

 from sklearn.utils.sparsefuncs import mean_variance_axis
-from sklearn.preprocessing.data import _transform_selected
+from sklearn.preprocessing.data import _apply_selected
 from sklearn.preprocessing.data import _handle_zeros_in_scale
 from sklearn.preprocessing.data import Binarizer
 from sklearn.preprocessing.data import KernelCenterer
@@ -1501,9 +1502,10 @@ def test_one_hot_encoder_sparse():
     # test that an error is raised when out of bounds:
     X_too_large = [[0, 2, 1], [0, 1, 1]]
     assert_raises(ValueError, enc.transform, X_too_large)
-    error_msg = "unknown categorical feature present \[2\] during transform."
+    error_msg = re.escape("Unknown feature(s) [2] in column 1")
     assert_raises_regex(ValueError, error_msg, enc.transform, X_too_large)
     assert_raises(ValueError, OneHotEncoder(n_values=2).fit_transform, X)
+    assert_raises(ValueError, OneHotEncoder(values=2).fit_transform, X)

     # test that error is raised when wrong number of features
     assert_raises(ValueError, enc.transform, X[:, :-1])
@@ -1513,14 +1515,6 @@ def test_one_hot_encoder_sparse():
     # test exception on wrong init param
     assert_raises(TypeError, OneHotEncoder(n_values=np.int).fit, X)

-    enc = OneHotEncoder()
-    # test negative input to fit
-    assert_raises(ValueError, enc.fit, [[0], [-1]])
-
-    # test negative input to transform
-    enc.fit([[0], [1]])
-    assert_raises(ValueError, enc.transform, [[0], [-1]])
-

 def test_one_hot_encoder_dense():
     # check for sparse=False
@@ -1539,26 +1533,26 @@ def test_one_hot_encoder_dense():
                           [1., 0., 1., 0., 1.]]))


-def _check_transform_selected(X, X_expected, sel):
+def _check_apply_selected(X, X_expected, sel):
     for M in (X, sparse.csr_matrix(X)):
-        Xtr = _transform_selected(M, Binarizer().transform, sel)
+        Xtr = _apply_selected(M, Binarizer().transform, sel)
         assert_array_equal(toarray(Xtr), X_expected)


 def test_transform_selected():
-    X = [[3, 2, 1], [0, 1, 1]]
+    X = np.array([[3, 2, 1], [0, 1, 1]])

     X_expected = [[1, 2, 1], [0, 1, 1]]
-    _check_transform_selected(X, X_expected, [0])
-    _check_transform_selected(X, X_expected, [True, False, False])
+    _check_apply_selected(X, X_expected, [0])
+    _check_apply_selected(X, X_expected, [True, False, False])

     X_expected = [[1, 1, 1], [0, 1, 1]]
-    _check_transform_selected(X, X_expected, [0, 1, 2])
-    _check_transform_selected(X, X_expected, [True, True, True])
-    _check_transform_selected(X, X_expected, "all")
+    _check_apply_selected(X, X_expected, [0, 1, 2])
+    _check_apply_selected(X, X_expected, [True, True, True])
+    _check_apply_selected(X, X_expected, "all")

-    _check_transform_selected(X, X, [])
-    _check_transform_selected(X, X, [False, False, False])
+    _check_apply_selected(X, X, [])
+    _check_apply_selected(X, X, [False, False, False])


 def test_transform_selected_copy_arg():
@@ -1571,8 +1565,8 @@ def _mutating_transformer(X):
     expected_Xtr = [[2, 2], [3, 4]]

     X = original_X.copy()
-    Xtr = _transform_selected(X, _mutating_transformer, copy=True,
-                              selected='all')
+    Xtr = _apply_selected(X, _mutating_transformer, copy=True,
+                          selected='all')

     assert_array_equal(toarray(X), toarray(original_X))
     assert_array_equal(toarray(Xtr), expected_Xtr)
@@ -1601,6 +1595,14 @@ def _check_one_hot(X, X2, cat, n_features):
     assert_array_equal(toarray(B), toarray(D))


+def test_one_hot_encoder_string():
+    X = [['cat', 'domestic'], ['wolf', 'wild']]
+    enc = OneHotEncoder()
+    enc.fit(X)
+    Xtr = enc.transform([['cat', 'wild']])
+    assert_array_equal(toarray(Xtr), [[1, 0, 0, 1]])
+
+
 def test_one_hot_encoder_categorical_features():
     X = np.array([[3, 2, 1], [0, 1, 1]])
     X2 = np.array([[1, 1, 1]])
@@ -1623,10 +1625,19 @@ def test_one_hot_encoder_unknown_transform():
     # Test that one hot encoder raises error for unknown features
     # present during transform.
-    oh = OneHotEncoder(handle_unknown='error')
+    oh = OneHotEncoder(handle_unknown='error-strict')
     oh.fit(X)
     assert_raises(ValueError, oh.transform, y)

+    # Test that one hot encoder raises a warning for unknown but in-range
+    # feature values
+    oh = OneHotEncoder(handle_unknown='error')
+    oh.fit(X)
+    msg = ('Values [0] for feature 2 are unknown but in range. '
+           'This will raise an error in future versions.')
+    assert_warns_message(FutureWarning, msg, oh.transform,
+                         np.array([[0, 0, 0]]))
+
     # Test the ignore option, ignores unknown features.
     oh = OneHotEncoder(handle_unknown='ignore')
     oh.fit(X)
@@ -1634,7 +1645,23 @@ def test_one_hot_encoder_unknown_transform():
         oh.transform(y).toarray(),
         np.array([[0., 0., 0., 0., 1., 0., 0.]]))

-    # Raise error if handle_unknown is neither ignore or error.
+    X = np.array([['cat', 2, 1], ['dog', 0, 3], ['mouse', 0, 2]])
+    y = np.array([['ET', 1, 1]])
+
+    # Test that one hot encoder raises error for unknown features
+    # present during transform.
+    oh = OneHotEncoder(handle_unknown='error-strict')
+    oh.fit(X)
+    assert_raises(ValueError, oh.transform, y)
+
+    # Test the ignore option, ignores unknown features.
+    oh = OneHotEncoder(handle_unknown='ignore')
+    oh.fit(X)
+    assert_array_equal(
+        oh.transform(y).toarray(),
+        np.array([[0., 0., 0., 0., 0., 1., 0., 0.]]))
+
+    # Raise error if handle_unknown is neither ignore nor error.
     oh = OneHotEncoder(handle_unknown='42')
     oh.fit(X)
     assert_raises(ValueError, oh.transform, y)
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index a10afa6d4d1e3..b97140c10cf86 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -281,13 +281,32 @@ def frombuffer_empty(buf, dtype):
     frombuffer_empty = np.frombuffer


+def _in1d_object(ar1, ar2, invert=False):
+    # np.argsort(kind='mergesort') is only supported for object arrays after
+    # version 1.8. Hence in1d for object arrays needs to be handled
+    # differently, with plain Python sets.
+    values1 = set(ar1)
+    values2 = set(ar2)
+    absent_values = values1 - values2
+
+    present = np.ones_like(ar1, dtype=np.bool)
+
+    for value in absent_values:
+        present[ar1 == value] = False
+
+    return ~present if invert else present
+
+
 if np_version < (1, 8):
     def in1d(ar1, ar2, assume_unique=False, invert=False):
         # Backport of numpy function in1d 1.8.1 to support numpy 1.6.2
         # Ravel both arrays, behavior for the first array could be different
+        ar1 = np.asarray(ar1).ravel()
         ar2 = np.asarray(ar2).ravel()

+        if ar1.dtype == object or ar2.dtype == object:
+            return _in1d_object(ar1, ar2, invert)
+
         # This code is significantly faster when the condition is satisfied.
         if len(ar2) < 10 * len(ar1) ** 0.145:
             if invert:
@@ -443,3 +462,28 @@ def rankdata(a, method='average'):
             return .5 * (count[dense] + count[dense - 1] + 1)
 else:
     from scipy.stats import rankdata


+if np_version < (1, 8):
+    # Backport of setdiff1d function as it relies on in1d
+    def setdiff1d(ar1, ar2, assume_unique=False):
+        # Copy-paste from numpy, except for the object-dtype clauses below.
+        # Convert up front so that list inputs also work.
+        ar1 = np.asarray(ar1).ravel()
+        ar2 = np.asarray(ar2).ravel()
+        if not assume_unique:
+            # Unique is not supported for object arrays till np version 1.8
+            # due to mergesort
+            if ar1.dtype == object:
+                ar1 = np.array(sorted(set(ar1)))
+            else:
+                ar1 = np.unique(ar1)
+
+            if ar2.dtype == object:
+                ar2 = np.array(sorted(set(ar2)))
+            else:
+                ar2 = np.unique(ar2)
+
+        return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
+
+else:
+    from numpy import setdiff1d
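For context on why the object-dtype branches above exist, here is a quick
sanity check of the backported helpers. It assumes a tree with this patch
applied (``setdiff1d`` is only exported from ``sklearn.utils.fixes`` by this
change; on NumPy >= 1.8 both names simply resolve to the stock NumPy
functions)::

    import numpy as np
    from sklearn.utils.fixes import in1d, setdiff1d

    # Object arrays cannot be mergesorted on NumPy < 1.8, so the backports
    # fall back to plain Python sets; the results match the stock functions.
    ar1 = np.array(['cat', 'dog', 'ET'], dtype=object)
    ar2 = np.array(['cat', 'dog', 'mouse'], dtype=object)

    print(in1d(ar1, ar2))       # [ True  True False]
    print(setdiff1d(ar1, ar2))  # ['ET']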