diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index e09ca0422d8a7..19d70b7e66646 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -1254,6 +1254,7 @@ Model validation
    preprocessing.Normalizer
    preprocessing.OneHotEncoder
    preprocessing.OrdinalEncoder
+   preprocessing.UnaryEncoder
    preprocessing.PolynomialFeatures
    preprocessing.PowerTransformer
    preprocessing.QuantileTransformer
diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
index 54210f2453cb0..d228ddb1cf58d 100644
--- a/doc/modules/preprocessing.rst
+++ b/doc/modules/preprocessing.rst
@@ -451,6 +451,10 @@ The normalizer instance can then be used on sample vectors as any transformer::
 Encoding categorical features
 =============================
+
+Ordinal encoding
+----------------
+
 Often features are not given as continuous values but categorical.
 For example a person could have features ``["male", "female"]``,
 ``["from Europe", "from US", "from Asia"]``,
@@ -471,11 +475,22 @@ new feature of integers (0 to n_categories - 1)::
     >>> enc.transform([['female', 'from US', 'uses Safari']])
     array([[0., 1., 1.]])
 
+You can specify the order of the categories by passing the ``categories``
+parameter::
+
+    >>> enc = preprocessing.OrdinalEncoder(categories=[['big', 'small'],
+    ...                                                ['short', 'tall']])
+    >>> X = [['big', 'tall']]
+    >>> enc.fit_transform(X)  # doctest: +ELLIPSIS
+    array([[0., 1.]])
+
 Such integer representation can, however, not be used directly with all
 scikit-learn estimators, as these expect continuous input, and would interpret
 the categories as being ordered, which is often not desired (i.e. the set of
 browsers was ordered arbitrarily).
 
+One-hot encoding
+----------------
+
 Another possibility to convert categorical features to features that can be used
 with scikit-learn estimators is to use a one-of-K, also known as one-hot or
 dummy encoding.
@@ -539,9 +554,73 @@ columns for this feature will be all zeros
     >>> enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()
     array([[1., 0., 0., 0., 0., 0.]])
 
+See :ref:`dict_feature_extraction` for categorical features that are
+represented as a dict, not as scalars.
+
+.. _unary_encoding:
+
+Unary encoding
+--------------
+
+For some ordinal features, it does not necessarily make sense to use
+:class:`OrdinalEncoder` if the differences between the ordered categories
+are uneven, for example for a feature taking the values "very short",
+"short" and "tall".
+
+For such features, it is possible to use a unary encoding, which is
+implemented in :class:`UnaryEncoder`. This encoder transforms each ordinal
+feature with ``m`` possible values into ``m - 1`` binary features, where the
+``i``-th feature is active if ``x > i``. For example::
+
+    >>> enc = preprocessing.UnaryEncoder()
+    >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]])  # doctest: +ELLIPSIS
+    UnaryEncoder(dtype=<... 'numpy.float64'>, handle_greater='warn',
+           max_value='auto', sparse=False)
+    >>> enc.transform([[0, 1, 3]])
+    array([[0., 1., 0., 1., 1., 1.]])
+
+Here the first feature with 2 categories is transformed into 1 column, the
+second feature with 3 categories is transformed into 2 columns, and the
+third feature with 4 categories is transformed into 3 columns.
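+
+The transformation can be reversed with ``inverse_transform``, which
+recovers each original feature value by counting the non-zero columns
+(a small illustration, continuing the example above)::
+
+    >>> enc.inverse_transform(enc.transform([[0, 1, 3]]))
+    array([[0., 1., 3.]])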
+
+By default, the number of categories in a feature is inferred automatically
+from the training data by looking for the maximum value. It is possible to
+specify this explicitly using the parameter ``max_value``. In particular, if
+the training data might have missing categorical values, one has to
+explicitly set ``max_value``. For example::
+
+    >>> enc = preprocessing.UnaryEncoder(max_value=[2, 3, 4])
+    >>> # Note that there are missing categorical values for the 2nd and 3rd
+    >>> # features
+    >>> enc.fit([[1, 2, 3], [0, 2, 0]])  # doctest: +ELLIPSIS
+    UnaryEncoder(dtype=<... 'numpy.float64'>, handle_greater='warn',
+           max_value=[2, 3, 4], sparse=False)
+    >>> enc.transform([[1, 1, 2]])
+    array([[1., 1., 0., 1., 1., 0.]])
+
+.. note::
+
+    This encoding is likely to help when used with linear models and
+    kernel-based models like SVMs with the standard kernels. On the other
+    hand, this transformation is unlikely to help when using tree-based
+    models, since those already work on the basis of a particular feature
+    value being smaller or larger than a threshold.
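+
+Values greater than or equal to ``max_value`` may still show up at transform
+time. The ``handle_greater`` parameter controls whether such values raise an
+error (``'error'``), are silently clipped to ``max_value - 1`` (``'clip'``),
+or are clipped with a warning (``'warn'``, the default). A minimal sketch of
+the ``'clip'`` option::
+
+    >>> enc = preprocessing.UnaryEncoder(max_value=3, handle_greater='clip')
+    >>> enc.fit([[0], [1], [2]])  # doctest: +ELLIPSIS
+    UnaryEncoder(dtype=<... 'numpy.float64'>, handle_greater='clip',
+           max_value=3, sparse=False)
+    >>> enc.transform([[5]])  # 5 is clipped to 2
+    array([[1., 1.]])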
+
+In case the input features are not already represented as integers from 0 to
+``max_value - 1``, it is possible to combine :class:`UnaryEncoder` and
+:class:`OrdinalEncoder` into a :class:`Pipeline <sklearn.pipeline.Pipeline>`
+like so::
+
+    >>> from sklearn.pipeline import make_pipeline
+    >>> from sklearn.preprocessing import OrdinalEncoder, UnaryEncoder
+    >>> categories = [['small', 'medium', 'huge']]
+    >>> pipeline = make_pipeline(OrdinalEncoder(categories), UnaryEncoder())
+    >>> X = [['small'], ['medium'], ['huge']]
+    >>> pipeline.fit_transform(X)
+    array([[0., 0.],
+           [1., 0.],
+           [1., 1.]])
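+
+:class:`UnaryEncoder` encodes all of the columns it is given. To encode only
+a subset of the features, it can be combined with :class:`ColumnTransformer
+<sklearn.compose.ColumnTransformer>`. The following is a minimal sketch in
+which only the first column is unary encoded and the second one is passed
+through unchanged::
+
+    >>> import numpy as np
+    >>> from sklearn.compose import ColumnTransformer
+    >>> X = np.array([[0, 7], [1, 8], [2, 9]])
+    >>> ct = ColumnTransformer([('unary', preprocessing.UnaryEncoder(), [0])],
+    ...                        remainder='passthrough')
+    >>> ct.fit_transform(X)
+    array([[0., 0., 7.],
+           [1., 0., 8.],
+           [1., 1., 9.]])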
 
-See :ref:`dict_feature_extraction` for categorical features that are represented
-as a dict, not as scalars.
 
 .. _preprocessing_discretization:
diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 3b4e6e439c934..95cb39dc01b1e 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -183,6 +183,11 @@ Support for Python 3.4 and below has been officially dropped.
   in the dense case. Also added a new parameter ``order`` which controls output
   order for further speed performances. :issue:`12251` by `Tom Dupre la Tour`_.
 
+- |Feature| Added a new encoder :class:`preprocessing.UnaryEncoder`, useful
+  for ordinal features with uneven differences between categories.
+  :issue:`12893` by :user:`Ruxandra Burtica `, :user:`Arjun
+  Jauhari ` and :user:`Nicolas Hug `.
+
 :mod:`sklearn.tree`
 ...................
 
 - |Feature| Decision Trees can now be plotted with matplotlib using
diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index d1d69bde6f4a8..44df9d4a970dc 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -27,6 +27,7 @@
 
 from ._encoders import OneHotEncoder
 from ._encoders import OrdinalEncoder
+from ._encoders import UnaryEncoder
 
 from .label import label_binarize
 from .label import LabelBinarizer
@@ -53,6 +54,7 @@
     'Normalizer',
     'OneHotEncoder',
     'OrdinalEncoder',
+    'UnaryEncoder',
     'PowerTransformer',
     'RobustScaler',
     'StandardScaler',
diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py
index 3dabd0b3e0cda..694fa7b0d6e8b 100644
--- a/sklearn/preprocessing/_encoders.py
+++ b/sklearn/preprocessing/_encoders.py
@@ -263,6 +263,8 @@ class OneHotEncoder(_BaseEncoder):
     --------
     sklearn.preprocessing.OrdinalEncoder : performs an ordinal (integer)
       encoding of the categorical features.
+    sklearn.preprocessing.UnaryEncoder : performs a unary encoding of ordinal
+      data.
     sklearn.feature_extraction.DictVectorizer : performs a one-hot encoding
       of dictionary items (also handles string-valued features).
     sklearn.feature_extraction.FeatureHasher : performs an approximate one-hot
@@ -766,6 +768,8 @@ class OrdinalEncoder(_BaseEncoder):
     --------
     sklearn.preprocessing.OneHotEncoder : performs a one-hot encoding of
       categorical features.
+    sklearn.preprocessing.UnaryEncoder : performs a unary encoding of ordinal
+      data.
     sklearn.preprocessing.LabelEncoder : encodes target labels with values
       between 0 and n_classes-1.
     """
@@ -846,3 +850,245 @@ def inverse_transform(self, X):
             X_tr[:, i] = self.categories_[i][labels]
 
         return X_tr
+
+
+class UnaryEncoder(BaseEstimator, TransformerMixin):
+    """Encode ordinal integer features using a unary scheme.
+
+    This encoder transforms each ordinal feature with ``m`` possible values
+    into ``m - 1`` binary features, where the ``i``-th feature is active if
+    ``x > i``. The input to this transformer should be a matrix of
+    non-negative integers, denoting the values taken on by the ordinal
+    features.
+
+    This encoding may be needed for feeding ordinal features to many
+    scikit-learn estimators, notably linear models and kernel-based models
+    like SVMs with the standard kernels.
+    This transformation is unlikely to help when used with tree-based models,
+    since those already work on the basis of a particular feature value being
+    smaller or greater than a threshold, unlike linear and kernel-based
+    models.
+
+    This encoder encodes all of the features. To only encode a subset of the
+    features, use :class:`ColumnTransformer
+    <sklearn.compose.ColumnTransformer>`.
+
+    Read more in the :ref:`User Guide <unary_encoding>`.
+
+    Parameters
+    ----------
+    max_value : 'auto', int or array of ints, optional (default='auto')
+        Number of categories per feature.
+
+        - 'auto' : determine value range from training data by looking for
+          the maximum.
+        - int : number of ordinal values per feature.
+          Each feature value should be in ``[0, max_value)``.
+        - array : ``max_value[i]`` is the number of ordinal values in
+          ``X[:, i]``. Each feature value should be in ``[0, max_value[i])``.
+
+    dtype : number type, optional (default=np.float64)
+        Desired dtype of output.
+
+    sparse : boolean, optional (default=False)
+        If set to True, a sparse matrix will be returned, otherwise an array.
+
+    handle_greater : str, 'warn', 'error' or 'clip', optional (default='warn')
+        Whether to raise an error, clip or warn if an ordinal feature
+        >= max_value is passed in.
+
+        - 'error': raise an error if a feature >= max_value is passed in.
+        - 'clip': all the feature values >= max_value are clipped to
+          (max_value - 1) during transform.
+        - 'warn': same as clip but with a warning.
+
+    Attributes
+    ----------
+    feature_indices_ : array of shape (n_features + 1,)
+        Feature ``i`` in the original data is mapped to columns
+        from ``feature_indices_[i]`` to ``feature_indices_[i+1]``
+        of the transformed output.
+
+    max_value_ : array of shape (n_features,)
+        Number of ordinal values per feature.
+
+    Examples
+    --------
+    Given a dataset with three features and four samples, we let the encoder
+    find the maximum value per feature and transform the data to a unary
+    encoding.
+
+    >>> from sklearn.preprocessing import UnaryEncoder
+    >>> enc = UnaryEncoder()
+    >>> enc.fit([[0, 0, 3],
+    ...          [1, 1, 0],
+    ...          [0, 2, 1],
+    ...          [1, 0, 2]])  # doctest: +ELLIPSIS
+    UnaryEncoder(dtype=<... 'numpy.float64'>, handle_greater='warn',
+           max_value='auto', sparse=False)
+    >>> enc.max_value_
+    array([2, 3, 4])
+    >>> enc.feature_indices_
+    array([0, 1, 3, 6])
+    >>> enc.transform([[0, 1, 2]])
+    array([[0., 1., 0., 1., 1., 0.]])
+
+    See also
+    --------
+    sklearn.preprocessing.OneHotEncoder : encodes categorical integer features
+      using a one-hot aka one-of-K scheme.
+    sklearn.preprocessing.OrdinalEncoder : performs an ordinal (integer)
+      encoding of the categorical features.
+    """
+    def __init__(self, max_value="auto", dtype=np.float64, sparse=False,
+                 handle_greater='warn'):
+        self.max_value = max_value
+        self.dtype = dtype
+        self.sparse = sparse
+        self.handle_greater = handle_greater
+
+    def fit(self, X, y=None):
+        """Fit UnaryEncoder to X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input array of type int. All feature values should be
+            non-negative, otherwise a ValueError will be raised.
+
+        Returns
+        -------
+        self
+        """
+        X = check_array(X, dtype=np.int)
+        if self.handle_greater not in ['warn', 'error', 'clip']:
+            raise ValueError("handle_greater should be either 'warn', 'error'"
+                             " or 'clip', got %s." % self.handle_greater)
+        if np.any(X < 0):
+            raise ValueError("X needs to contain only non-negative integers.")
+        _, n_features = X.shape
+
+        if isinstance(self.max_value, str) and self.max_value == 'auto':
+            max_value = np.max(X, axis=0) + 1
+        elif isinstance(self.max_value, numbers.Integral):
+            max_value = np.empty(n_features, dtype=np.int)
+            max_value.fill(self.max_value)
+        else:
+            try:
+                max_value = np.asarray(self.max_value, dtype=int)
+            except (ValueError, TypeError):
+                raise TypeError(
+                    "Wrong type for parameter `max_value`. Expected"
+                    " 'auto', int or array of ints, got %r" % self.max_value
+                )
+            if max_value.ndim < 1 or max_value.shape[0] != X.shape[1]:
+                raise ValueError("Shape mismatch: if max_value is an array,"
+                                 " it has to be of shape (n_features,).")
+
+        self.max_value_ = max_value
+        # Each feature with m possible values contributes m - 1 output
+        # columns; feature_indices_[i] is the first output column of
+        # feature i.
+        max_value = np.hstack([[0], max_value - 1])
+        indices = np.cumsum(max_value)
+        self.feature_indices_ = indices
+
+        if self.max_value != 'auto' and self.handle_greater == 'error':
+            mask = (X >= self.max_value_).ravel()
+            if np.any(mask):
+                raise ValueError("handle_greater='error' but found %d feature"
+                                 " values which exceed max_value."
+                                 % np.count_nonzero(mask))
+        return self
+
+    def transform(self, X):
+        """Transform X using unary encoding.
+
+        Parameters
+        ----------
+        X : array-like, of shape (n_samples, n_features)
+            Input array of type int.
+            All feature values should be non-negative, otherwise a ValueError
+            will be raised.
+
+        Returns
+        -------
+        X_tr : array-like or sparse matrix, of shape \
+                (n_samples, n_encoded_features)
+            Transformed input.
+        """
+        check_is_fitted(self, 'max_value_')
+        X = check_array(X, dtype=np.int)
+        if np.any(X < 0):
+            raise ValueError("X needs to contain only non-negative integers.")
+        n_samples, n_features = X.shape
+
+        indices = self.feature_indices_
+        if n_features != indices.shape[0] - 1:
+            raise ValueError("X has different shape than during fitting."
+                             " Expected %d, got %d."
+                             % (indices.shape[0] - 1, n_features))
+
+        # If self.handle_greater is "clip" (or "warn"), the ordinal features
+        # of X that are greater than or equal to max_value_ are clipped to
+        # max_value_ - 1, i.e. all of the output columns corresponding to
+        # such a feature are filled with ones.
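+        # The transformed matrix is then assembled in CSR form: for a sample
+        # with (possibly clipped) value x in feature i, the x columns
+        # feature_indices_[i] .. feature_indices_[i] + x - 1 are set to one,
+        # and the remaining columns of that feature stay zero.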
+        mask = (X >= self.max_value_).ravel()
+        if np.any(mask):
+            if self.handle_greater == 'warn':
+                warnings.warn("Found %d feature values which exceed "
+                              "max_value during transform, clipping them."
+                              % np.count_nonzero(mask))
+            elif self.handle_greater == 'error':
+                raise ValueError("handle_greater='error' but found %d feature"
+                                 " values which exceed max_value during "
+                                 "transform." % np.count_nonzero(mask))
+
+        X_ceil = np.where(mask.reshape(X.shape), self.max_value_ - 1, X)
+        column_start = np.tile(indices[:-1], n_samples)
+        column_end = (indices[:-1] + X_ceil).ravel()
+        column_indices = np.hstack([np.arange(s, e) for s, e
+                                    in zip(column_start, column_end)])
+        row_indices = np.repeat(np.arange(n_samples, dtype=np.int32),
+                                X_ceil.sum(axis=1))
+        data = np.ones(X_ceil.ravel().sum())
+        out = sparse.csr_matrix((data, (row_indices, column_indices)),
+                                shape=(n_samples, indices[-1]),
+                                dtype=self.dtype)
+
+        return out if self.sparse else out.toarray()
+
+    def inverse_transform(self, X):
+        """Convert the data back to the original representation.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape \
+                (n_samples, n_encoded_features)
+            The transformed data.
+
+        Returns
+        -------
+        X_tr : array-like of shape (n_samples, n_features)
+            Inverse transformed array.
+        """
+
+        check_is_fitted(self, 'max_value_')
+        X = check_array(X, accept_sparse='csr', ensure_min_features=0)
+
+        n_samples, _ = X.shape
+        n_features = len(self.max_value_)
+        n_encoded_features = self.feature_indices_[-1]
+
+        # validate shape of passed X
+        msg = ("Shape of the passed X data is not correct. Expected {0} "
+               "columns, got {1}.")
+        if X.shape[1] != n_encoded_features:
+            raise ValueError(msg.format(n_encoded_features, X.shape[1]))
+
+        # return float dtype, even though it will contain int values
+        X_tr = np.zeros((n_samples, n_features), dtype=np.float)
+
+        for feature_idx, (start, stop) in enumerate(zip(
+                self.feature_indices_,
+                self.feature_indices_[1:])):
+
+            # sub = portion of the transformed matrix that corresponds to the
+            # current feature
+            sub = X[:, start:stop]
+
+            # the original category is the number of non-zero columns
+            categories = (sub != 0).sum(axis=1).ravel()
+            X_tr[:, feature_idx] = categories
+
+        return X_tr
diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
index 792de88aa37de..59fd1e47a80e5 100644
--- a/sklearn/preprocessing/tests/test_encoders.py
+++ b/sklearn/preprocessing/tests/test_encoders.py
@@ -20,6 +20,7 @@
 
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.preprocessing import OrdinalEncoder
+from sklearn.preprocessing import UnaryEncoder
 
 
 def toarray(a):
@@ -676,3 +677,285 @@ def test_one_hot_encoder_warning():
     enc = OneHotEncoder()
     X = [['Male', 1], ['Female', 3]]
     np.testing.assert_no_warnings(enc.fit_transform, X)
+
+
+def _generate_random_features_matrix(n_samples=10, n_features=3,
+                                     n_categories_max=3):
+    rng = np.random.RandomState(6)
+    X = rng.randint(n_categories_max, size=(n_samples, n_features))
+    return X
+
+
+def test_unary_encoder():
+    X = np.arange(5).reshape(-1, 1)
+    enc = UnaryEncoder(5)
+    Xt = enc.fit_transform(X)
+    assert_array_equal(Xt, [[0, 0, 0, 0],   # 0
+                            [1, 0, 0, 0],   # 1
+                            [1, 1, 0, 0],   # 2
+                            [1, 1, 1, 0],   # 3
+                            [1, 1, 1, 1]])  # 4
+    Xt2 = enc.transform(X)
+    assert_array_equal(Xt2, Xt)
+
+
+def test_unary_encoder_stack():
+    # multiple input features stack to same output
+    rng = np.random.RandomState(6)
+    categories = rng.randint(2, 10)
+    size = rng.randint(1, 10)
+    n_features = rng.randint(2, 10)
+
+    encoder = UnaryEncoder(categories, sparse=False)
+    X_multi = _generate_random_features_matrix(size, n_features, categories)
+    X_multi_t = encoder.fit_transform(X_multi)
+    assert_equal(X_multi_t.shape, (size, n_features * (categories - 1)))
+
+    expected = np.hstack([encoder.fit_transform(X_multi[:, i:(i + 1)])
+                          for i in range(X_multi.shape[1])])
+    assert_array_equal(expected, X_multi_t)
+
+
+def test_unary_encoder_dense_sparse():
+    # test dense output in comparison to sparse results.
+    rng = np.random.RandomState(6)
+    categories = rng.randint(1, 10)
+    size = rng.randint(1, 10)
+    n_features = rng.randint(2, 10)
+
+    sparse_encoder = UnaryEncoder(categories, sparse=True)
+    dense_encoder = UnaryEncoder(categories)
+
+    X = _generate_random_features_matrix(size, n_features, categories)
+    X_trans_sparse = sparse_encoder.fit_transform(X)
+    X_trans_dense = dense_encoder.fit_transform(X)
+
+    assert_array_equal(X_trans_sparse.toarray(), X_trans_dense)
+
+
+def test_unary_encoder_handle_greater():
+    X = np.array([[0, 2, 1], [1, 0, 3], [2, 0, 2]])
+    y = np.array([[4, 1, 1]])
+
+    # Test that encoder raises error for greater features during transform.
+    encoder = UnaryEncoder(handle_greater='error')
+    encoder.fit(X)
+    assert_raises(ValueError, encoder.transform, y)
+
+    encoder = UnaryEncoder(handle_greater='error')
+    assert_array_equal(encoder.fit_transform(y),
+                       np.array([[1., 1., 1., 1., 1., 1.]]))
+
+    # Test that encoder raises error for greater features during fit when
+    # max_value is explicitly set.
+    encoder = UnaryEncoder(handle_greater='error', max_value=[2, 3, 4])
+    assert_raises(ValueError, encoder.fit, X)
+
+    encoder = UnaryEncoder(handle_greater='error', max_value=[2, 3, 4])
+    assert_raises(ValueError, encoder.fit_transform, X)
+
+    encoder = UnaryEncoder(handle_greater='error', max_value=[5, 2, 2])
+    encoder.fit(y)
+    assert_array_equal(encoder.transform(y),
+                       np.array([[1., 1., 1., 1., 1., 1.]]))
+
+    encoder = UnaryEncoder(handle_greater='error', max_value=[5, 2, 2])
+    assert_array_equal(encoder.fit_transform(y),
+                       np.array([[1., 1., 1., 1., 1., 1.]]))
+
+    # Test the clip option.
+    encoder = UnaryEncoder(handle_greater='clip')
+    encoder.fit(X)
+    assert_array_equal(
+        encoder.transform(y),
+        np.array([[1., 1., 1., 0., 1., 0., 0.]]))
+
+    encoder = UnaryEncoder(handle_greater='clip', max_value=[3, 2, 2])
+    assert_array_equal(
+        encoder.fit_transform(y),
+        np.array([[1., 1., 1., 1.]]))
+
+    # Test the warn option.
+    encoder = UnaryEncoder()
+    encoder.fit(X)
+    w = ('Found 1 feature values which exceed max_value during transform, '
+         'clipping them.')
+    y_transformed = assert_warns_message(UserWarning, w, encoder.transform, y)
+    assert_array_equal(
+        y_transformed,
+        np.array([[1., 1., 1., 0., 1., 0., 0.]]))
+
+    encoder = UnaryEncoder(max_value=[3, 2, 2])
+    y_transformed = assert_warns_message(UserWarning, w,
+                                         encoder.fit_transform, y)
+    assert_array_equal(
+        y_transformed,
+        np.array([[1., 1., 1., 1.]]))
+
+    encoder = UnaryEncoder(max_value=[5, 2, 2])
+    assert_array_equal(
+        encoder.fit_transform(y),
+        np.array([[1., 1., 1., 1., 1., 1.]]))
+
+    # Raise error if handle_greater is not one of 'warn', 'error' or 'clip'.
+    encoder = UnaryEncoder(handle_greater='42')
+    assert_raises(ValueError, encoder.fit, y)
+
+
+def test_unary_encoder_errors():
+    rng = np.random.RandomState(6)
+    max_value = rng.randint(2, 10)
+    size = rng.randint(1, 10)
+    n_features = rng.randint(2, 10)
+    delta = rng.randint(1, 10)
+
+    encoder = UnaryEncoder(max_value)
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    encoder.fit(X)
+
+    # test that an error is raised when X has a different shape than during
+    # fitting
+    larger_n_features = n_features + delta
+    X_too_large = _generate_random_features_matrix(size, larger_n_features,
+                                                   max_value)
+    assert_raises(ValueError, encoder.transform, X_too_large)
+    error_msg = ("X has different shape than during fitting."
+                 " Expected {}, got {}.".format(n_features, larger_n_features))
+    assert_raises_regex(ValueError, error_msg, encoder.transform, X_too_large)
+
+    # test that an error is raised when a feature value is out of bounds
+    encoder = UnaryEncoder(max_value, handle_greater='error')
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    encoder.fit(X)
+    X[0][0] = max_value + delta
+    X_out_of_bounds = X
+    assert_raises(ValueError, encoder.transform, X_out_of_bounds)
+    error_msg = ("handle_greater='error' but found 1 feature values which "
+                 "exceed max_value during transform.")
+    assert_raises_regex(ValueError, error_msg, encoder.transform,
+                        X_out_of_bounds)
+
+    # test exception on wrong init param
+    assert_raises(TypeError, UnaryEncoder(max_value=np.int).fit, X)
+
+    # test negative input to fit
+    encoder = UnaryEncoder()
+    assert_raises(ValueError, encoder.fit, [[0], [-1]])
+
+    # test negative input to transform
+    encoder.fit([[0], [1]])
+    assert_raises(ValueError, encoder.transform, [[0], [-1]])
+
+
+def test_unary_encoder_edge_cases():
+    EDGE_CASES = [
+        (
+            [[0], [1], [2]],
+            np.array([[0, 0], [1, 0], [1, 1]]),
+        ),
+        (
+            [[0], [0], [1]],
+            np.array([[0], [0], [1]]),
+        ),
+        (
+            [[0, 0], [0, 0], [0, 1]],
+            np.array([[0], [0], [1]]),
+        ),
+    ]
+
+    for input_matrix, expected_matrix in EDGE_CASES:
+        transformed = UnaryEncoder().fit_transform(input_matrix)
+        assert_array_equal(transformed, expected_matrix)
+
+
+def test_unary_encoder_max_value_int():
+    # Test UnaryEncoder's max_value parameter when set as an int.
+    rng = np.random.RandomState(6)
+    max_value = rng.randint(2, 10)
+    size = rng.randint(1, 10)
+    n_features = rng.randint(2, 10)
+    delta = rng.randint(1, 10)
+
+    encoder_max_value = max_value + delta
+    unary_max_value = encoder_max_value - 1
+    enc = UnaryEncoder(max_value=encoder_max_value)
+
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    X_trans = enc.fit_transform(X)
+    assert_equal(X_trans.shape, (size, unary_max_value * n_features))
+    assert_array_equal(
+        enc.feature_indices_,
+        np.arange(0, (unary_max_value * n_features) + 1, unary_max_value)
+    )
+    assert_array_equal(
+        enc.max_value_,
+        np.array([encoder_max_value] * n_features)
+    )
+
+
+def test_unary_encoder_max_value_array():
+    # Test UnaryEncoder's max_value parameter when set as an array.
+    rng = np.random.RandomState(6)
+    max_value = rng.randint(2, 10)
+    size = rng.randint(1, 10)
+    n_features = rng.randint(2, 10)
+    delta = rng.randint(1, 10)
+
+    # Test that the ideal case is working fine
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    max_value_array = list(np.max(X, axis=0) + 1)
+    enc = UnaryEncoder(max_value=max_value_array)
+    X_trans = enc.fit_transform(X)
+    assert_equal(X_trans.shape, (size, sum(max_value_array) - n_features))
+    assert_array_equal(
+        enc.feature_indices_,
+        np.cumsum(np.array([1] + max_value_array) - 1)
+    )
+    assert_array_equal(
+        enc.max_value_,
+        np.array(max_value_array)
+    )
+
+    # Test that fit_transform raises an error when
+    # len(max_value) != n_features
+    max_value_array = rng.randint(2, 10, n_features + delta)
+    enc = UnaryEncoder(max_value=max_value_array)
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    assert_raises(ValueError, enc.fit_transform, X)
+
+    # Test that fit_transform also raises an error for an empty max_value
+    enc = UnaryEncoder(max_value=[])
+    X = _generate_random_features_matrix(size, n_features, max_value)
+    assert_raises(ValueError, enc.fit_transform, X)
+
+
+@pytest.mark.parametrize('sparse_', (True, False))
+@pytest.mark.parametrize('X', (
+    [[0], [0]],  # only one category (transformed into [])
+    [[1], [1]],  # only one category but implicitly 2
+    [[1, 0], [1, 1], [0, 1], [0, 2]]  # multiple categories
+))
+def test_unary_encoder_inverse_transform(sparse_, X):
+    enc = UnaryEncoder(sparse=sparse_)
+    assert_array_equal(X, enc.inverse_transform(enc.fit_transform(X)))
+
+
+def test_unary_encoder_inverse_transform_input():
+    X = [[1, 0],  # will be transformed into 1 + 2 = 3 columns
+         [1, 1],
+         [0, 1],
+         [0, 2]]
+    enc = UnaryEncoder().fit(X)
+    bad_X_tr = [[1, 1, 1, 0]]  # 4 columns
+    assert_raises_regex(
+        ValueError,
+        "Shape of the passed X data is not correct. Expected 3 columns, got 4",
+        enc.inverse_transform, bad_X_tr
+    )
+
+    # Also check that inverse_transform still works on non-binary matrices.
+    # Non-zero values are treated as ones.
+    X_inv = enc.inverse_transform([[4, 100, 0]])  # treated as [1, 1, 0]
+    assert_array_equal(X_inv, [[1, 1]])
+    X_inv = enc.inverse_transform([[4, 100, 123]])  # treated as [1, 1, 1]
+    assert_array_equal(X_inv, [[1, 2]])
+    X_inv = enc.inverse_transform([[0, 1, 123]])  # treated as [0, 1, 1]
+    assert_array_equal(X_inv, [[0, 2]])
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 699026b9e47ee..1772aa87714f9 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -595,7 +595,7 @@ def uninstall_mldata_mock():
                'TfidfVectorizer', 'IsotonicRegression',
                'OneHotEncoder', 'RandomTreesEmbedding', 'OrdinalEncoder',
                'FeatureHasher', 'DummyClassifier', 'DummyRegressor',
-               'TruncatedSVD', 'PolynomialFeatures',
+               'TruncatedSVD', 'PolynomialFeatures', 'UnaryEncoder',
                'GaussianRandomProjectionHash', 'HashingVectorizer',
                'CheckingClassifier', 'PatchExtractor', 'CountVectorizer',
                # GradientBoosting base estimators, maybe should