diff --git a/src/TensorFlowNET.Core/APIs/tf.array.cs b/src/TensorFlowNET.Core/APIs/tf.array.cs index 6a646512a..4d9c3da58 100644 --- a/src/TensorFlowNET.Core/APIs/tf.array.cs +++ b/src/TensorFlowNET.Core/APIs/tf.array.cs @@ -91,8 +91,7 @@ public Tensor concat(IEnumerable values, int axis, string name = "concat return identity(values.First(), name: scope); }); } - - return gen_array_ops.concat_v2(values.ToArray(), ops.convert_to_tensor(axis), name: name); + return array_ops.concat(values.ToArray(), axis, name: name); } /// @@ -163,14 +162,17 @@ public Tensor transpose(T1 a, Axis perm = null, string name = "transpose", b /// Reverses specific dimensions of a tensor. /// /// - /// + /// The indices of the dimensions to reverse. Must be in the range [-rank(tensor), rank(tensor)). /// /// - public Tensor reverse(Tensor tensor, int[] axis, string name = null) - => gen_array_ops.reverse(tensor, ops.convert_to_tensor(axis), name: name); - - public Tensor reverse(Tensor tensor, Tensor axis, string name = null) - => gen_array_ops.reverse(tensor, axis, name: name); + public Tensor reverse(Tensor tensor, Axis axis, string name = null) + { + if (axis.IsScalar) + { + axis = new Axis(axis.axis); + } + return array_ops.reverse(tensor, axis, name: name); + } /// /// Returns the rank of a tensor. diff --git a/src/TensorFlowNET.Core/APIs/tf.image.cs b/src/TensorFlowNET.Core/APIs/tf.image.cs index 9230b50dc..ac9cbc60d 100644 --- a/src/TensorFlowNET.Core/APIs/tf.image.cs +++ b/src/TensorFlowNET.Core/APIs/tf.image.cs @@ -14,6 +14,10 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using OneOf.Types; +using System; +using System.Buffers.Text; +using Tensorflow.Contexts; using static Tensorflow.Binding; namespace Tensorflow @@ -162,17 +166,108 @@ public Tensor ssim_multiscale(Tensor img1, Tensor img2, float max_val, float[] p public Tensor sobel_edges(Tensor image) => image_ops_impl.sobel_edges(image); - public Tensor decode_jpeg(Tensor contents, - int channels = 0, - int ratio = 1, - bool fancy_upscaling = true, - bool try_recover_truncated = false, - int acceptable_fraction = 1, - string dct_method = "", - string name = null) - => gen_image_ops.decode_jpeg(contents, channels: channels, ratio: ratio, - fancy_upscaling: fancy_upscaling, try_recover_truncated: try_recover_truncated, - acceptable_fraction: acceptable_fraction, dct_method: dct_method); + /// + /// Adjust contrast of RGB or grayscale images. + /// + /// Images to adjust. At least 3-D. + /// + /// A float multiplier for adjusting contrast. + /// The contrast-adjusted image or images. + public Tensor adjust_contrast(Tensor images, float contrast_factor, string name = null) + => gen_image_ops.adjust_contrastv2(images, contrast_factor, name); + + /// + /// Adjust hue of RGB images. + /// + /// RGB image or images. The size of the last dimension must be 3. + /// float. How much to add to the hue channel. + /// A name for this operation (optional). + /// Adjusted image(s), same shape and DType as `image`. + /// if `delta` is not in the interval of `[-1, 1]`. + public Tensor adjust_hue(Tensor images, float delta, string name = null) + { + if (tf.Context.executing_eagerly()) + { + if (delta < -1f || delta > 1f) + throw new ValueError("delta must be in the interval [-1, 1]"); + } + return gen_image_ops.adjust_hue(images, delta, name: name); + } + + /// + /// Adjust saturation of RGB images. + /// + /// RGB image or images. 
The size of the last dimension must be 3.
+ /// float. Factor to multiply the saturation by.
+ /// A name for this operation (optional).
+ /// Adjusted image(s), same shape and DType as `image`.
+ public Tensor adjust_saturation(Tensor image, float saturation_factor, string name = null)
+ => gen_image_ops.adjust_saturation(image, saturation_factor, name);
+
+ ///
+ /// Greedily selects a subset of bounding boxes in descending order of score.
+ ///
+ ///
+ /// A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
+ /// is 1 then the same boxes are used for all classes; otherwise, if `q` is equal
+ /// to the number of classes, class-specific boxes are used.
+ ///
+ ///
+ /// A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
+ /// representing a single score corresponding to each box (each row of boxes).
+ ///
+ ///
+ /// A scalar integer `Tensor` representing the
+ /// maximum number of boxes to be selected by non-max suppression per class.
+ ///
+ ///
+ /// An int32 scalar representing the maximum number of boxes retained
+ /// over all classes. Note that setting this value to a large number may
+ /// result in an OOM error depending on the system workload.
+ ///
+ ///
+ /// A float representing the threshold for deciding whether boxes
+ /// overlap too much with respect to IOU.
+ ///
+ ///
+ /// A float representing the threshold for deciding when to
+ /// remove boxes based on score.
+ ///
+ ///
+ /// If false, the output nmsed boxes, scores and classes are
+ /// padded/clipped to `max_total_size`. If true, they are padded to be of length
+ /// `max_size_per_class`*`num_classes`, unless that exceeds `max_total_size`,
+ /// in which case they are clipped to `max_total_size`. Defaults to false.
+ ///
+ ///
+ /// If true, the coordinates of the output nmsed boxes will be clipped
+ /// to [0, 1]. If false, the box coordinates are output as they are. Defaults to true.
+ ///
+ ///
+ /// 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor containing the non-max suppressed boxes.
+ /// 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing the scores for the boxes.
+ /// 'nmsed_classes': A [batch_size, max_detections] float32 tensor containing the class for boxes.
+ /// 'valid_detections': A [batch_size] int32 tensor indicating the number of
+ /// valid detections per batch item. Only the top valid_detections[i] entries
+ /// in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
+ /// entries are zero paddings.
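+ /// For example (hypothetical shapes): with `boxes` of shape `[1, 100, 1, 4]`,
+ /// `scores` of shape `[1, 100, 90]`, `max_output_size_per_class = 10` and
+ /// `max_total_size = 50`, `nmsed_boxes` comes back as `[1, 50, 4]` and
+ /// `valid_detections` reports how many of those 50 rows are real detections.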
+ /// + public (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression( + Tensor boxes, + Tensor scores, + int max_output_size_per_class, + int max_total_size, + float iou_threshold, + float score_threshold, + bool pad_per_class = false, + bool clip_boxes = true) + { + var iou_threshold_t = ops.convert_to_tensor(iou_threshold, TF_DataType.TF_FLOAT, name: "iou_threshold"); + var score_threshold_t = ops.convert_to_tensor(score_threshold, TF_DataType.TF_FLOAT, name: "score_threshold"); + var max_total_size_t = ops.convert_to_tensor(max_total_size); + var max_output_size_per_class_t = ops.convert_to_tensor(max_output_size_per_class); + return gen_image_ops.combined_non_max_suppression(boxes, scores, max_output_size_per_class_t, max_total_size_t, + iou_threshold_t, score_threshold_t, pad_per_class, clip_boxes); + } /// /// Extracts crops from the input image tensor and resizes them using bilinear sampling or nearest neighbor sampling (possibly with aspect ratio change) to a common output size specified by crop_size. This is more general than the crop_to_bounding_box op which extracts a fixed size slice from the input image and does not allow resizing or aspect ratio change. @@ -187,7 +282,19 @@ public Tensor decode_jpeg(Tensor contents, /// A name for the operation (optional). /// A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. public Tensor crop_and_resize(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method = "bilinear", float extrapolation_value = 0f, string name = null) => - image_ops_impl.crop_and_resize(image, boxes, box_ind, crop_size, method, extrapolation_value, name); + gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method, extrapolation_value, name); + + public Tensor decode_jpeg(Tensor contents, + int channels = 0, + int ratio = 1, + bool fancy_upscaling = true, + bool try_recover_truncated = false, + int acceptable_fraction = 1, + string dct_method = "", + string name = null) + => gen_image_ops.decode_jpeg(contents, channels: channels, ratio: ratio, + fancy_upscaling: fancy_upscaling, try_recover_truncated: try_recover_truncated, + acceptable_fraction: acceptable_fraction, dct_method: dct_method); public Tensor extract_glimpse(Tensor input, Tensor size, Tensor offsets, bool centered = true, bool normalized = true, bool uniform_noise = true, string name = null) diff --git a/src/TensorFlowNET.Core/APIs/tf.math.cs b/src/TensorFlowNET.Core/APIs/tf.math.cs index ffbc43738..da54a9dd7 100644 --- a/src/TensorFlowNET.Core/APIs/tf.math.cs +++ b/src/TensorFlowNET.Core/APIs/tf.math.cs @@ -46,6 +46,17 @@ public Tensor multiply(Tensor x, Tensor y, string name = null) public Tensor divide_no_nan(Tensor a, Tensor b, string name = null) => math_ops.div_no_nan(a, b); + /// + /// Computes the Euclidean norm of elements across dimensions of a tensor. + /// + /// The tensor to reduce. Should have numeric type. + /// The dimensions to reduce. If `None` (the default), reduces all dimensions.Must be in the range `[-rank(input_tensor), rank(input_tensor))` + /// If true, retains reduced dimensions with length 1. + /// A name for the operation (optional). + /// The reduced tensor, of the same dtype as the input_tensor. + public Tensor reduce_euclidean_norm(Tensor input_tensor, Axis? 
axis = null, bool keepdims = false, string name = null) + => math_ops.reduce_euclidean_norm(input_tensor, axis: axis, keepdims: keepdims, name); + public Tensor square(Tensor x, string name = null) => math_ops.square(x, name: name); @@ -611,5 +622,7 @@ public Tensor squared_difference(Tensor x, Tensor y, string name = null) => gen_math_ops.squared_difference(x: x, y: y, name: name); public Tensor complex(Tensor real, Tensor imag, Tensorflow.TF_DataType? dtype = null, string name = null) => gen_ops.complex(real, imag, dtype, name); + public Tensor exp(Tensor x, + string name = null) => gen_math_ops.exp(x, name); } } diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs index e5cd4e569..397c68c7c 100644 --- a/src/TensorFlowNET.Core/APIs/tf.nn.cs +++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs @@ -144,16 +144,8 @@ public Tensor batch_normalization(Tensor x, Tensor offset, Tensor scale, float variance_epsilon, - string name = null) - { - var inv = math_ops.rsqrt(variance + variance_epsilon); - tf_with(ops.name_scope(name, "batchnorm", (x, mean, variance, scale, offset)), scope => - { - if (scale != null) inv *= scale; - }); - if (offset != null) return x * math_ops.cast(inv, x.dtype) + math_ops.cast(offset - mean * inv, dtype: x.dtype); - else return x * math_ops.cast(inv, x.dtype) + math_ops.cast(-mean * inv, dtype: x.dtype); - } + string name = null) => nn_impl.batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name); + public Tensor max_pool(Tensor value, int[] ksize, int[] strides, string padding, string data_format = "NHWC", string name = null) => nn_ops.max_pool(value, ksize, strides, padding, data_format: data_format, name: name); diff --git a/src/TensorFlowNET.Core/APIs/tf.reshape.cs b/src/TensorFlowNET.Core/APIs/tf.reshape.cs index 5da7b795f..102a81323 100644 --- a/src/TensorFlowNET.Core/APIs/tf.reshape.cs +++ b/src/TensorFlowNET.Core/APIs/tf.reshape.cs @@ -31,6 +31,6 @@ public Tensor reshape(Tensor tensor, public Tensor reshape(Tensor tensor, object[] shape, string name = null) - => gen_array_ops.reshape(tensor, ops.convert_to_tensor(shape), name); + => array_ops.reshape(tensor, shape, name); } } diff --git a/src/TensorFlowNET.Core/APIs/tf.tensor.cs b/src/TensorFlowNET.Core/APIs/tf.tensor.cs index 45aebc0cd..b03168ab3 100644 --- a/src/TensorFlowNET.Core/APIs/tf.tensor.cs +++ b/src/TensorFlowNET.Core/APIs/tf.tensor.cs @@ -68,20 +68,27 @@ public Tensor strided_slice(Tensor input, T[] begin, T[] end, T[] strides = n /// A name for the operation (optional) /// if num_or_size_splits is a scalar returns num_or_size_splits Tensor objects; /// if num_or_size_splits is a 1-D Tensor returns num_or_size_splits.get_shape[0] Tensor objects resulting from splitting value. 
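 /// For example, splitting a tensor of shape [4, 6] with num_split = 3 along
 /// axis 1 yields three tensors of shape [4, 2].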
- public Tensor[] split(Tensor value, int num_split, Tensor axis, string name = null) + public Tensor[] split(Tensor value, int num_split, Axis axis, string name = null) => array_ops.split( value: value, num_or_size_splits: num_split, axis: axis, name: name); - public Tensor[] split(Tensor value, int num_split, int axis, string name = null) + public Tensor[] split(Tensor value, int[] num_split, Axis axis, string name = null) => array_ops.split( value: value, num_or_size_splits: num_split, - axis: ops.convert_to_tensor(axis), + axis: axis, name: name); + //public Tensor[] split(Tensor value, int num_split, Axis axis, string name = null) + // => array_ops.split( + // value: value, + // num_or_size_splits: num_split, + // axis: axis, + // name: name); + public Tensor ensure_shape(Tensor x, Shape shape, string name = null) { return gen_ops.ensure_shape(x, shape, name); diff --git a/src/TensorFlowNET.Core/APIs/tf.tile.cs b/src/TensorFlowNET.Core/APIs/tf.tile.cs index 65975ac83..a3b497e8a 100644 --- a/src/TensorFlowNET.Core/APIs/tf.tile.cs +++ b/src/TensorFlowNET.Core/APIs/tf.tile.cs @@ -23,7 +23,7 @@ public Tensor tile(Tensor input, Tensor multiples, string name = null) => gen_array_ops.tile(input, multiples, name); public Tensor tile(Tensor input, object[] multiples, string name = null) - => gen_array_ops.tile(input, ops.convert_to_tensor(multiples), name); + => array_ops.tile(input, constant_op.constant(shape_utils.from_object_array(multiples).dims), name); public Tensor tile(Tensor input, Shape multiples, string name = null) { diff --git a/src/TensorFlowNET.Core/Framework/IndexedSlices.cs b/src/TensorFlowNET.Core/Framework/IndexedSlices.cs index 24d356fbb..bac5e6fb1 100644 --- a/src/TensorFlowNET.Core/Framework/IndexedSlices.cs +++ b/src/TensorFlowNET.Core/Framework/IndexedSlices.cs @@ -49,12 +49,25 @@ public IndexedSlices(Tensor values, Tensor indices, Tensor dense_shape = null) public static implicit operator Tensor(IndexedSlices indexedSlices) { - return indexedSlices.values; + return _indexed_slices_to_tensor(indexedSlices); } public static implicit operator IndexedSlices(Tensor tensor) { return tensor.Tag as IndexedSlices; } + + /// + /// Converts an IndexedSlices object `value` to a Tensor. 
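+ /// Duplicate entries in `indices` contribute to the same output row, which is
+ /// why the conversion below uses unsorted_segment_sum rather than a plain scatter.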
+ /// The IndexedSlices to convert.
+ /// The optional destination dtype; DtInvalid keeps the dtype of the values.
+ /// An optional name for the resulting tensor.
+ /// Unused; kept for signature parity with the Python implementation.
+ /// A dense Tensor with the same values, with duplicate indices summed.
+ public static Tensor _indexed_slices_to_tensor(IndexedSlices indexedSlices, TF_DataType dtype = TF_DataType.DtInvalid, String name = "", bool as_ref = false)
+ {
+ return gen_math_ops.unsorted_segment_sum(indexedSlices.values, indexedSlices.indices, indexedSlices.dense_shape.slice(0));
+ }
 }
}
diff --git a/src/TensorFlowNET.Core/GlobalUsing.cs b/src/TensorFlowNET.Core/GlobalUsing.cs
index 2fd5b437b..7e02c9083 100644
--- a/src/TensorFlowNET.Core/GlobalUsing.cs
+++ b/src/TensorFlowNET.Core/GlobalUsing.cs
@@ -3,4 +3,7 @@
 global using System.Text;
 global using System.Collections;
 global using System.Data;
-global using System.Linq;
\ No newline at end of file
+global using System.Linq;
+global using Tensorflow.Keras.Engine;
+global using Tensorflow.Framework.Models;
+global using static Tensorflow.Binding;
\ No newline at end of file
diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs
index 1b6bc95ee..4b7027992 100644
--- a/src/TensorFlowNET.Core/Gradients/array_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs
@@ -373,5 +373,13 @@ public static Tensor[] _TransposeGrad(Operation op, Tensor[] grads)
 var p = op.inputs[1];
 return new Tensor[] { array_ops.transpose(grads[0], array_ops.invert_permutation(p)), null };
 }
+
+ [RegisterGradient("ReverseV2")]
+ public static Tensor[] _ReverseV2Grad(Operation op, Tensor[] grads)
+ {
+ var grad = grads[0];
+ var axis = op.inputs[1];
+ return new Tensor[] { array_ops.reverse(grad, axis), null };
+ }
 }
}
diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs
index be1fbbba7..8c3f0f8bd 100644
--- a/src/TensorFlowNET.Core/Gradients/math_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs
@@ -117,6 +117,137 @@ public static Tensor[] _DivNoNanGrad(Operation op, Tensor[] grads)
 };
 }
+ public static string ellipsis = "...";
+ [RegisterGradient("Einsum")]
+ public static Tensor[] _EinsumGrad(Operation op, Tensor[] grads)
+ {
+ // Gradient for Einsum.
+ string equation = (string)op.get_attr("equation");
+ string[] split_equation = equation.Split(new string[] { "->" }, StringSplitOptions.None);
+ var input_subs = split_equation[0];
+ var output_subs = split_equation[1];
+
+ if (op.inputs.Length == 1)
+ {
+ var input_shape = array_ops.shape(op.inputs[0]);
+ var reduced_label_set = new HashSet<char>(new HashSet<char>(input_subs).Except(new HashSet<char>(output_subs + ellipsis)));
+ if (reduced_label_set.Count == 0)
+ return new Tensor[] { math_ops.einsum(string.Format("{0}->{1}", output_subs, input_subs), new Tensors(grads)) };
+ return new Tensor[] { _GetGradReduced(new Tensors(grads), output_subs, input_subs, input_shape, reduced_label_set) };
+ }
+
+ string[] split_input_subs = input_subs.Split(new string[] { "," }, StringSplitOptions.None);
+ var x_subs = split_input_subs[0];
+ var y_subs = split_input_subs[1];
+ // Add ellipsis for broadcasted dimensions if any operand does not have it.
+ // This is because the equation "...ij,jk->ik" may be valid if the 0th input's
+ // batch shape is empty, but the VJP equation "jk,ik->...ij" is not valid
+ // because only the output subscripts contain ellipsis.
+ if (output_subs.Contains(ellipsis))
+ {
+ if (!x_subs.Contains(ellipsis))
+ x_subs += ellipsis;
+ if (!y_subs.Contains(ellipsis))
+ y_subs += ellipsis;
+ }
+ // Obtain the gradients wrt the inputs x and y, without taking into account
+ // the unbroadcasting.
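+ // As a sketch of the two-operand case: for the equation "ij,jk->ik" the
+ // vector-Jacobian products are
+ //   grad_x = einsum("ik,jk->ij", output_grad, y)
+ //   grad_y = einsum("ij,ik->jk", x, output_grad)
+ // i.e. the subscripts of the differentiated input swap places with the
+ // output subscripts, which is exactly what _GetGradWrt constructs below.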
+ var x = op.inputs[0];
+ var y = op.inputs[1];
+ if (grads.GetDataType().is_complex())
+ {
+ x = math_ops.conj(x);
+ y = math_ops.conj(y);
+ }
+
+ var x_shape = array_ops.shape(x);
+ var y_shape = array_ops.shape(y);
+ var grad_x = _GetGradWrt(grads, y, x_shape, x_subs, y_subs, output_subs);
+ var grad_y = _GetGradWrt(grads, x, y_shape, y_subs, x_subs, output_subs);
+
+ if (!output_subs.Contains(ellipsis))
+ return new Tensor[] { grad_x, grad_y };
+ var bx = _GetBcastSubshape(x_subs);
+ int bx_start = bx[0], bx_end = bx[1];
+ var by = _GetBcastSubshape(y_subs);
+ int by_start = by[0], by_end = by[1];
+
+ var x_shape_static = x.shape;
+ var y_shape_static = y.shape;
+ if (x_shape_static.IsFullyDefined &&
+ y_shape_static.IsFullyDefined &&
+ x_shape_static[string.Format("{0}:{1}", bx_start, bx_end)] == y_shape_static[string.Format("{0}:{1}", by_start, by_end)])
+ return new Tensor[] { grad_x, grad_y };
+
+ var r = gen_array_ops.broadcast_gradient_args(x_shape[string.Format("{0}:{1}", bx_start, bx_end)],
+ y_shape[string.Format("{0}:{1}", by_start, by_end)]);
+ var rx = r[0];
+ var ry = r[1];
+ grad_x = array_ops.reshape(math_ops.reduce_sum(grad_x, bx_start + rx), x_shape);
+ grad_y = array_ops.reshape(math_ops.reduce_sum(grad_y, by_start + ry), y_shape);
+ return new Tensor[] { grad_x, grad_y };
+ }
+ protected static Tensor _GetGradWrt(Tensor[] output_grads, Tensor other_operand, Tensor input_shape,
+ string input_subs, string other_subs, string output_subs)
+ {
+ var reduced_label_set = new HashSet<char>(new HashSet<char>(input_subs).Except(new HashSet<char>(output_subs + other_subs + ".")));
+ var left_subs = string.Join("", input_subs.Where(s => !reduced_label_set.Contains(s)));
+ var grad_reduced = math_ops.einsum(string.Format("{0},{1}->{2}", output_subs, other_subs, left_subs), new Tensors((Tensors)output_grads, other_operand));
+ if (reduced_label_set.Count == 0)
+ return grad_reduced;
+ return _GetGradReduced(grad_reduced, left_subs, input_subs, input_shape, reduced_label_set);
+ }
+ protected static Tensor _GetGradReduced(Tensor output_grad, string output_subs, string input_subs, Tensor input_shape, HashSet<char> reduced_label_set)
+ {
+ string reduced_subs;
+ Tensor reduced_dims;
+ List<int> reduced_axes;
+ _GetReducedSubscripts(reduced_label_set, input_shape, input_subs, out reduced_subs, out reduced_dims, out reduced_axes);
+ bool has_repeated_labels = (
+ new HashSet<char>(input_subs).Count + new HashSet<char>(output_subs).Count <
+ input_subs.Length + output_subs.Length);
+ var input_subs_without_reduced_labels = string.Join("", input_subs.Where(s => !reduced_label_set.Contains(s)));
+
+ if (!has_repeated_labels && input_subs_without_reduced_labels == output_subs)
+ {
+ var reduced_shape = math_ops.reduced_shape(input_shape, ops.convert_to_tensor(reduced_axes));
+ return gen_array_ops.broadcast_to(array_ops.reshape(output_grad, reduced_shape), input_shape);
+ }
+ else
+ {
+ var grad_shape_with_reduced_labels = array_ops.concat(new Tensor[] { reduced_dims, array_ops.shape(new Tensors(output_grad)) }, axis: 0);
+ var reduced_shape = array_ops.concat(new Tensor[] { array_ops.ones(reduced_label_set.Count, dtype: dtypes.int32), array_ops.shape(new Tensors(output_grad)) }, axis: 0);
+ var broadcasted_grad = gen_array_ops.broadcast_to(array_ops.reshape(output_grad, reduced_shape), grad_shape_with_reduced_labels);
+ return math_ops.einsum(string.Format("{0}->{1}", reduced_subs + output_subs, input_subs), new Tensors(broadcasted_grad));
+ }
+ }
+ protected static void _GetReducedSubscripts(HashSet<char> reduced_label_set, Tensor input_shape, string subscripts, out string reduced_subs, out Tensor reduced_dims, out List<int> reduced_axes)
+ {
+ reduced_subs = string.Join("", reduced_label_set.Select(c => c.ToString()));
+ reduced_axes = reduced_subs.Select(s => _GetAxisFromLabel(subscripts, s)).ToList();
+ reduced_dims = array_ops.stack(reduced_axes.Select(ax => input_shape[ax]).ToList());
+ }
+ protected static int _GetAxisFromLabel(string subscripts, char label)
+ {
+ var splits = subscripts.Split(new string[] { ellipsis }, StringSplitOptions.None);
+ var index = splits[0].IndexOf(label);
+ if (index != -1) return index;
+ if (splits.Length < 2) throw new OutOfRangeError();
+ index = splits[1].IndexOf(label);
+ if (index != -1) return index;
+ throw new ValueError();
+ }
+ protected static int[] _GetBcastSubshape(string subscripts)
+ {
+ int start = subscripts.IndexOf(ellipsis);
+ if (start == -1) return new int[] { 0, 0 };
+ int remaining = subscripts.Length - (start + ellipsis.Length);
+ int end;
+ if (remaining > 0) end = remaining;
+ else throw new Exception();
+ return new int[] { start, end };
+ }
+
 ///
 /// Returns grad * exp(x).
 ///
diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
index a1ac97a97..a43a91b9a 100644
--- a/src/TensorFlowNET.Core/Gradients/nn_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
@@ -365,6 +365,23 @@ public static Tensor[] _MaxPoolGrad(Operation op, Tensor[] grads)
 };
 }
+ [RegisterGradient("AvgPool")]
+ public static Tensor[] _AvgPoolGrad(Operation op, Tensor[] grads)
+ {
+ Tensor grad = grads[0];
+
+ return new Tensor[]
+ {
+ gen_nn_ops.avg_pool_grad(
+ array_ops.shape(op.inputs[0]),
+ grad,
+ op.get_attr_list<int>("ksize"),
+ op.get_attr_list<int>("strides"),
+ op.get_attr("padding"),
+ op.get_attr("data_format"))
+ };
+ }
+
 ///
 /// Return the gradients for TopK.
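 /// The gradient is nonzero only at the positions that were selected: the
 /// incoming gradient for the values output is scattered back into the shape
 /// of the original input at the returned indices, and the indices output
 /// itself receives no gradient.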
/// diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs new file mode 100644 index 000000000..ef024971d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ExponentialArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs new file mode 100644 index 000000000..788e0f36d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class HardSigmoidArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs new file mode 100644 index 000000000..eb0e18446 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SELUArgs : LayerArgs + { + + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs new file mode 100644 index 000000000..7b4f20795 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftplusArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs new file mode 100644 index 000000000..4e23d261d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftsignArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs new file mode 100644 index 000000000..3dea06a23 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SwishArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs new file mode 100644 index 000000000..5df41b71b --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class TanhArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs new file mode 100644 
index 000000000..3daba9465 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class Conv2DTransposeArgs : Conv2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs new file mode 100644 index 000000000..016d58203 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class AddArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs new file mode 100644 index 000000000..4a81d139d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ConcatenateArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs new file mode 100644 index 000000000..1e3621cb6 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SubtractArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs new file mode 100644 index 000000000..e73aff766 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs new file mode 100644 index 000000000..d143cf471 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs new file mode 100644 index 000000000..e03227feb --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs new file mode 100644 index 000000000..a95cac836 --- /dev/null +++ 
b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs new file mode 100644 index 000000000..4cfff2c15 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class MaxPooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs new file mode 100644 index 000000000..d658a82e9 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs @@ -0,0 +1,20 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.NumPy; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class BidirectionalArgs : AutoSerializeLayerArgs + { + [JsonProperty("layer")] + public ILayer Layer { get; set; } + [JsonProperty("merge_mode")] + public string? MergeMode { get; set; } + [JsonProperty("backward_layer")] + public ILayer BackwardLayer { get; set; } + public NDArray Weights { get; set; } + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs new file mode 100644 index 000000000..cdc3097e9 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUArgs : AutoSerializeLayerArgs + { + public int Units { get; set; } + public Activation Activation { get; set; } + public Activation RecurrentActivation { get; set; } + public bool UseBias { get; set; } = true; + public float Dropout { get; set; } = .0f; + public float RecurrentDropout { get; set; } = .0f; + public IInitializer KernelInitializer { get; set; } + public IInitializer RecurrentInitializer { get; set; } + public IInitializer BiasInitializer { get; set; } + public bool ReturnSequences { get;set; } + public bool ReturnState { get;set; } + public bool GoBackwards { get;set; } + public bool Stateful { get;set; } + public bool Unroll { get;set; } + public bool TimeMajor { get;set; } + public bool ResetAfter { get;set; } + public int Implementation { get; set; } = 2; + + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs new file mode 100644 index 000000000..624756afe --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs @@ -0,0 +1,39 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUCellArgs : AutoSerializeLayerArgs + { + [JsonProperty("units")] + public int Units { get; set; } + // TODO(Rinne): lack of initialized value of Activation. Merging keras + // into tf.net could resolve it. 
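+ // Keras itself defaults these to "tanh" and "sigmoid" respectively, so a
+ // null value here is expected to be resolved to those defaults by the cell.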
+ [JsonProperty("activation")] + public Activation Activation { get; set; } + [JsonProperty("recurrent_activation")] + public Activation RecurrentActivation { get; set; } + [JsonProperty("use_bias")] + public bool UseBias { get; set; } = true; + [JsonProperty("dropout")] + public float Dropout { get; set; } = .0f; + [JsonProperty("recurrent_dropout")] + public float RecurrentDropout { get; set; } = .0f; + [JsonProperty("kernel_initializer")] + public IInitializer KernelInitializer { get; set; } + [JsonProperty("recurrent_initializer")] + public IInitializer RecurrentInitializer { get; set; } + [JsonProperty("bias_initializer")] + public IInitializer BiasInitializer { get; set; } + [JsonProperty("reset_after")] + public bool ResetAfter { get;set; } + [JsonProperty("implementation")] + public int Implementation { get; set; } = 2; + + + + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs new file mode 100644 index 000000000..d441dc828 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUOptionalArgs + { + public string Identifier => "GRU"; + + public Tensor Mask { get; set; } = null; + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs index db76fda06..a6beb77e8 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs @@ -1,9 +1,14 @@ -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class LSTMArgs : RNNArgs { // TODO: maybe change the `RNNArgs` and implement this class. public bool UnitForgetBias { get; set; } public int Implementation { get; set; } + + public LSTMArgs Clone() + { + return (LSTMArgs)MemberwiseClone(); + } } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs index 786236e4d..f45032312 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs @@ -1,7 +1,7 @@ using Newtonsoft.Json; using static Tensorflow.Binding; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { // TODO: complete the implementation public class LSTMCellArgs : AutoSerializeLayerArgs diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs index 2d7fb001a..d0b73ba44 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs @@ -1,8 +1,8 @@ using Newtonsoft.Json; using System.Collections.Generic; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { // TODO(Rinne): add regularizers. public class RNNArgs : AutoSerializeLayerArgs @@ -23,16 +23,27 @@ public class RNNArgs : AutoSerializeLayerArgs public int? InputDim { get; set; } public int? InputLength { get; set; } // TODO: Add `num_constants` and `zero_output_for_mask`. 
- + [JsonProperty("units")] public int Units { get; set; } + [JsonProperty("activation")] public Activation Activation { get; set; } + [JsonProperty("recurrent_activation")] public Activation RecurrentActivation { get; set; } + [JsonProperty("use_bias")] public bool UseBias { get; set; } = true; public IInitializer KernelInitializer { get; set; } public IInitializer RecurrentInitializer { get; set; } public IInitializer BiasInitializer { get; set; } + [JsonProperty("dropout")] public float Dropout { get; set; } = .0f; + [JsonProperty("zero_output_for_mask")] public bool ZeroOutputForMask { get; set; } = false; + [JsonProperty("recurrent_dropout")] public float RecurrentDropout { get; set; } = .0f; + + public RNNArgs Clone() + { + return (RNNArgs)MemberwiseClone(); + } } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs index 64b500bba..a6520589d 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs @@ -3,7 +3,7 @@ using System.Text; using Tensorflow.Common.Types; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class RnnOptionalArgs: IOptionalArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs index fcfd694d1..e45ef79d0 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs @@ -1,4 +1,4 @@ -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class SimpleRNNArgs : RNNArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs index d21d61905..b84ea21b3 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs @@ -1,6 +1,6 @@ using Newtonsoft.Json; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class SimpleRNNCellArgs: AutoSerializeLayerArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs index 50a6127df..2600f14ee 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs @@ -1,7 +1,7 @@ using System.Collections.Generic; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class StackedRNNCellsArgs : LayerArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs new file mode 100644 index 000000000..ec8e16d59 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs @@ -0,0 +1,24 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text; + + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class WrapperArgs : AutoSerializeLayerArgs + { + [JsonProperty("layer")] + public ILayer Layer { get; set; } + + public WrapperArgs(ILayer layer) + { + Layer = 
layer; + } + + public static implicit operator WrapperArgs(BidirectionalArgs args) + => new WrapperArgs(args.Layer); + } + +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs b/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs index 096dbd2ef..e114ca97f 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs @@ -14,6 +14,9 @@ public interface ICallback void on_predict_batch_end(long end_step, Dictionary logs); void on_predict_end(); void on_test_begin(); + void on_test_end(Dictionary logs); void on_test_batch_begin(long step); void on_test_batch_end(long end_step, Dictionary logs); + + } diff --git a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs index ddc72aeec..19f3df9ba 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs @@ -60,7 +60,7 @@ void load_weights(string filepath, bool skip_mismatch = false, object options = null); - Dictionary evaluate(Tensor x, Tensor y, + Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, int steps = -1, diff --git a/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs b/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs new file mode 100644 index 000000000..5a264b631 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs @@ -0,0 +1,75 @@ +namespace Tensorflow.Keras.Engine; + +/// +/// A representation of a Keras in/output during Functional API construction. +/// +public class KerasTensor +{ + private Tensors _original_tensors; + public Tensors original_tensors + { + get => _original_tensors; + set => _original_tensors = value; + } + + private Shape _inferred_value; + public Shape inferred_value => _inferred_value; + + private string _name; + private TensorSpec _type_spec; + public Shape shape => _type_spec.shape; + public TF_DataType dtype => _type_spec.dtype; + + public KerasTensor(TensorSpec type_spec, Shape inferred_value = null, string name = null) + { + _type_spec = type_spec; + _inferred_value = inferred_value; + _name = name; + } + + public static KerasTensor from_tensor(Tensor tensor) + { + var type_spec = tensor.ToTensorSpec(); + Shape? inferred_value = default; + if (tensor.dtype == TF_DataType.TF_INT32 && tensor.rank < 2) + { + inferred_value = tf.ones(tensor).shape; + } + var kt = new KerasTensor(type_spec, inferred_value: inferred_value, name: tensor.name); + kt.original_tensors = tensor; + return kt; + } + + public KerasTensor this[int idx] + => _original_tensors.First()[idx]; + + public KerasTensor this[params Slice[] slices] + => _original_tensors.First()[slices]; + + public override string ToString() + => _original_tensors.Length switch + { + > 1 => "[" + string.Join(", ", _original_tensors.Select(x => $"KerasTensor: shape={x.shape} dtype={x.dtype.as_numpy_name()}{GetInferredValueString()}")) + "]", + 1 => $"KerasTensor: shape={_original_tensors.shape} dtype={_original_tensors.dtype.as_numpy_name()}{GetInferredValueString()}", + _ => _original_tensors.ToString(), + }; + + private string GetInferredValueString() + => _inferred_value == null ? 
"" : $" inferred_value={_inferred_value}"; + + public static implicit operator Tensors(KerasTensor kt) + => kt._original_tensors; + + public static implicit operator Tensor(KerasTensor kt) + { + Tensor tensor = kt._original_tensors; + tensor.IsFromKerasTensor = true; + return tensor; + } + + public static implicit operator KerasTensor(Tensor tensor) + => from_tensor(tensor); + + public static implicit operator KerasTensor(Tensors tensors) + => from_tensor(tensors.First()); +} diff --git a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs index 961ce91ae..6c15fd469 100644 --- a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs +++ b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs @@ -25,6 +25,27 @@ IOptimizer Adam(float learning_rate = 0.001f, bool amsgrad = false, string name = "Adam"); + /// + /// Adam enables L2 weight decay on gradients. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + IOptimizer AdamW(float learning_rate = 0.001f, + float weight_decay = 0.004f, + float beta_1 = 0.9f, + float beta_2 = 0.999f, + float epsilon = 1e-7f, + bool amsgrad = false, + List no_decay_params = null, + string name = "AdamW"); + /// /// Construct a new RMSprop optimizer. /// @@ -42,6 +63,6 @@ IOptimizer RMSprop(float learning_rate = 0.001f, bool centered = false, string name = "RMSprop"); - IOptimizer SGD(float learning_rate); + IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f); } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs index e94c8bf10..2f92c4e57 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs @@ -15,7 +15,7 @@ public interface ILayer: IWithTrackable, IKerasConfigable List Layers { get; } List InboundNodes { get; } List OutboundNodes { get; } - Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? optional_args = null); + Tensors Apply(Tensors inputs, Tensors states = null, bool? training = false, IOptionalArgs? 
optional_args = null); List TrainableVariables { get; } List TrainableWeights { get; } List NonTrainableWeights { get; } diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs index a19508d42..5e08eadc4 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs @@ -1,6 +1,7 @@ using System; using Tensorflow.Framework.Models; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Layers; using Tensorflow.NumPy; using static Google.Protobuf.Reflection.FieldDescriptorProto.Types; @@ -135,7 +136,7 @@ public ILayer EinsumDense(string equation, public ILayer GlobalMaxPooling1D(string data_format = "channels_last"); public ILayer GlobalMaxPooling2D(string data_format = "channels_last"); - public Tensors Input(Shape shape = null, + public KerasTensor Input(Shape shape = null, int batch_size = -1, string name = null, TF_DataType dtype = TF_DataType.DtInvalid, @@ -246,6 +247,49 @@ public ILayer RNN( bool time_major = false ); + public IRnnCell GRUCell( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool reset_after = true); + + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ); + + /// + /// Bidirectional wrapper for RNNs. + /// + /// `keras.layers.RNN` instance, such as `keras.layers.LSTM` or `keras.layers.GRU` + /// automatically. 
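+ /// `merge_mode` is one of "sum", "mul", "concat", "ave" or null; with null the
+ /// forward and backward outputs are not combined and are returned as a list.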
+ /// + public ILayer Bidirectional( + ILayer layer, + string merge_mode = "concat", + NDArray weights = null, + ILayer backward_layer = null); + public ILayer Subtract(); } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs index 8d6fbc976..43df75b17 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs @@ -3,7 +3,7 @@ using System.Text; using Tensorflow.Common.Types; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public interface IRnnCell: ILayer { diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs index e73244a51..8cf6150d3 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public interface IStackedRnnCells : IRnnCell { diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index f29879b0f..c0f9e695d 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -4,6 +4,8 @@ using System.Linq; using System.Text; using Tensorflow.Util; +using Razorvine.Pickle; +using Tensorflow.NumPy.Pickle; using static Tensorflow.Binding; namespace Tensorflow.NumPy @@ -97,6 +99,14 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i return matrix; } + Array ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) + { + Stream deflateStream = reader.BaseStream; + BufferedStream bufferedStream = new BufferedStream(deflateStream); + var unpickler = new Unpickler(); + return (MultiArrayPickleWarpper)unpickler.load(bufferedStream); + } + public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) { var tensors = array_ops.meshgrid(array, copy: copy, sparse: sparse); diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs index 05f53d5e7..199e5ced3 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs @@ -27,8 +27,14 @@ public Array LoadMatrix(Stream stream) Array matrix = Array.CreateInstance(type, shape); //if (type == typeof(String)) - //return ReadStringMatrix(reader, matrix, bytes, type, shape); - return ReadValueMatrix(reader, matrix, bytes, type, shape); + //return ReadStringMatrix(reader, matrix, bytes, type, shape); + + if (type == typeof(Object)) + return ReadObjectMatrix(reader, matrix, shape); + else + { + return ReadValueMatrix(reader, matrix, bytes, type, shape); + } } } @@ -37,7 +43,7 @@ public T Load(Stream stream) ICloneable, IList, ICollection, IEnumerable, IStructuralComparable, IStructuralEquatable { // if (typeof(T).IsArray && (typeof(T).GetElementType().IsArray || typeof(T).GetElementType() == typeof(string))) - // return LoadJagged(stream) as T; + // return LoadJagged(stream) as T; return LoadMatrix(stream) as T; } @@ -93,7 +99,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape Type GetType(string dtype, out int bytes, out bool? 
isLittleEndian) { isLittleEndian = IsLittleEndian(dtype); - bytes = Int32.Parse(dtype.Substring(2)); + bytes = dtype.Length > 2 ? Int32.Parse(dtype.Substring(2)) : 0; string typeCode = dtype.Substring(1); @@ -121,6 +127,8 @@ Type GetType(string dtype, out int bytes, out bool? isLittleEndian) return typeof(Double); if (typeCode.StartsWith("S")) return typeof(String); + if (typeCode.StartsWith("O")) + return typeof(Object); throw new NotSupportedException(); } diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs index 064c7362f..a707e8aae 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs @@ -14,9 +14,9 @@ public class RandomizedImpl public NDArray permutation(NDArray x) => new NDArray(random_ops.random_shuffle(x)); [AutoNumPy] - public void shuffle(NDArray x) + public void shuffle(NDArray x, int? seed = null) { - var y = random_ops.random_shuffle(x); + var y = random_ops.random_shuffle(x, seed); Marshal.Copy(y.BufferToArray(), 0, x.TensorDataPointer, (int)x.bytesize); } diff --git a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs index c8c2d45fa..4c64eba74 100644 --- a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs +++ b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs @@ -10,6 +10,7 @@ public class NDArrayConverter public unsafe static T Scalar(NDArray nd) where T : unmanaged => nd.dtype switch { + TF_DataType.TF_BOOL => Scalar(*(bool*)nd.data), TF_DataType.TF_UINT8 => Scalar(*(byte*)nd.data), TF_DataType.TF_FLOAT => Scalar(*(float*)nd.data), TF_DataType.TF_INT32 => Scalar(*(int*)nd.data), diff --git a/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs b/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs index 5182d5726..4cad36e0b 100644 --- a/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs +++ b/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs @@ -13,6 +13,10 @@ public partial class np public static NDArray argmax(NDArray a, Axis? axis = null) => new NDArray(math_ops.argmax(a, axis ?? 0)); + [AutoNumPy] + public static NDArray argmin(NDArray a, Axis? axis = null) + => new NDArray(math_ops.argmin(a, axis ?? 0)); + [AutoNumPy] public static NDArray argsort(NDArray a, Axis? axis = null) => new NDArray(sort_ops.argsort(a, axis: axis ?? -1)); diff --git a/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs b/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs index 5d86b1b39..bce16ec9f 100644 --- a/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs +++ b/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs @@ -10,10 +10,10 @@ namespace Tensorflow.NumPy public partial class np { [AutoNumPy] - public static NDArray amin(NDArray x, int axis = 0) => new NDArray(tf.arg_min(x, axis)); + public static NDArray amin(NDArray x, int axis = 0) => new NDArray(tf.min(x, axis)); [AutoNumPy] - public static NDArray amax(NDArray x, int axis = 0) => new NDArray(tf.math.argmax(x, axis)); + public static NDArray amax(NDArray x, int axis = 0) => new NDArray(tf.max(x, axis)); [AutoNumPy] public static NDArray average(NDArray a, int axis = -1, NDArray? 
weights = null, bool returned = false) diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs index 5bc97952b..2559638b3 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs @@ -85,5 +85,11 @@ public static NDArray dot(NDArray x1, NDArray x2, NDArray? axes = null, string? [AutoNumPy] public static NDArray add(NDArray x, NDArray y) => new NDArray(math_ops.add(x, y)); + + [AutoNumPy] + public static NDArray greater(NDArray x, NDArray y) => new NDArray(tf.greater(x, y)); + + [AutoNumPy] + public static NDArray less(NDArray x, NDArray y) => new NDArray(tf.less(x, y)); } } diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs new file mode 100644 index 000000000..5dff6c16b --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy.Pickle +{ + public class DTypePickleWarpper + { + TF_DataType dtype { get; set; } + public DTypePickleWarpper(TF_DataType dtype) + { + this.dtype = dtype; + } + public void __setstate__(object[] args) { } + public static implicit operator TF_DataType(DTypePickleWarpper dTypeWarpper) + { + return dTypeWarpper.dtype; + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs new file mode 100644 index 000000000..160c7d4e9 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs @@ -0,0 +1,52 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; + +namespace Tensorflow.NumPy.Pickle +{ + /// + /// + /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + class DtypeConstructor : IObjectConstructor + { + public object construct(object[] args) + { + var typeCode = (string)args[0]; + TF_DataType dtype; + if (typeCode == "b1") + dtype = np.@bool; + else if (typeCode == "i1") + dtype = np.@byte; + else if (typeCode == "i2") + dtype = np.int16; + else if (typeCode == "i4") + dtype = np.int32; + else if (typeCode == "i8") + dtype = np.int64; + else if (typeCode == "u1") + dtype = np.ubyte; + else if (typeCode == "u2") + dtype = np.uint16; + else if (typeCode == "u4") + dtype = np.uint32; + else if (typeCode == "u8") + dtype = np.uint64; + else if (typeCode == "f4") + dtype = np.float32; + else if (typeCode == "f8") + dtype = np.float64; + else if (typeCode.StartsWith("S")) + dtype = np.@string; + else if (typeCode.StartsWith("O")) + dtype = np.@object; + else + throw new NotSupportedException(); + return new DTypePickleWarpper(dtype); + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs new file mode 100644 index 000000000..885f368c4 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; +using Razorvine.Pickle.Objects; + +namespace Tensorflow.NumPy.Pickle +{ + /// + /// Creates multiarrays of objects. Returns a primitive type multiarray such as int[][] if + /// the objects are ints, etc. 
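+ /// This constructor is meant to be registered with the unpickler for
+ /// "numpy.core.multiarray._reconstruct" (the registration site is assumed to
+ /// accompany the object-dtype branch of NumPyImpl.LoadMatrix above).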
+ /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + public class MultiArrayConstructor : IObjectConstructor + { + public object construct(object[] args) + { + if (args.Length != 3) + throw new InvalidArgumentError($"Invalid number of arguments in MultiArrayConstructor._reconstruct. Expected three arguments. Given {args.Length} arguments."); + + var types = (ClassDictConstructor)args[0]; + if (types.module != "numpy" || types.name != "ndarray") + throw new RuntimeError("_reconstruct: First argument must be a sub-type of ndarray"); + + var arg1 = (object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + var shape = new Shape(dims); + + TF_DataType dtype; + string identifier; + if (args[2].GetType() == typeof(string)) + identifier = (string)args[2]; + else + identifier = Encoding.UTF8.GetString((byte[])args[2]); + switch (identifier) + { + case "u": dtype = np.uint32; break; + case "c": dtype = np.complex_; break; + case "f": dtype = np.float32; break; + case "b": dtype = np.@bool; break; + default: throw new NotImplementedException($"Unsupported data type: {args[2]}"); + } + return new MultiArrayPickleWarpper(shape, dtype); + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs new file mode 100644 index 000000000..af8d1ecc2 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs @@ -0,0 +1,119 @@ +using Newtonsoft.Json.Linq; +using Serilog.Debugging; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy.Pickle +{ + public class MultiArrayPickleWarpper + { + public Shape reconstructedShape { get; set; } + public TF_DataType reconstructedDType { get; set; } + public NDArray reconstructedNDArray { get; set; } + public Array reconstructedMultiArray { get; set; } + public MultiArrayPickleWarpper(Shape shape, TF_DataType dtype) + { + reconstructedShape = shape; + reconstructedDType = dtype; + } + public void __setstate__(object[] args) + { + if (args.Length != 5) + throw new InvalidArgumentError($"Invalid number of arguments in NDArray.__setstate__. Expected five arguments. Given {args.Length} arguments."); + + var version = (int)args[0]; // version + + var arg1 = (object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + var _ShapeLike = new Shape(dims); // shape + + TF_DataType _DType_co = (DTypePickleWarpper)args[2]; // DType + + var F_continuous = (bool)args[3]; // F-continuous + if (F_continuous) + throw new InvalidArgumentError("Fortran Continuous memory layout is not supported. Please use C-continuous layout or check the data format."); + + var data = args[4]; // Data + /* + * If we ever need another pickle format, increment the version + * number. But we should still be able to handle the old versions. + */ + if (version < 0 || version > 4) + throw new ValueError($"can't handle version {version} of numpy.dtype pickle"); + + // TODO: Implement the missing details and checks from the official Numpy C code here. 
+ // https://github.com/numpy/numpy/blob/2f0bd6e86a77e4401d0384d9a75edf9470c5deb6/numpy/core/src/multiarray/descriptor.c#L2761 + + if (data.GetType() == typeof(ArrayList)) + { + Reconstruct((ArrayList)data); + } + else + throw new NotImplementedException(""); + } + private void Reconstruct(ArrayList arrayList) + { + int ndim = 1; + var subArrayList = arrayList; + while (subArrayList.Count > 0 && subArrayList[0] != null && subArrayList[0].GetType() == typeof(ArrayList)) + { + subArrayList = (ArrayList)subArrayList[0]; + ndim += 1; + } + var type = subArrayList[0].GetType(); + if (type == typeof(int)) + { + if (ndim == 1) + { + int[] list = (int[])arrayList.ToArray(typeof(int)); + Shape shape = new Shape(new int[] { arrayList.Count }); + reconstructedMultiArray = list; + reconstructedNDArray = new NDArray(list, shape); + } + if (ndim == 2) + { + int secondDim = 0; + foreach (ArrayList subArray in arrayList) + { + secondDim = subArray.Count > secondDim ? subArray.Count : secondDim; + } + int[,] list = new int[arrayList.Count, secondDim]; + for (int i = 0; i < arrayList.Count; i++) + { + var subArray = (ArrayList?)arrayList[i]; + if (subArray == null) + throw new NullReferenceException(""); + for (int j = 0; j < subArray.Count; j++) + { + var element = subArray[j]; + if (element == null) + throw new NoNullAllowedException("the element of ArrayList cannot be null."); + list[i, j] = (int)element; + } + } + Shape shape = new Shape(new int[] { arrayList.Count, secondDim }); + reconstructedMultiArray = list; + reconstructedNDArray = new NDArray(list, shape); + } + if (ndim > 2) + throw new NotImplementedException("can't handle ArrayList with more than two dimensions."); + } + else + throw new NotImplementedException(""); + } + public static implicit operator Array(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedMultiArray; + } + public static implicit operator NDArray(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedNDArray; + } + } +} diff --git a/src/TensorFlowNET.Core/Numpy/Numpy.cs b/src/TensorFlowNET.Core/Numpy/Numpy.cs index 72d2e981c..fee2d63fc 100644 --- a/src/TensorFlowNET.Core/Numpy/Numpy.cs +++ b/src/TensorFlowNET.Core/Numpy/Numpy.cs @@ -43,7 +43,9 @@ public partial class np public static readonly TF_DataType @decimal = TF_DataType.TF_DOUBLE; public static readonly TF_DataType complex_ = TF_DataType.TF_COMPLEX; public static readonly TF_DataType complex64 = TF_DataType.TF_COMPLEX64; - public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType @string = TF_DataType.TF_STRING; + public static readonly TF_DataType @object = TF_DataType.TF_VARIANT; #endregion public static double nan => double.NaN; diff --git a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs index e488c47e7..9905d39c8 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs @@ -19,9 +19,8 @@ limitations under the License. 
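Reconstruct above walks nested ArrayLists to infer the rank, then materializes an int[] or int[,] payload plus the matching NDArray. A hedged sketch of what the two-dimensional path produces (values are illustrative; the NDArray(Array, Shape) constructor is the one the wrapper itself uses):

using Tensorflow.NumPy;

// Equivalent of the reconstructed result for a 2x2 pickled payload
// of nested ArrayLists { {1, 2}, {3, 4} }:
var nd = new NDArray(new int[,] { { 1, 2 }, { 3, 4 } }, new Shape(2, 2));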
using Tensorflow.Common.Types; using Tensorflow.Keras; using Tensorflow.Keras.ArgsDefinition; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; using Tensorflow.Keras.Saving; using Tensorflow.NumPy; using Tensorflow.Operations; @@ -145,7 +144,7 @@ private Tensor _zero_state_tensors(object state_size, Tensor batch_size, TF_Data throw new NotImplementedException("_zero_state_tensors"); } - public Tensors Apply(Tensors inputs, Tensors state = null, bool is_training = false, IOptionalArgs? optional_args = null) + public Tensors Apply(Tensors inputs, Tensors state = null, bool? is_training = false, IOptionalArgs? optional_args = null) { throw new NotImplementedException(); } diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index d31b26d4a..e59c381cb 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -206,12 +206,11 @@ internal unsafe TF_DataType _get_attr_type(string name) return result; } - internal unsafe int _get_attr_int(string name) + internal unsafe long _get_attr_int(string name) { - Status status = new(); - int result; - c_api.TF_OperationGetAttrInt(_handle, name, new IntPtr(&result), status); - status.Check(true); + long result; + c_api.TF_OperationGetAttrInt(_handle, name, new IntPtr(&result), tf.Status); + tf.Status.Check(true); return result; } diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index 7f787533a..f80dcd2c4 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -137,7 +137,7 @@ public static Tensor zeros(Tensors shape, TF_DataType dtype = TF_DataType.TF_FLO if(shape.Length > 1) { shapeTensor = ops.convert_to_tensor(shape, dtypes.int32); - if(shapeTensor.ndim > 1) + if (shapeTensor.ndim > 1) { shapeTensor = array_ops.reshape(shapeTensor, new Shape(-1)); } @@ -304,6 +304,10 @@ public static Tensor _autopacking_helper(IEnumerable list_or_tuple, TF_D { elems_as_tensors.Add(tensor); } + else if (elem is KerasTensor kt) + { + elems_as_tensors.Add(kt); + } else { var elem_tensor = constant_op.constant(elem, dtype: dtype, name: i.ToString()); @@ -404,7 +408,20 @@ public static Tensor reshape(Tensor tensor, Shape shape, string name = null) => gen_array_ops.reshape(tensor, shape, name: name); public static Tensor reshape(Tensor tensor, object[] shape, string name = null) - => gen_array_ops.reshape(tensor, ops.convert_to_tensor(shape), name: name); + { + var dims = shape_utils.from_object_array(shape); + return gen_array_ops.reshape(tensor, dims, name: name); + } + + public static Tensor reverse(Tensor tensor, Tensor axis, string name = null) + => tf.Context.ExecuteOp("ReverseV2", name, new ExecuteOpArgs(tensor, axis) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Tidx = op.get_attr("Tidx") + } + }); private static Tensor ones_like_impl(T tensor, TF_DataType dtype, string name, bool optimize = true) { @@ -425,6 +442,10 @@ public static Tensor ones(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT return tf_with(ops.name_scope(name, "ones", new { shape }), scope => { name = scope; + if (shape._shape_tuple().Length == 0) + { + shape = reshape(shape, new Shape(-1)); + } var output = gen_array_ops.fill(shape, constant_op.constant(1.0f, dtype: dtype), name: name); return output; }); @@ -603,7 +624,17 @@ public static Tensor 
shape_internal(Tensor input, string name = null, bool optim } } - return gen_array_ops.shape(input, name: name, out_type: out_type); + return tf.Context.ExecuteOp("Shape", name, new ExecuteOpArgs(input) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + out_type = op.get_attr("out_type") + } + }.SetAttributes(new + { + out_type + })).First(); }); } @@ -637,6 +668,18 @@ public static Tensor tile(Tensor input, Tensor multiples, string name = null) } }); + /*public static Tensor tile(Tensor input, Shape multiples, string name = null) + { + return tf.Context.ExecuteOp("Tile", name, new ExecuteOpArgs(input, multiples) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Tmultiples = op.get_attr("Tmultiples") + } + }); + }*/ + public static Tensor zeros_like(Tensor tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true) { return tf_with(ops.name_scope(name, "zeros_like", new Tensor[] { tensor }), scope => @@ -678,7 +721,6 @@ public static Tensor stop_gradient(Tensor input, string name = null) var tape = tf.GradientTape().stop_recording(); var result = gen_array_ops.stop_gradient(input, name); tape.StartRecord(); - tf.GradientTape().PushTape(tape); return result; } @@ -704,23 +746,26 @@ public static Tensor strided_slice(Tensor input_, Tensor begin, Tensor end, int new_axis_mask = 0, int shrink_axis_mask = 0, string name = null) - { - var op = gen_array_ops.strided_slice( - input: input_, - begin: begin, - end: end, - strides: strides, - begin_mask: begin_mask, - end_mask: end_mask, - ellipsis_mask: ellipsis_mask, - new_axis_mask: new_axis_mask, - shrink_axis_mask: shrink_axis_mask, - name: name); - - string parent_name = name; - - return op; - } + => tf.Context.ExecuteOp("StridedSlice", name, new ExecuteOpArgs(input_, begin, end, strides) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Index = op.get_attr("Index"), + begin_mask = op.get_attr("begin_mask"), + end_mask = op.get_attr("end_mask"), + ellipsis_mask = op.get_attr("ellipsis_mask"), + new_axis_mask = op.get_attr("new_axis_mask"), + shrink_axis_mask = op.get_attr("shrink_axis_mask") + } + }.SetAttributes(new + { + begin_mask, + end_mask, + ellipsis_mask, + new_axis_mask, + shrink_axis_mask + })); /// /// Returns the gradient of `StridedSlice`. @@ -893,23 +938,9 @@ public static Tensor broadcast_static_shape(Tensor shape_x, Tensor shape_y) /// /// /// - public static Tensor concat(Tensor[] values, int axis, string name = "concat") - { - if (values.Length == 1) // Degenerate case of one tensor. - { - return tf_with(ops.name_scope(name), scope => - { - var t = ops.convert_to_tensor(axis, name: "concat_dim", dtype: TF_DataType.TF_INT32); - return identity(values[0], name: scope); - }); - } - - return gen_array_ops.concat_v2(values, ops.convert_to_tensor(axis), name: name); - } - public static Tensor concat(Tensor[] values, Tensor axis, string name = "concat") { - return gen_array_ops.concat_v2(values, axis, name: name); + return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); } public static Tensor concat(object[] values, int axis, string name = "concat") diff --git a/src/TensorFlowNET.Core/Operations/gen_image_ops.cs b/src/TensorFlowNET.Core/Operations/gen_image_ops.cs index 9240b5905..cbe661ae5 100644 --- a/src/TensorFlowNET.Core/Operations/gen_image_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_image_ops.cs @@ -16,18 +16,312 @@ limitations under the License. 
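Several array_ops above (Shape, StridedSlice, ConcatV2, and ReverseV2 just before them) are migrated onto tf.Context.ExecuteOp, which handles eager dispatch and gradient-attr recording in one place. A hedged usage sketch of the reworked reverse, assuming the default eager context:

using Tensorflow;
using static Tensorflow.Binding;

var t = tf.constant(new[,] { { 1, 2, 3 }, { 4, 5, 6 } });
var axis = ops.convert_to_tensor(new[] { 1 });
var r = array_ops.reverse(t, axis); // each row reversed: { { 3, 2, 1 }, { 6, 5, 4 } }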
using System;
using System.Linq;
+using Tensorflow.Eager;
using static Tensorflow.Binding;
+using Tensorflow.Exceptions;
+using Tensorflow.Contexts;
+using System.Xml.Linq;
+using Google.Protobuf;
namespace Tensorflow
{
public class gen_image_ops
{
+ public static Tensor adjust_contrastv2(Tensor images, Tensor contrast_factor, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustContrastv2", name) {
+ args = new object[] { images, contrast_factor }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_contrastv2_eager_fallback(images, contrast_factor, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["contrast_factor"] = contrast_factor;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustContrastv2", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustContrastv2", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+ public static Tensor adjust_contrastv2(Tensor image, float contrast_factor, string name = null)
+ {
+ return adjust_contrastv2(image, tf.convert_to_tensor(contrast_factor), name: name);
+ }
+
+ public static Tensor adjust_contrastv2_eager_fallback(Tensor images, Tensor contrast_factor, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, contrast_factor };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustContrastv2", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustContrastv2", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_hue(Tensor images, Tensor delta, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustHue", name) {
+ args = new object[] { images, delta }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_hue_eager_fallback(images, delta, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["delta"] = delta;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustHue", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustHue", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_hue(Tensor images, float delta, string name = null)
+ => adjust_hue(images, tf.convert_to_tensor(delta), name: name);
+
+ public static Tensor adjust_hue_eager_fallback(Tensor images, Tensor delta, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, delta };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustHue", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustHue", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
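A hedged usage sketch of the two adjustment entry points above (the test image and factor values are assumptions, not from the PR):

using Tensorflow;
using static Tensorflow.Binding;

var img = tf.ones(new Shape(2, 8, 8, 3), TF_DataType.TF_FLOAT); // NHWC batch
var contrasted = gen_image_ops.adjust_contrastv2(img, 1.5f);
var hueShifted = gen_image_ops.adjust_hue(img, tf.convert_to_tensor(0.1f));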
+
+ public static Tensor adjust_saturation(Tensor images, Tensor scale, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustSaturation", name)
+ {
+ args = new object[] { images, scale },
+ attrs = new Dictionary<string, object>() { }
+ });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_saturation_eager_fallback(images, scale, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["scale"] = scale;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustSaturation", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustSaturation", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_saturation(Tensor images, float scale, string name = null)
+ => adjust_saturation(images, ops.convert_to_tensor(scale), name: name);
+
+ public static Tensor adjust_saturation_eager_fallback(Tensor images, Tensor scale, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, scale };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustSaturation", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustSaturation", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
+
 public static (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression(Tensor boxes, Tensor scores, Tensor max_output_size_per_class, Tensor max_total_size,
- Tensor iou_threshold, Tensor score_threshold, bool pad_per_class, bool clip_boxes)
+ Tensor iou_threshold, Tensor score_threshold, bool pad_per_class = false, bool clip_boxes = true, string name = null)
 {
- throw new NotImplementedException("combined_non_max_suppression");
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "CombinedNonMaxSuppression", name){
+ args = new object[] {
+ boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold,
+ "pad_per_class", pad_per_class, "clip_boxes", clip_boxes},
+ attrs = new Dictionary<string, object>() { }});
+ return (_fast_path_result[0], _fast_path_result[1], _fast_path_result[2], _fast_path_result[3]);
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return combined_non_max_suppression_eager_fallback(
+ boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
+ score_threshold, pad_per_class, clip_boxes, name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["boxes"] = boxes;
+ keywords["scores"] = scores;
+ keywords["max_output_size_per_class"] = max_output_size_per_class;
+ keywords["max_total_size"] = max_total_size;
+ keywords["iou_threshold"] = iou_threshold;
+ keywords["score_threshold"] = score_threshold;
+ keywords["pad_per_class"] = pad_per_class;
+ keywords["clip_boxes"] = clip_boxes;
+
+ var _op = tf.OpDefLib._apply_op_helper("CombinedNonMaxSuppression", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "pad_per_class", _op.get_attr("pad_per_class"), "clip_boxes", _op.get_attr("clip_boxes") };
+ _execute.record_gradient("CombinedNonMaxSuppression", _op.inputs, _attrs, _result);
+ }
+ return (_result[0], _result[1], _result[2], _result[3]);
 }
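A hedged sketch of calling the now-implemented kernel, using the shapes documented for it earlier in this PR (all values are placeholders):

using Tensorflow;
using static Tensorflow.Binding;

var boxes  = tf.zeros(new Shape(1, 10, 1, 4)); // [batch, num_boxes, q, 4]
var scores = tf.zeros(new Shape(1, 10, 2));    // [batch, num_boxes, num_classes]
var (nmsBoxes, nmsScores, nmsClasses, valid) =
    gen_image_ops.combined_non_max_suppression(
        boxes, scores,
        max_output_size_per_class: tf.constant(5),
        max_total_size: tf.constant(5),
        iou_threshold: tf.constant(0.5f),
        score_threshold: tf.constant(0.1f));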
+ public static (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression_eager_fallback(Tensor boxes, Tensor scores, Tensor max_output_size_per_class, Tensor max_total_size,
+ Tensor iou_threshold, Tensor score_threshold, bool pad_per_class, bool clip_boxes, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold };
+ object[] _attrs = new object[] { "pad_per_class", pad_per_class, "clip_boxes", clip_boxes };
+ var _result = _execute.execute("CombinedNonMaxSuppression", 4, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("CombinedNonMaxSuppression", _inputs_flat, _attrs, _result);
+ }
+ return (_result[0], _result[1], _result[2], _result[3]);
+ }
+
+ public static Tensor crop_and_resize(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method = "bilinear", float extrapolation_value = 0f, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "CropAndResize", name) {
+ args = new object[] {
+ image, boxes, box_ind, crop_size, "method", method, "extrapolation_value", extrapolation_value }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return crop_and_resize_eager_fallback(
+ image, boxes, box_ind, crop_size, method: method, extrapolation_value: extrapolation_value, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["image"] = image;
+ keywords["boxes"] = boxes;
+ keywords["box_ind"] = box_ind;
+ keywords["crop_size"] = crop_size;
+ keywords["method"] = method;
+ keywords["extrapolation_value"] = extrapolation_value;
+ var _op = tf.OpDefLib._apply_op_helper("CropAndResize", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T"), "method", _op.get_attr("method"),
+ "extrapolation_value", _op.get_attr("extrapolation_value") };
+ _execute.record_gradient("CropAndResize", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor crop_and_resize_eager_fallback(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method, float extrapolation_value, string name, Context ctx)
+ {
+ if (method is null)
+ method = "bilinear";
+ //var method_cpmpat = ByteString.CopyFromUtf8(method ?? 
string.Empty); + //var extrapolation_value_float = (float)extrapolation_value; + + Tensor[] _inputs_flat = new Tensor[] { image, boxes, box_ind, crop_size, tf.convert_to_tensor(method), tf.convert_to_tensor(extrapolation_value) }; + object[] _attrs = new object[] { "T", image.dtype }; + var _result = _execute.execute("CropAndResize", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name); + if (_execute.must_record_gradient()) + { + _execute.record_gradient("CropAndResize", _inputs_flat, _attrs, _result); + } + return _result[0]; + } + + public static Tensor convert_image_dtype(Tensor image, TF_DataType dtype, bool saturate = false, string name = null) { if (dtype == image.dtype) diff --git a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs index 126df9e42..318b8b142 100644 --- a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs +++ b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs @@ -102,11 +102,12 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat { throw new ValueError("\'image\' must be fully defined."); } - for (int x = 1; x < 4; x++) + var dims = image_shape["-3:"]; + foreach (var dim in dims.dims) { - if (image_shape.dims[x] == 0) + if (dim == 0) { - throw new ValueError(String.Format("inner 3 dims of \'image.shape\' must be > 0: {0}", image_shape)); + throw new ValueError("inner 3 dimensions of \'image\' must be > 0: " + image_shape); } } @@ -208,7 +209,7 @@ internal static Tensor _random_flip(Tensor image, int flip_index, int seed, stri } public static Tensor flip_left_right(Tensor image) - => _flip(image, 1, "flip_left_right"); + => _flip(image, 0, "flip_left_right"); public static Tensor flip_up_down(Tensor image) => _flip(image, 1, "flip_up_down"); @@ -226,7 +227,7 @@ internal static Tensor _flip(Tensor image, int flip_index, string scope_name) } else if (shape.ndim == 4) { - return gen_array_ops.reverse(image, ops.convert_to_tensor(new[] { flip_index + 1 })); + return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { (flip_index + 1) % 2 })); } else { @@ -965,9 +966,9 @@ public static Tensor per_image_standardization(Tensor image) if (Array.Exists(new[] { dtypes.float16, dtypes.float32 }, orig_dtype => orig_dtype == orig_dtype)) image = convert_image_dtype(image, dtypes.float32); - var num_pixels_ = array_ops.shape(image).dims; - num_pixels_ = num_pixels_.Skip(num_pixels_.Length - 3).Take(num_pixels_.Length - (num_pixels_.Length - 3)).ToArray(); - Tensor num_pixels = math_ops.reduce_prod(new Tensor(num_pixels_)); + var x = image.shape["-3:"]; + var num_pixels = math_ops.reduce_prod(x); + Tensor image_mean = math_ops.reduce_mean(image, axis: new(-1, -2, -3), keepdims: true); var stddev = math_ops.reduce_std(image, axis: new(-1, -2, -3), keepdims: true); diff --git a/src/TensorFlowNET.Core/Operations/math_ops.cs b/src/TensorFlowNET.Core/Operations/math_ops.cs index d00a5d367..e77df702f 100644 --- a/src/TensorFlowNET.Core/Operations/math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/math_ops.cs @@ -77,6 +77,9 @@ public static Tensor add_n(Tensor[] inputs, string name = null) public static Tensor argmax(Tensor input, Axis dimension, TF_DataType output_type = TF_DataType.TF_INT64, string name = null) => gen_math_ops.arg_max(input, dimension, output_type: output_type, name: name); + public static Tensor argmin(Tensor input, Axis dimension, TF_DataType output_type = TF_DataType.TF_INT64, string name = null) + => gen_math_ops.arg_min(input, dimension, output_type: 
output_type, name: name); + public static Tensor round(Tensor x, string name = null) { x = ops.convert_to_tensor(x, name: "x"); @@ -587,6 +590,17 @@ public static Tensor reduce_any(Tensor input_tensor, Axis axis = null, bool keep return _may_reduce_to_scalar(keepdims, axis, max); } + public static Tensor reduce_euclidean_norm(Tensor input_tensor, Axis axis = null, bool keepdims = false, string name = null) + { + var r = _ReductionDims(input_tensor, axis); + var distance = tf.Context.ExecuteOp("EuclideanNorm", name, + new ExecuteOpArgs(input_tensor, r).SetAttributes(new + { + keep_dims = keepdims + })); + return _may_reduce_to_scalar(keepdims, axis, distance); + } + public static Tensor reduce_max(Tensor input_tensor, Axis axis = null, bool keepdims = false, string name = null) { var r = _ReductionDims(input_tensor, axis); @@ -780,10 +794,7 @@ public static Tensor matmul(Tensor a, Tensor b, bool adjoint_a = false, bool adjoint_b = false, bool a_is_sparse = false, bool b_is_sparse = false, string name = null) - { - Tensor result = null; - - tf_with(ops.name_scope(name, "MatMul", new Tensor[] { a, b }), scope => + => tf_with(ops.name_scope(name, "MatMul", (a, b)), scope => { name = scope; @@ -804,12 +815,10 @@ public static Tensor matmul(Tensor a, Tensor b, transpose_b = true; } - result = gen_math_ops.mat_mul(a, b, transpose_a, transpose_b, name); + return tf.Context.ExecuteOp("MatMul", name, new ExecuteOpArgs(a, b) + .SetAttributes(new { transpose_a, transpose_b })); }); - return result; - } - public static Tensor batch_matmul(Tensor x, Tensor y, bool adj_x = false, bool adj_y = false, string name = null) diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 3bc20289a..be714618d 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -4,14 +4,14 @@ netstandard2.0;net6.0 Tensorflow.Binding Tensorflow - 2.10.0 - 0.110.0 + 2.11.0 + 0.110.3 10.0 enable - Haiping Chen, Meinrad Recheis, Eli Belash + Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis SciSharp STACK False - Apache 2.0, Haiping Chen $([System.DateTime]::UtcNow.ToString(yyyy)) + Apache 2.0, Haiping Chen since 2018 https://github.com/SciSharp/TensorFlow.NET git http://scisharpstack.org @@ -20,10 +20,11 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.110.0.0 + 0.110.3.0 tf.net 0.110.x and above are based on tensorflow native 2.11.0 - * RNN, LSTM works. + * Support RNN, LSTM model. + * Support Transformer model. tf.net 0.100.x and above are based on tensorflow native 2.10.0 @@ -42,12 +43,11 @@ https://tensorflownet.readthedocs.io tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. 
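reduce_euclidean_norm above computes sqrt(sum(x_i^2)) over the reduction axes, so a [3, 4] input reduces to 5. A hedged sketch (the input values are illustrative):

using Tensorflow;
using static Tensorflow.Binding;

var x = tf.constant(new float[] { 3f, 4f });
var n = math_ops.reduce_euclidean_norm(x); // scalar tensor holding 5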
- 0.110.0.0 + 0.110.3.0 LICENSE true packages true - Open.snk AnyCPU;x64 TensorFlow.NET Debug;Release;GPU @@ -88,6 +88,66 @@ https://tensorflownet.readthedocs.io + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + @@ -112,10 +172,11 @@ https://tensorflownet.readthedocs.io - + + diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs index 18bdc1aaf..fdd62aeed 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs @@ -14,19 +14,10 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ -using Tensorflow.NumPy; -using System; -using System.Diagnostics.CodeAnalysis; -using System.Text; -using Tensorflow.Framework.Models; -using static Tensorflow.Binding; +namespace Tensorflow; -namespace Tensorflow +public partial class Tensor { - [SuppressMessage("ReSharper", "InvokeAsExtensionMethod")] - public partial class Tensor - { - public TensorSpec ToTensorSpec() - => new TensorSpec(shape, dtype, name); - } + public TensorSpec ToTensorSpec() + => new TensorSpec(shape, dtype, name); } \ No newline at end of file diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs index c8f47825c..51062cf3b 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs @@ -42,7 +42,7 @@ public Tensor this[params Slice[] slices] array_ops.stack(args.End), array_ops.stack(args.Strides)); - return gen_array_ops.strided_slice( + return array_ops.strided_slice( this, packed_begin, packed_end, @@ -180,8 +180,7 @@ public Tensor slice(int start) array_ops.stack(end.ToArray()), array_ops.stack(strides.ToArray())); - return gen_array_ops.strided_slice( - this, + return array_ops.strided_slice(this, packed_begin, packed_end, packed_strides, diff --git a/src/TensorFlowNET.Core/APIs/tf.exp.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs similarity index 75% rename from src/TensorFlowNET.Core/APIs/tf.exp.cs rename to src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs index 56ea1898e..ca946ca48 100644 --- a/src/TensorFlowNET.Core/APIs/tf.exp.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs @@ -1,6 +1,6 @@ /***************************************************************************** Copyright 2018 The TensorFlow.NET Authors. All Rights Reserved. - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -14,12 +14,14 @@ You may obtain a copy of the License at limitations under the License. 
******************************************************************************/ -namespace Tensorflow +namespace Tensorflow; + +public partial class Tensor { - public partial class tensorflow - { - public Tensor exp(Tensor x, - string name = null) => gen_math_ops.exp(x, name); + public bool IsFromKerasTensor { get; set; } - } -} + /// + /// Keras History: (Layer, (node_index, tensor_index)) + /// + public KerasHistory KerasHistory { get; set; } +} \ No newline at end of file diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs index c0e5d4357..65e1c8576 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs @@ -146,11 +146,6 @@ public int[] _shape_tuple() return rank < 0 ? null : shape.dims.Select(x => (int)x).ToArray(); } - /// - /// Keras History: (Layer, (node_index, tensor_index)) - /// - public KerasHistory KerasHistory { get; set; } - /// /// Updates the shape of this tensor. /// diff --git a/src/TensorFlowNET.Core/Tensors/shape_utils.cs b/src/TensorFlowNET.Core/Tensors/shape_utils.cs index 254cdad89..a77dd34ce 100644 --- a/src/TensorFlowNET.Core/Tensors/shape_utils.cs +++ b/src/TensorFlowNET.Core/Tensors/shape_utils.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Tensorflow.Eager; using static Tensorflow.Binding; namespace Tensorflow @@ -13,5 +14,31 @@ public static Tensor static_or_dynamic_map_fn(Func fn, Tensor el throw new NotImplementedException(""); } + + public static Shape from_object_array(object[] shape) + { + var dims = shape.Select(x => + { + if (x is KerasTensor kt && kt.inferred_value != null) + { + return kt.inferred_value.as_int_list()[0]; + } + else if (x is EagerTensor et && et.dtype == TF_DataType.TF_INT32) + { + return et.ToArray()[0]; + } + else if (x is int i) + { + return i; + } + else if (x is long l) + { + return l; + } + throw new NotImplementedException(); + }).ToArray(); + + return new Shape(dims); + } } } diff --git a/src/TensorFlowNET.Core/Tensors/tf.constant.cs b/src/TensorFlowNET.Core/Tensors/tf.constant.cs index 6a62d34a5..ac26b3da3 100644 --- a/src/TensorFlowNET.Core/Tensors/tf.constant.cs +++ b/src/TensorFlowNET.Core/Tensors/tf.constant.cs @@ -46,6 +46,9 @@ public Tensor zeros(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT, stri public Tensor ones(Shape shape, TF_DataType dtype = TF_DataType.TF_FLOAT, string name = null) => array_ops.ones(shape, dtype, name); + public Tensor ones(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT, string name = null) + => array_ops.ones(shape, dtype, name); + public Tensor size(Tensor input, string name = null, TF_DataType out_type = TF_DataType.TF_INT32) => array_ops.size(input, diff --git a/src/TensorFlowNET.Core/Training/gen_training_ops.cs b/src/TensorFlowNET.Core/Training/gen_training_ops.cs index abe85a141..df7dd9e65 100644 --- a/src/TensorFlowNET.Core/Training/gen_training_ops.cs +++ b/src/TensorFlowNET.Core/Training/gen_training_ops.cs @@ -51,5 +51,9 @@ public static Tensor apply_gradient_descent(IVariableV1 var, Tensor alpha, Tenso public static Tensor resource_apply_gradient_descent(Tensor var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null) => tf.Context.ExecuteOp("ResourceApplyGradientDescent", name, new ExecuteOpArgs(var, alpha, delta).SetAttributes(new { use_locking })); + + public static Tensor resource_apply_keras_momentum(Tensor var, Tensor accum, Tensor lr, Tensor grad, Tensor momentum, bool use_locking = false, bool use_nesterov = false, string name = null) + 
=> tf.Context.ExecuteOp("ResourceApplyKerasMomentum", name, + new ExecuteOpArgs(var, accum, lr, grad, momentum).SetAttributes(new { use_locking, use_nesterov })); } } diff --git a/src/TensorFlowNET.Core/ops.cs b/src/TensorFlowNET.Core/ops.cs index 7bd78a79f..351fd18ff 100644 --- a/src/TensorFlowNET.Core/ops.cs +++ b/src/TensorFlowNET.Core/ops.cs @@ -138,9 +138,22 @@ public static Tensor convert_to_tensor(object value, else { var graph = get_default_graph(); + if (graph is FuncGraph funcGraph) + { + return funcGraph.capture(eager_tensor, name: name); + } if (!graph.building_function) - throw new RuntimeError("Attempting to capture an EagerTensor without building a function."); - return (graph as FuncGraph).capture(eager_tensor, name: name); + { + // throw new RuntimeError("Attempting to capture an EagerTensor without building a function."); + return eager_tensor.AsPlaceholder(name: name); + } + } + } + else if (value is KerasTensor kt) + { + if (kt.inferred_value != null) + { + return convert_to_tensor(kt.inferred_value, dtype: kt.dtype, name: name); } } @@ -565,7 +578,9 @@ public static bool executing_eagerly_outside_functions() if (tf.Context.executing_eagerly()) return true; else - throw new NotImplementedException(""); + // TODO(Wanglongzhi2001), implement the false case + return true; + //throw new NotImplementedException(""); } public static bool inside_function() diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs index dc4e48da8..e368b37cd 100644 --- a/src/TensorFlowNET.Core/tensorflow.cs +++ b/src/TensorFlowNET.Core/tensorflow.cs @@ -14,6 +14,7 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using Razorvine.Pickle; using Serilog; using Serilog.Core; using System.Reflection; @@ -22,6 +23,7 @@ limitations under the License. using Tensorflow.Eager; using Tensorflow.Gradients; using Tensorflow.Keras; +using Tensorflow.NumPy.Pickle; namespace Tensorflow { @@ -98,6 +100,10 @@ public tensorflow() "please visit https://github.com/SciSharp/TensorFlow.NET. If it still not work after installing the backend, please submit an " + "issue to https://github.com/SciSharp/TensorFlow.NET/issues"); } + + // register numpy reconstructor for pickle + Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); + Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); } public string VERSION => c_api.StringPiece(c_api.TF_Version()); diff --git a/src/TensorFlowNET.Keras/Activations.cs b/src/TensorFlowNET.Keras/Activations.cs index d6d8e3914..ce5b4eb13 100644 --- a/src/TensorFlowNET.Keras/Activations.cs +++ b/src/TensorFlowNET.Keras/Activations.cs @@ -44,7 +44,6 @@ public class Activations: IActivationsApi /// /// Register the name-activation mapping in this static class. 
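With the two constructors registered in the tensorflow() constructor above, Razorvine's Unpickler can decode a pickled numpy ndarray, and the wrapper types convert implicitly once __setstate__ has run. Hedged sketch; the file name is hypothetical:

using System.IO;
using Razorvine.Pickle;
using Tensorflow.NumPy;
using Tensorflow.NumPy.Pickle;

using var stream = File.OpenRead("array.pkl"); // hypothetical pickled int ndarray
var obj = new Unpickler().load(stream);
NDArray nd = (MultiArrayPickleWarpper)obj;     // implicit conversion defined above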
/// - /// /// private static void RegisterActivation(Activation activation) { diff --git a/src/TensorFlowNET.Keras/BackendImpl.cs b/src/TensorFlowNET.Keras/BackendImpl.cs index 364800ae5..574cf5990 100644 --- a/src/TensorFlowNET.Keras/BackendImpl.cs +++ b/src/TensorFlowNET.Keras/BackendImpl.cs @@ -76,7 +76,7 @@ public void track_variable(IVariableV1 v) _GRAPH_VARIABLES[graph.graph_key] = v; } - public Tensor placeholder(Shape shape = null, + public KerasTensor placeholder(Shape shape = null, int ndim = -1, TF_DataType dtype = TF_DataType.DtInvalid, bool sparse = false, diff --git a/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs b/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs index 362f2280c..cb16aafa3 100644 --- a/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs +++ b/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs @@ -73,4 +73,9 @@ public void on_test_batch_end(long end_step, Dictionary logs) { callbacks.ForEach(x => x.on_test_batch_end(end_step, logs)); } + + public void on_test_end(Dictionary logs) + { + callbacks.ForEach(x => x.on_test_end(logs)); + } } diff --git a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs index 73ccc87b0..a2a2ecfe2 100644 --- a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs +++ b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs @@ -5,13 +5,10 @@ namespace Tensorflow.Keras.Callbacks; /// /// Stop training when a monitored metric has stopped improving. /// -/// -/// - public class EarlyStopping: ICallback { int _paitence; - int _min_delta; + float _min_delta; int _verbose; int _stopped_epoch; int _wait; @@ -22,11 +19,13 @@ public class EarlyStopping: ICallback string _monitor; string _mode; bool _restore_best_weights; - List? _best_weights; + List? _best_weights; CallbackParams _parameters; + Func _monitor_op; + public Dictionary>? history { get; set; } // user need to pass a CallbackParams to EarlyStopping, CallbackParams at least need the model - public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", int min_delta = 0, int patience = 0, + public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", float min_delta = 0f, int patience = 0, int verbose = 1, string mode = "auto", float baseline = 0f, bool restore_best_weights = false, int start_from_epoch = 0) { @@ -41,17 +40,49 @@ public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", int _min_delta = Math.Abs(min_delta); _restore_best_weights = restore_best_weights; _mode = mode; - if (mode != "auto" && mode != "min" && mode != "max") + + if (_mode != "auto" && _mode != "min" && _mode != "max") + { + Console.WriteLine($"EarlyStopping mode {_mode} is unknown, fallback to auto mode."); + _mode = "auto"; + } + + if (_mode == "min") + { + _monitor_op = np.less; + } + else if (_mode == "max") + { + _monitor_op = np.greater; + } + else + { + if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) + { + _monitor_op = np.greater; + } + else + { + _monitor_op = np.less; + } + } + + if (_monitor_op == np.greater) { - Console.WriteLine("EarlyStopping mode %s is unknown, fallback to auto mode.", mode); + _min_delta *= 1; + } + else + { + _min_delta *= -1; } } public void on_train_begin() { _wait = 0; _stopped_epoch = 0; + _best = _monitor_op == np.less ? 
(float)np.Inf : (float)-np.Inf; + _best_weights = null; _best_epoch = 0; - _best = (float)np.Inf; } public void on_epoch_begin(int epoch) @@ -77,7 +108,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) // Restore the weights after first epoch if no progress is ever made. if (_restore_best_weights && _best_weights == null) { - _best_weights = _parameters.Model.Weights; + _best_weights = _parameters.Model.get_weights(); } _wait += 1; @@ -86,7 +117,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) _best = current; _best_epoch = epoch; if (_restore_best_weights) - _best_weights = _parameters.Model.TrainableWeights; + _best_weights = _parameters.Model.get_weights(); // Only restart wait if we beat both the baseline and our previous best. if (_baseline == 0f || _is_improvement(current, _baseline)) _wait = 0; @@ -102,7 +133,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) { Console.WriteLine($"Restoring model weights from the end of the best epoch: {_best_epoch + 1}"); } - _parameters.Model.Weights = _best_weights; + _parameters.Model.set_weights(_best_weights); } } } @@ -134,20 +165,10 @@ float get_monitor_value(Dictionary logs) } public bool _is_improvement(float monitor_value, float reference_value) { - bool less_op = (monitor_value - _min_delta) < reference_value; - bool greater_op = (monitor_value - _min_delta) >= reference_value; - if (_mode == "min") - return less_op; - else if (_mode == "max") - return greater_op; - else - { - if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) - { - return greater_op; - } - else - return less_op; - } + return _monitor_op(monitor_value - _min_delta, reference_value); + } + + public void on_test_end(Dictionary logs) + { } } diff --git a/src/TensorFlowNET.Keras/Callbacks/History.cs b/src/TensorFlowNET.Keras/Callbacks/History.cs index c34f253d1..6d3ff6c38 100644 --- a/src/TensorFlowNET.Keras/Callbacks/History.cs +++ b/src/TensorFlowNET.Keras/Callbacks/History.cs @@ -81,4 +81,8 @@ public void on_test_batch_begin(long step) public void on_test_batch_end(long end_step, Dictionary logs) { } + + public void on_test_end(Dictionary logs) + { + } } diff --git a/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs b/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs index 9f2b1eb31..23b18cd47 100644 --- a/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs +++ b/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs @@ -118,5 +118,8 @@ public void on_test_batch_end(long end_step, Dictionary logs) } } + public void on_test_end(Dictionary logs) + { + } } } diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 56b0d2a77..4d6df913b 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -3,8 +3,6 @@ using System.IO; using System.Text; using Tensorflow.Keras.Utils; -using Tensorflow.NumPy; -using System.Linq; namespace Tensorflow.Keras.Datasets { @@ -12,11 +10,57 @@ namespace Tensorflow.Keras.Datasets /// This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment /// (positive/negative). Reviews have been preprocessed, and each review is /// encoded as a list of word indexes(integers). 
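A hedged usage sketch of the rewritten IMDB loader that follows (the keras.datasets.imdb accessor is assumed to match the rest of TF.NET's Keras surface):

using static Tensorflow.KerasApi;

var dataset = keras.datasets.imdb.load_data(num_words: 10000);
var (xTrain, yTrain) = dataset.Train;
var (xTest, yTest) = dataset.Test;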
+ /// For convenience, words are indexed by overall frequency in the dataset, + /// so that for instance the integer "3" encodes the 3rd most frequent word in + /// the data.This allows for quick filtering operations such as: + /// "only consider the top 10,000 most + /// common words, but eliminate the top 20 most common words". + /// As a convention, "0" does not stand for a specific word, but instead is used + /// to encode the pad token. + /// Args: + /// path: where to cache the data (relative to %TEMP%/imdb/imdb.npz). + /// num_words: integer or None.Words are + /// ranked by how often they occur(in the training set) and only + /// the `num_words` most frequent words are kept.Any less frequent word + /// will appear as `oov_char` value in the sequence data.If None, + /// all words are kept.Defaults to `None`. + /// skip_top: skip the top N most frequently occurring words + /// (which may not be informative). These words will appear as + /// `oov_char` value in the dataset.When 0, no words are + /// skipped. Defaults to `0`. + /// maxlen: int or None.Maximum sequence length. + /// Any longer sequence will be truncated. None, means no truncation. + /// Defaults to `None`. + /// seed: int. Seed for reproducible data shuffling. + /// start_char: int. The start of a sequence will be marked with this + /// character. 0 is usually the padding character. Defaults to `1`. + /// oov_char: int. The out-of-vocabulary character. + /// Words that were cut out because of the `num_words` or + /// `skip_top` limits will be replaced with this character. + /// index_from: int. Index actual words with this index and higher. + /// Returns: + /// Tuple of Numpy arrays: `(x_train, labels_train), (x_test, labels_test)`. + /// + /// ** x_train, x_test**: lists of sequences, which are lists of indexes + /// (integers). If the num_words argument was specific, the maximum + /// possible index value is `num_words - 1`. If the `maxlen` argument was + /// specified, the largest possible sequence length is `maxlen`. + /// + /// ** labels_train, labels_test**: lists of integer labels(1 or 0). + /// + /// Raises: + /// ValueError: in case `maxlen` is so low + /// that no input sequence could be kept. + /// Note that the 'out of vocabulary' character is only used for + /// words that were present in the training set but are not included + /// because they're not making the `num_words` cut here. + /// Words that were not seen in the training set but are in the test set + /// have simply been skipped. /// + /// """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; - string file_name = "imdb.npz"; string dest_folder = "imdb"; /// @@ -31,50 +75,163 @@ public class Imdb /// /// /// - public DatasetPass load_data(string path = "imdb.npz", - int num_words = -1, + public DatasetPass load_data( + string path = "imdb.npz", + int? num_words = null, int skip_top = 0, - int maxlen = -1, + int? maxlen = null, int seed = 113, - int start_char = 1, - int oov_char= 2, + int? start_char = 1, + int? 
oov_char = 2,
 int index_from = 3)
 {
- var dst = Download();
+ path = data_utils.get_file(
+ path,
+ origin: Path.Combine(origin_folder, "imdb.npz"),
+ file_hash: "69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f"
+ );
+ path = Path.Combine(path, "imdb.npz");
+ var fileBytes = File.ReadAllBytes(path);
+ var (x_train, x_test) = LoadX(fileBytes);
+ var (labels_train, labels_test) = LoadY(fileBytes);
- var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt"));
- var x_train_string = new string[lines.Length];
- var y_train = np.zeros(new int[] { lines.Length }, np.int64);
- for (int i = 0; i < lines.Length; i++)
+ var indices = np.arange(len(x_train));
+ np.random.shuffle(indices, seed);
+ x_train = x_train[indices];
+ labels_train = labels_train[indices];
+
+ indices = np.arange(len(x_test));
+ np.random.shuffle(indices, seed);
+ x_test = x_test[indices];
+ labels_test = labels_test[indices];
+
+ var x_train_array = (int[,])x_train.ToMultiDimArray();
+ var x_test_array = (int[,])x_test.ToMultiDimArray();
+ var labels_train_array = (long[])labels_train.ToArray();
+ var labels_test_array = (long[])labels_test.ToArray();
+
+ if (start_char != null)
+ {
+ var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+ int[,] new_x_train_array = new int[d1, d2 + 1];
+ for (var i = 0; i < d1; i++)
+ {
+ new_x_train_array[i, 0] = (int)start_char;
+ Array.Copy(x_train_array, i * d2, new_x_train_array, i * (d2 + 1) + 1, d2);
+ }
+ (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+ int[,] new_x_test_array = new int[d1, d2 + 1];
+ for (var i = 0; i < d1; i++)
+ {
+ new_x_test_array[i, 0] = (int)start_char;
+ Array.Copy(x_test_array, i * d2, new_x_test_array, i * (d2 + 1) + 1, d2);
+ }
+ x_train_array = new_x_train_array;
+ x_test_array = new_x_test_array;
+ }
+ else if (index_from != 0)
 {
- y_train[i] = long.Parse(lines[i].Substring(0, 1));
- x_train_string[i] = lines[i].Substring(2);
+ var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+ for (var i = 0; i < d1; i++)
+ {
+ for (var j = 0; j < d2; j++)
+ {
+ if (x_train_array[i, j] == 0)
+ break;
+ x_train_array[i, j] += index_from;
+ }
+ }
+ (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+ for (var i = 0; i < d1; i++)
+ {
+ for (var j = 0; j < d2; j++)
+ {
+ if (x_test_array[i, j] == 0)
+ break;
+ x_test_array[i, j] += index_from;
+ }
+ }
 }
- var x_train = np.array(x_train_string);
+ if (maxlen == null)
+ {
+ maxlen = max(x_train_array.GetLength(1), x_test_array.GetLength(1));
+ }
+ (x_train_array, labels_train_array) = data_utils._remove_long_seq((int)maxlen, x_train_array, labels_train_array);
+ (x_test_array, labels_test_array) = data_utils._remove_long_seq((int)maxlen, x_test_array, labels_test_array);
+ if (x_train_array.Length == 0 || x_test_array.Length == 0)
+ throw new ValueError("After filtering for sequences shorter than maxlen=" +
+ $"{maxlen}, no sequence was kept. 
Increase maxlen."); + + int[,] xs_array = new int[x_train_array.GetLength(0) + x_test_array.GetLength(0), (int)maxlen]; + Array.Copy(x_train_array, xs_array, x_train_array.Length); + Array.Copy(x_test_array, 0, xs_array, x_train_array.Length, x_train_array.Length); + + long[] labels_array = new long[labels_train_array.Length + labels_test_array.Length]; + Array.Copy(labels_train_array, labels_array, labels_train_array.Length); + Array.Copy(labels_test_array, 0, labels_array, labels_train_array.Length, labels_test_array.Length); + + if (num_words == null) + { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + num_words = 0; + for (var i = 0; i < d1; i++) + for (var j = 0; j < d2; j++) + num_words = max((int)num_words, (int)xs_array[i, j]); + } - File.ReadAllLines(Path.Combine(dst, "imdb_test.txt")); - var x_test_string = new string[lines.Length]; - var y_test = np.zeros(new int[] { lines.Length }, np.int64); - for (int i = 0; i < lines.Length; i++) + // by convention, use 2 as OOV word + // reserve 'index_from' (=3 by default) characters: + // 0 (padding), 1 (start), 2 (OOV) + if (oov_char != null) { - y_test[i] = long.Parse(lines[i].Substring(0, 1)); - x_test_string[i] = lines[i].Substring(2); + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) + { + for (var j = 0; j < d2; j++) + { + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, j] = xs_array[i, j]; + else + new_xs_array[i, j] = (int)oov_char; + } + } + xs_array = new_xs_array; } + else + { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) + { + int k = 0; + for (var j = 0; j < d2; j++) + { + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, k++] = xs_array[i, j]; + } + } + xs_array = new_xs_array; + } + + Array.Copy(xs_array, x_train_array, x_train_array.Length); + Array.Copy(xs_array, x_train_array.Length, x_test_array, 0, x_train_array.Length); - var x_test = np.array(x_test_string); + Array.Copy(labels_array, labels_train_array, labels_train_array.Length); + Array.Copy(labels_array, labels_train_array.Length, labels_test_array, 0, labels_test_array.Length); return new DatasetPass { - Train = (x_train, y_train), - Test = (x_test, y_test) + Train = (x_train_array, labels_train_array), + Test = (x_test_array, labels_test_array) }; } (NDArray, NDArray) LoadX(byte[] bytes) { - var y = np.Load_Npz(bytes); - return (y["x_train.npy"], y["x_test.npy"]); + var x = np.Load_Npz(bytes); + return (x["x_train.npy"], x["x_test.npy"]); } (NDArray, NDArray) LoadY(byte[] bytes) @@ -82,16 +239,5 @@ public DatasetPass load_data(string path = "imdb.npz", var y = np.Load_Npz(bytes); return (y["y_train.npy"], y["y_test.npy"]); } - - string Download() - { - var dst = Path.Combine(Path.GetTempPath(), dest_folder); - Directory.CreateDirectory(dst); - - Web.Download(origin_folder + file_name, dst, file_name); - - return dst; - // return Path.Combine(dst, file_name); - } } } diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs index b93c6aed7..16e646a35 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs @@ -52,7 +52,7 @@ Tensors permutation(Tensors tensor) /// /// 
Convert a Tensor of indices into a dataset of batched indices. /// - /// + /// /// IDatasetV2 slice_batch_indices(Tensor indices) { diff --git a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs index d52190fd3..a3831bffa 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs @@ -10,10 +10,10 @@ public partial class Layer /// Wraps `call`, applying pre- and post-processing steps. /// /// - /// + /// /// /// - public virtual Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? optional_args = null) + public virtual Tensors Apply(Tensors inputs, Tensors states = null, bool? training = false, IOptionalArgs? optional_args = null) { if (callContext.Value == null) callContext.Value = new CallContext(); diff --git a/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs b/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs index 1d96e5811..e4023c3fd 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs @@ -1,7 +1,5 @@ using System; using Tensorflow.Keras.Utils; -using static Tensorflow.Binding; -using static Tensorflow.KerasApi; namespace Tensorflow.Keras.Engine { @@ -9,14 +7,6 @@ public partial class Layer { Tensors FunctionalConstructionCall(Tensors inputs) { - bool mask_arg_passed_by_framework = false; - bool training_arg_passed_by_framework = false; - Tensor training_value = null; - if (training_value == null) - { - training_arg_passed_by_framework = true; - } - if (base_layer_utils.needs_keras_history(inputs)) base_layer_utils.create_keras_history(inputs); diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index eaa9eb23c..a74a77f18 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -15,7 +15,7 @@ namespace Tensorflow.Keras.Engine public partial class Model { /// - /// Returns the loss value & metrics values for the model in test mode. + /// Returns the loss value and metrics values for the model in test mode. /// /// /// @@ -27,7 +27,7 @@ public partial class Model /// /// /// - public Dictionary evaluate(Tensor x, Tensor y, + public Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, int steps = -1, @@ -115,62 +115,53 @@ public Dictionary evaluate(IDatasetV2 x, int verbose = 1, bool is /// The function to be called on each batch of data. /// Whether it is validation or test. 
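The evaluate overload above now takes NDArrays directly and returns a name-to-value metric dictionary. Hedged sketch; model, x_test and y_test are assumed to exist:

var results = model.evaluate(x_test, y_test, batch_size: 32);
foreach (var kv in results)
    Console.WriteLine($"{kv.Key}: {kv.Value}");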
        ///
-        Dictionary<string, float> evaluate(DataHandler data_handler, CallbackList callbacks, bool is_val, Func<DataHandler, Tensor[], Dictionary<string, float>> test_func)
+        Dictionary<string, float> evaluate(DataHandler data_handler, CallbackList callbacks, bool is_val, Func<DataHandler, OwnedIterator, Dictionary<string, float>> test_func)
         {
             callbacks.on_test_begin();
-            var results = new Dictionary<string, float>();
-            var logs = results;
+            var logs = new Dictionary<string, float>();
             foreach (var (epoch, iterator) in data_handler.enumerate_epochs())
             {
                 reset_metrics();
-                callbacks.on_epoch_begin(epoch);
-                // data_handler.catch_stop_iteration();
-
                 foreach (var step in data_handler.steps())
                 {
                     callbacks.on_test_batch_begin(step);
-
-                    logs = test_func(data_handler, iterator.next());
-
-                    tf_with(ops.control_dependencies(Array.Empty<object>()), ctl => _train_counter.assign_add(1));
-
+                    logs = test_func(data_handler, iterator);
                     var end_step = step + data_handler.StepIncrement;
                     if (!is_val)
                         callbacks.on_test_batch_end(end_step, logs);
                 }
-
-                if (!is_val)
-                    callbacks.on_epoch_end(epoch, logs);
             }
-
-            foreach (var log in logs)
-            {
-                results[log.Key] = log.Value;
-            }
-
+            callbacks.on_test_end(logs);
+            var results = new Dictionary<string, float>(logs);
             return results;
         }

-        Dictionary<string, float> test_function(DataHandler data_handler, Tensor[] data)
+        Dictionary<string, float> test_function(DataHandler data_handler, OwnedIterator iterator)
         {
-            var (x, y) = data_handler.DataAdapter.Expand1d(data[0], data[1]);
-
-            var y_pred = Apply(x, training: false);
-            var loss = compiled_loss.Call(y, y_pred);
-
-            compiled_metrics.update_state(y, y_pred);
-
-            var outputs = metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Name, x => (float)x.Item2);
+            var data = iterator.next();
+            var outputs = test_step(data_handler, data[0], data[1]);
+            tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1));
             return outputs;
         }

-        Dictionary<string, float> test_step_multi_inputs_function(DataHandler data_handler, Tensor[] data)
+        Dictionary<string, float> test_step_multi_inputs_function(DataHandler data_handler, OwnedIterator iterator)
         {
+            var data = iterator.next();
             var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount;
-            var outputs = train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray()));
-            tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1));
+            var outputs = test_step(data_handler, data.Take(x_size).ToArray(), data.Skip(x_size).ToArray());
+            tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1));
             return outputs;
         }
+
+        Dictionary<string, float> test_step(DataHandler data_handler, Tensors x, Tensors y)
+        {
+            (x, y) = data_handler.DataAdapter.Expand1d(x, y);
+            var y_pred = Apply(x, training: false);
+            var loss = compiled_loss.Call(y, y_pred);
+            compiled_metrics.update_state(y, y_pred);
+            return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
index 68dc5976c..d6f89d8be 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
@@ -142,6 +142,7 @@ public History fit(IDatasetV2 dataset,
             int verbose = 1,
             List<ICallback> callbacks = null,
             IDatasetV2 validation_data = null,
+            int validation_step = 10, // run validation once every `validation_step` epochs
             bool shuffle = true,
             int initial_epoch = 0,
             int max_queue_size = 10,
@@ -164,11 +165,11 @@ public History fit(IDatasetV2 dataset,
             });

-            return FitInternal(data_handler, epochs, verbose, callbacks, validation_data: validation_data,
+            return FitInternal(data_handler, epochs, validation_step, verbose, callbacks, validation_data: validation_data,
                 train_step_func: train_step_function);
         }

-        History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
+        History FitInternal(DataHandler data_handler, int epochs, int validation_step, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
             Func<DataHandler, OwnedIterator, Dictionary<string, float>> train_step_func)
         {
             stop_training = false;
@@ -207,6 +208,9 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
+                    if (validation_step > 0 && epoch == 0 || (epoch) % validation_step != 0)
+                        continue;
+
                     var val_logs = evaluate(validation_data);
                     foreach(var log in val_logs)
                     {
@@ -220,6 +224,10 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
 Dictionary<string, float> train_step_multi_inputs_function(DataHandler data_hand
         ///
         /// The logic for one training step.
         ///
-        ///
+        ///
+        ///
+        ///
         ///
         Dictionary<string, float> train_step(DataHandler data_handler, Tensors x, Tensors y)
         {
diff --git a/src/TensorFlowNET.Keras/GlobalUsing.cs b/src/TensorFlowNET.Keras/GlobalUsing.cs
index bc0798ede..85cd9194c 100644
--- a/src/TensorFlowNET.Keras/GlobalUsing.cs
+++ b/src/TensorFlowNET.Keras/GlobalUsing.cs
@@ -4,4 +4,5 @@
 global using System.Linq;
 global using static Tensorflow.Binding;
 global using static Tensorflow.KerasApi;
-global using Tensorflow.NumPy;
\ No newline at end of file
+global using Tensorflow.NumPy;
+global using Tensorflow.Keras.Engine;
\ No newline at end of file
diff --git a/src/TensorFlowNET.Keras/KerasInterface.cs b/src/TensorFlowNET.Keras/KerasInterface.cs
index 159564aac..6bc381095 100644
--- a/src/TensorFlowNET.Keras/KerasInterface.cs
+++ b/src/TensorFlowNET.Keras/KerasInterface.cs
@@ -72,8 +72,8 @@ public Sequential Sequential(params ILayer[] layers)
         ///
         /// `Model` groups layers into an object with training and inference features.
         ///
-        ///
-        ///
+        ///
+        ///
         ///
         public IModel Model(Tensors inputs, Tensors outputs, string name = null)
             => new Functional(inputs, outputs, name: name);
diff --git a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
index 19b292727..970a938d2 100644
--- a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
+++ b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
@@ -1,24 +1,18 @@
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.ArgsDefinition;
-using static Tensorflow.Binding;
-using static Tensorflow.KerasApi;
 using System;
 using System.Collections.Generic;
 using System.Linq;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Common.Types;

-///
-/// Base class for attention layers that can be used in sequence DNN/CNN models.
-///This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
-///Attention is formed by three tensors: Query, Key and Value.
-///
-
 namespace Tensorflow.Keras.Layers
 {
     ///
     /// Base Attention class for Dense networks.
+    /// This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
+    /// Attention is formed by three tensors: Query, Key and Value.
     /// This class is suitable for Dense or CNN networks, and not for RNN networks.
     /// Implementations of attention mechanisms should inherit from this class, and
     /// reuse the `apply_attention_scores()` method.
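A quick usage sketch for the new `validation_step` argument introduced above (illustrative only; it assumes a compiled model and two `IDatasetV2` pipelines named `train_ds` and `val_ds`):

    // Validation now runs only on epochs that are multiples of validation_step,
    // instead of after every epoch (see the FitInternal change above).
    model.fit(train_ds,
        epochs: 50,
        validation_data: val_ds,
        validation_step: 5);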
diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs index 280e91e2c..2c55f8fd5 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs @@ -10,14 +10,14 @@ public partial class LayersApi { public ILayer ELU ( float alpha = 0.1f ) => new ELU(new ELUArgs { Alpha = alpha }); public ILayer SELU () - => new SELU(new LayerArgs { }); + => new SELU(new SELUArgs { }); public ILayer Softmax(int axis = -1) => new Softmax(new SoftmaxArgs { axis = axis }); public ILayer Softmax ( Axis axis ) => new Softmax(new SoftmaxArgs { axis = axis }); - public ILayer Softplus () => new Softplus(new LayerArgs { }); - public ILayer HardSigmoid () => new HardSigmoid(new LayerArgs { }); - public ILayer Softsign () => new Softsign(new LayerArgs { }); - public ILayer Swish () => new Swish(new LayerArgs { }); - public ILayer Tanh () => new Tanh(new LayerArgs { }); - public ILayer Exponential () => new Exponential(new LayerArgs { }); + public ILayer Softplus () => new Softplus(new SoftplusArgs { }); + public ILayer HardSigmoid () => new HardSigmoid(new HardSigmoidArgs { }); + public ILayer Softsign () => new Softsign(new SoftsignArgs { }); + public ILayer Swish () => new Swish(new SwishArgs { }); + public ILayer Tanh () => new Tanh(new TanhArgs { }); + public ILayer Exponential () => new Exponential(new ExponentialArgs { }); } } diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs index d94bfb4d8..bf06b1418 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs @@ -14,7 +14,7 @@ public partial class LayersApi /// Axis along which to concatenate. /// public ILayer Concatenate(int axis = -1) - => new Concatenate(new MergeArgs + => new Concatenate(new ConcatenateArgs { Axis = axis }); diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 0bdcbc841..928e7e337 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -2,9 +2,8 @@ using Tensorflow.Framework.Models; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.ArgsDefinition.Core; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; using Tensorflow.NumPy; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -183,9 +182,6 @@ public ILayer Conv2D(int filters, /// Boolean, whether the layer uses a bias vector. /// The name of the initializer for the kernel weights matrix (see keras.initializers). /// The name of the initializer for the bias vector (see keras.initializers). - /// The name of the regularizer function applied to the kernel weights matrix (see keras.regularizers). - /// The name of the regularizer function applied to the bias vector (see keras.regularizers). - /// The name of the regularizer function applied to the output of the layer (its "activation") (see keras.regularizers). /// A tensor of rank 4+ representing activation(conv2d(inputs, kernel) + bias). 
public ILayer Conv2D(int filters, Shape kernel_size = null, @@ -244,7 +240,7 @@ public ILayer Conv2DTranspose(int filters, string kernel_regularizer = null, string bias_regularizer = null, string activity_regularizer = null) - => new Conv2DTranspose(new Conv2DArgs + => new Conv2DTranspose(new Conv2DTransposeArgs { Rank = 2, Filters = filters, @@ -469,7 +465,7 @@ public ILayer Flatten(string data_format = null) /// In this case, values of 'None' in the 'shape' argument represent ragged dimensions. For more information about RaggedTensors, see this guide. /// /// A tensor. - public Tensors Input(Shape shape = null, + public KerasTensor Input(Shape shape = null, int batch_size = -1, string name = null, TF_DataType dtype = TF_DataType.DtInvalid, @@ -572,7 +568,7 @@ public ILayer MaxPooling1D(int? pool_size = null, int? strides = null, string padding = "valid", string data_format = null) - => new MaxPooling1D(new Pooling1DArgs + => new MaxPooling1D(new MaxPooling1DArgs { PoolSize = pool_size ?? 2, Strides = strides ?? (pool_size ?? 2), @@ -788,7 +784,7 @@ public IRnnCell LSTMCell(int uints, string recurrent_activation = "sigmoid", bool use_bias = true, string kernel_initializer = "glorot_uniform", - string recurrent_initializer = "orthogonal", // TODO(Wanglongzhi2001),glorot_uniform has not been developed. + string recurrent_initializer = "orthogonal", string bias_initializer = "zeros", bool unit_forget_bias = true, float dropout = 0f, @@ -873,6 +869,118 @@ public ILayer LSTM(int units, UnitForgetBias = unit_forget_bias }); + /// + /// Cell class for the GRU layer. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public IRnnCell GRUCell( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool reset_after = true) + => new GRUCell(new GRUCellArgs + { + Units = units, + Activation = keras.activations.GetActivationFromName(activation), + RecurrentActivation = keras.activations.GetActivationFromName(recurrent_activation), + KernelInitializer = GetInitializerByName(kernel_initializer), + RecurrentInitializer = GetInitializerByName(recurrent_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + UseBias = use_bias, + Dropout = dropout, + RecurrentDropout = recurrent_dropout, + ResetAfter = reset_after + }); + + /// + /// Gated Recurrent Unit - Cho et al. 2014. + /// + /// Positive integer, dimensionality of the output space. + /// Activation function to use. If you pass `None`, no activation is applied.(ie. "linear" activation: `a(x) = x`). + /// Activation function to use for the recurrent step. If you pass `None`, no activation is applied. (ie. "linear" activation: `a(x) = x`). + /// Boolean, (default `True`), whether the layer uses a bias vector. + /// Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. Default: `glorot_uniform`. + /// Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. Default: `orthogonal`. + /// Initializer for the bias vector. Default: `zeros`. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. Default: 0. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. Default: 0. 
+ /// + /// Boolean. Whether to return the last output in the output sequence, or the full sequence. Default: `False`. + /// Boolean. Whether to return the last state in addition to the output. Default: `False`. + /// Boolean (default `False`). If True, process the input sequence backwards and return the reversed sequence. + /// Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. + /// Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, + /// The shape format of the `inputs` and `outputs` tensors. + /// GRU convention (whether to apply reset gate after or before matrix multiplication). False = "before", True = "after" (default and cuDNN compatible). + /// + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ) + => new GRU(new GRUArgs + { + Units = units, + Activation = keras.activations.GetActivationFromName(activation), + RecurrentActivation = keras.activations.GetActivationFromName(recurrent_activation), + KernelInitializer = GetInitializerByName(kernel_initializer), + RecurrentInitializer = GetInitializerByName(recurrent_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + UseBias = use_bias, + Dropout = dropout, + RecurrentDropout = recurrent_dropout, + ReturnSequences = return_sequences, + ReturnState = return_state, + GoBackwards = go_backwards, + Stateful = stateful, + TimeMajor = time_major, + Unroll = unroll, + ResetAfter = reset_after + }); + + public ILayer Bidirectional( + ILayer layer, + string merge_mode = "concat", + NDArray weights = null, + ILayer backward_layer = null) + => new Bidirectional(new BidirectionalArgs + { + Layer = layer, + MergeMode = merge_mode, + Weights = weights, + BackwardLayer = backward_layer + }); + + /// /// /// @@ -895,21 +1003,21 @@ public ILayer Rescaling(float scale, /// /// public ILayer Add() - => new Add(new MergeArgs { }); + => new Add(new AddArgs { }); /// /// /// /// public ILayer Subtract() - => new Subtract(new MergeArgs { }); + => new Subtract(new SubtractArgs { }); /// /// Global max pooling operation for spatial data. /// /// public ILayer GlobalAveragePooling2D() - => new GlobalAveragePooling2D(new Pooling2DArgs { }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { }); /// /// Global average pooling operation for temporal data. @@ -919,7 +1027,7 @@ public ILayer GlobalAveragePooling2D() /// /// public ILayer GlobalAveragePooling1D(string data_format = "channels_last") - => new GlobalAveragePooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalAveragePooling1D(new GlobalAveragePooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -928,7 +1036,7 @@ public ILayer GlobalAveragePooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). 
/// public ILayer GlobalAveragePooling2D(string data_format = "channels_last") - => new GlobalAveragePooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { DataFormat = data_format }); /// /// Global max pooling operation for 1D temporal data. @@ -939,7 +1047,7 @@ public ILayer GlobalAveragePooling2D(string data_format = "channels_last") /// /// public ILayer GlobalMaxPooling1D(string data_format = "channels_last") - => new GlobalMaxPooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalMaxPooling1D(new GlobalMaxPooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -948,7 +1056,7 @@ public ILayer GlobalMaxPooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). /// public ILayer GlobalMaxPooling2D(string data_format = "channels_last") - => new GlobalMaxPooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalMaxPooling2D(new GlobalMaxPooling2DArgs { DataFormat = data_format }); /// /// Get an weights initializer from its name. @@ -983,5 +1091,9 @@ public ILayer Normalization(Shape? input_shape = null, int? axis = -1, float? me Variance = variance, Invert = invert }); + + + + } } diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs index 312854388..7d5385e6f 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs @@ -2,7 +2,6 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Common.Types; -using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Reshaping { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs b/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs new file mode 100644 index 000000000..737f88cd4 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Abstract wrapper base class. Wrappers take another layer and augment it in various ways. + /// Do not use this class as a layer, it is only an abstract base class. + /// Two usable wrappers are the `TimeDistributed` and `Bidirectional` wrappers. + /// + public abstract class Wrapper: Layer + { + public ILayer _layer; + public Wrapper(WrapperArgs args):base(args) + { + _layer = args.Layer; + } + + public virtual void Build(KerasShapesWrapper input_shape) + { + if (!_layer.Built) + { + _layer.build(input_shape); + } + built = true; + } + + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs b/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs new file mode 100644 index 000000000..0566b08ad --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs @@ -0,0 +1,285 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Tensorflow.Common.Types; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Bidirectional wrapper for RNNs. 
+ /// + public class Bidirectional: Wrapper + { + int _num_constants = 0; + bool _support_masking = true; + bool _return_state; + bool _stateful; + bool _return_sequences; + BidirectionalArgs _args; + RNNArgs _layer_args_copy; + RNN _forward_layer; + RNN _backward_layer; + RNN _layer; + InputSpec _input_spec; + public Bidirectional(BidirectionalArgs args):base(args) + { + _args = args; + if (_args.Layer is not ILayer) + throw new ValueError( + "Please initialize `Bidirectional` layer with a " + + $"`tf.keras.layers.Layer` instance. Received: {_args.Layer}"); + + if (_args.BackwardLayer is not null && _args.BackwardLayer is not ILayer) + throw new ValueError( + "`backward_layer` need to be a `tf.keras.layers.Layer` " + + $"instance. Received: {_args.BackwardLayer}"); + if (!new List { "sum", "mul", "ave", "concat", null }.Contains(_args.MergeMode)) + { + throw new ValueError( + $"Invalid merge mode. Received: {_args.MergeMode}. " + + "Merge mode should be one of " + + "{\"sum\", \"mul\", \"ave\", \"concat\", null}" + ); + } + if (_args.Layer is RNN) + { + _layer = _args.Layer as RNN; + } + else + { + throw new ValueError( + "Bidirectional only support RNN instance such as LSTM or GRU"); + } + _return_state = _layer.Args.ReturnState; + _return_sequences = _layer.Args.ReturnSequences; + _stateful = _layer.Args.Stateful; + _layer_args_copy = _layer.Args.Clone(); + // We don't want to track `layer` since we're already tracking the two + // copies of it we actually run. + // TODO(Wanglongzhi2001), since the feature of setattr_tracking has not been implemented. + // _setattr_tracking = false; + // super().__init__(layer, **kwargs) + // _setattr_tracking = true; + + // Recreate the forward layer from the original layer config, so that it + // will not carry over any state from the layer. + if (_layer is LSTM) + { + var arg = _layer_args_copy as LSTMArgs; + _forward_layer = new LSTM(arg); + } + else if(_layer is SimpleRNN) + { + var arg = _layer_args_copy as SimpleRNNArgs; + _forward_layer = new SimpleRNN(arg); + } + // TODO(Wanglongzhi2001), add GRU if case. + else + { + _forward_layer = new RNN(_layer.Cell, _layer_args_copy); + } + //_forward_layer = _recreate_layer_from_config(_layer); + if (_args.BackwardLayer is null) + { + _backward_layer = _recreate_layer_from_config(_layer, go_backwards:true); + } + else + { + _backward_layer = _args.BackwardLayer as RNN; + } + _forward_layer.Name = "forward_" + _forward_layer.Name; + _backward_layer.Name = "backward_" + _backward_layer.Name; + _verify_layer_config(); + + void force_zero_output_for_mask(RNN layer) + { + layer.Args.ZeroOutputForMask = layer.Args.ReturnSequences; + } + + force_zero_output_for_mask(_forward_layer); + force_zero_output_for_mask(_backward_layer); + + if (_args.Weights is not null) + { + var nw = len(_args.Weights); + _forward_layer.set_weights(_args.Weights[$":,{nw / 2}"]); + _backward_layer.set_weights(_args.Weights[$"{nw / 2},:"]); + } + + _input_spec = _layer.InputSpec; + } + + private void _verify_layer_config() + { + if (_forward_layer.Args.GoBackwards == _backward_layer.Args.GoBackwards) + { + throw new ValueError( + "Forward layer and backward layer should have different " + + "`go_backwards` value." 
+ + "forward_layer.go_backwards = " + + $"{_forward_layer.Args.GoBackwards}," + + "backward_layer.go_backwards = " + + $"{_backward_layer.Args.GoBackwards}"); + } + if (_forward_layer.Args.Stateful != _backward_layer.Args.Stateful) + { + throw new ValueError( + "Forward layer and backward layer are expected to have "+ + $"the same value for attribute stateful, got "+ + $"{_forward_layer.Args.Stateful} for forward layer and "+ + $"{_backward_layer.Args.Stateful} for backward layer"); + } + if (_forward_layer.Args.ReturnState != _backward_layer.Args.ReturnState) + { + throw new ValueError( + "Forward layer and backward layer are expected to have " + + $"the same value for attribute return_state, got " + + $"{_forward_layer.Args.ReturnState} for forward layer and " + + $"{_backward_layer.Args.ReturnState} for backward layer"); + } + if (_forward_layer.Args.ReturnSequences != _backward_layer.Args.ReturnSequences) + { + throw new ValueError( + "Forward layer and backward layer are expected to have " + + $"the same value for attribute return_sequences, got " + + $"{_forward_layer.Args.ReturnSequences} for forward layer and " + + $"{_backward_layer.Args.ReturnSequences} for backward layer"); + } + } + + private RNN _recreate_layer_from_config(RNN layer, bool go_backwards = false) + { + var config = layer.get_config() as RNNArgs; + var cell = layer.Cell; + if (go_backwards) + { + config.GoBackwards = !config.GoBackwards; + } + + if (layer is LSTM) + { + var arg = config as LSTMArgs; + return new LSTM(arg); + } + else if(layer is SimpleRNN) + { + var arg = config as SimpleRNNArgs; + return new SimpleRNN(arg); + } + // TODO(Wanglongzhi2001), add GRU if case. + else + { + return new RNN(cell, config); + } + } + + public override void build(KerasShapesWrapper input_shape) + { + _buildInputShape = input_shape; + tf_with(ops.name_scope(_forward_layer.Name), scope=> + { + _forward_layer.build(input_shape); + }); + tf_with(ops.name_scope(_backward_layer.Name), scope => + { + _backward_layer.build(input_shape); + }); + built = true; + } + + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) + { + // `Bidirectional.call` implements the same API as the wrapped `RNN`. + Tensors forward_inputs; + Tensors backward_inputs; + Tensors forward_state; + Tensors backward_state; + // if isinstance(inputs, list) and len(inputs) > 1: + if (inputs.Length > 1) + { + // initial_states are keras tensors, which means they are passed + // in together with inputs as list. The initial_states need to be + // split into forward and backward section, and be feed to layers + // accordingly. + forward_inputs = new Tensors { inputs[0] }; + backward_inputs = new Tensors { inputs[0] }; + var pivot = (len(inputs) - _num_constants) / 2 + 1; + // add forward initial state + forward_inputs.Concat(new Tensors { inputs[$"1:{pivot}"] }); + if (_num_constants != 0) + // add backward initial state + backward_inputs.Concat(new Tensors { inputs[$"{pivot}:"] }); + else + { + // add backward initial state + backward_inputs.Concat(new Tensors { inputs[$"{pivot}:{-_num_constants}"] }); + // add constants for forward and backward layers + forward_inputs.Concat(new Tensors { inputs[$"{-_num_constants}:"] }); + backward_inputs.Concat(new Tensors { inputs[$"{-_num_constants}:"] }); + } + forward_state = null; + backward_state = null; + } + else if (state is not null) + { + // initial_states are not keras tensors, eg eager tensor from np + // array. 
They are only passed in from kwarg initial_state, and + // should be passed to forward/backward layer via kwarg + // initial_state as well. + forward_inputs = inputs; + backward_inputs = inputs; + var half = len(state) / 2; + forward_state = state[$":{half}"]; + backward_state = state[$"{half}:"]; + } + else + { + forward_inputs = inputs; + backward_inputs = inputs; + forward_state = null; + backward_state = null; + } + var y = _forward_layer.Apply(forward_inputs, forward_state); + var y_rev = _backward_layer.Apply(backward_inputs, backward_state); + + Tensors states = new(); + if (_return_state) + { + states = y["1:"] + y_rev["1:"]; + y = y[0]; + y_rev = y_rev[0]; + } + + if (_return_sequences) + { + int time_dim = _forward_layer.Args.TimeMajor ? 0 : 1; + y_rev = keras.backend.reverse(y_rev, time_dim); + } + Tensors output; + if (_args.MergeMode == "concat") + output = keras.backend.concatenate(new Tensors { y.Single(), y_rev.Single() }); + else if (_args.MergeMode == "sum") + output = y.Single() + y_rev.Single(); + else if (_args.MergeMode == "ave") + output = (y.Single() + y_rev.Single()) / 2; + else if (_args.MergeMode == "mul") + output = y.Single() * y_rev.Single(); + else if (_args.MergeMode is null) + output = new Tensors { y.Single(), y_rev.Single() }; + else + throw new ValueError( + "Unrecognized value for `merge_mode`. " + + $"Received: {_args.MergeMode}" + + "Expected values are [\"concat\", \"sum\", \"ave\", \"mul\"]"); + if (_return_state) + { + if (_args.MergeMode is not null) + return new Tensors { output.Single(), states.Single()}; + } + return output; + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs index 75feb8ea2..27c13f349 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs @@ -6,7 +6,7 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public abstract class DropoutRNNCellMixin: Layer, IRnnCell { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs new file mode 100644 index 000000000..0919883d2 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs @@ -0,0 +1,168 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Extensions; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Saving; + + +namespace Tensorflow.Keras.Layers +{ + public class GRU : RNN + { + GRUArgs _args; + private static GRUCell _cell; + + bool _return_runtime; + public GRUCell Cell { get => _cell; } + public int units { get => _args.Units; } + public Activation activation { get => _args.Activation; } + public Activation recurrent_activation { get => _args.RecurrentActivation; } + public bool use_bias { get => _args.UseBias; } + public float dropout { get => _args.Dropout; } + public float recurrent_dropout { get => _args.RecurrentDropout; } + public IInitializer kernel_initializer { get => _args.KernelInitializer; } + public IInitializer recurrent_initializer { get => _args.RecurrentInitializer; } + public IInitializer bias_initializer { get => _args.BiasInitializer; } + public int implementation { get => _args.Implementation; } + public bool reset_after { get => _args.ResetAfter; } + + public GRU(GRUArgs args) : base(CreateCell(args), PreConstruct(args)) + { + _args = args; + + if 
(_args.Implementation == 0)
+            {
+                // Use red console output as a warning that is also visible in release builds.
+                Console.ForegroundColor = ConsoleColor.Red;
+                Console.WriteLine("Warning: `implementation=0` has been deprecated, " +
+                    "and now defaults to `implementation=2`. " +
+                    "Please update your layer call.");
+                Console.ResetColor();
+            }
+
+            GRUCell cell = new GRUCell(new GRUCellArgs
+            {
+                Units = _args.Units,
+                Activation = _args.Activation,
+                RecurrentActivation = _args.RecurrentActivation,
+                UseBias = _args.UseBias,
+                Dropout = _args.Dropout,
+                RecurrentDropout = _args.RecurrentDropout,
+                KernelInitializer = _args.KernelInitializer,
+                RecurrentInitializer = _args.RecurrentInitializer,
+                BiasInitializer = _args.BiasInitializer,
+                ResetAfter = _args.ResetAfter,
+                Implementation = _args.Implementation
+            });
+            _cell = cell;
+        }
+
+        protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bool? training = null, IOptionalArgs? optional_args = null)
+        {
+            GRUOptionalArgs? gru_optional_args = optional_args as GRUOptionalArgs;
+            if (optional_args is not null && gru_optional_args is null)
+            {
+                throw new ArgumentException("The type of optional args should be `GRUOptionalArgs`.");
+            }
+            Tensors? mask = gru_optional_args?.Mask;
+
+            // Ragged input is not supported yet.
+            int row_length = 0;
+            bool is_ragged_input = false;
+
+            _validate_args_if_ragged(is_ragged_input, mask);
+
+            // GRU does not support constants; ignore them during processing.
+            (inputs, initial_state, _) = this._process_inputs(inputs, initial_state, null);
+
+            if (mask.Length > 1)
+            {
+                mask = mask[0];
+            }
+
+            var input_shape = inputs.shape;
+            var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1];
+
+            // TODO(Wanglongzhi2001), finish the _could_use_gpu_kernel part.
+            Func<Tensors, Tensors, (Tensors, Tensors)> step = (cell_inputs, cell_states) =>
+            {
+                var res = Cell.Apply(cell_inputs, cell_states, training is null ?
true : training.Value); + var (output, state) = res; + return (output, state); + }; + + var (last_output, outputs, states) = keras.backend.rnn( + step, + inputs, + initial_state, + constants: null, + go_backwards: _args.GoBackwards, + mask: mask, + unroll: _args.Unroll, + input_length: ops.convert_to_tensor(timesteps), + time_major: _args.TimeMajor, + zero_output_for_mask: base.Args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences + ); + + Tensors output; + if (_args.ReturnSequences) + { + output = outputs; + } + else + { + output = last_output; + } + + if (_args.ReturnState) + { + output = new Tensors { output, states }; + } + return output; + } + + private static IRnnCell CreateCell(GRUArgs gruArgs) + { + return new GRUCell(new GRUCellArgs + { + Units = gruArgs.Units, + Activation = gruArgs.Activation, + RecurrentActivation = gruArgs.RecurrentActivation, + UseBias = gruArgs.UseBias, + Dropout = gruArgs.Dropout, + RecurrentDropout = gruArgs.RecurrentDropout, + KernelInitializer = gruArgs.KernelInitializer, + RecurrentInitializer = gruArgs.RecurrentInitializer, + BiasInitializer = gruArgs.BiasInitializer, + ResetAfter = gruArgs.ResetAfter, + Implementation = gruArgs.Implementation + }); + } + + private static RNNArgs PreConstruct(GRUArgs args) + { + return new RNNArgs + { + ReturnSequences = args.ReturnSequences, + ReturnState = args.ReturnState, + GoBackwards = args.GoBackwards, + Stateful = args.Stateful, + Unroll = args.Unroll, + TimeMajor = args.TimeMajor, + Units = args.Units, + Activation = args.Activation, + RecurrentActivation = args.RecurrentActivation, + UseBias = args.UseBias, + Dropout = args.Dropout, + RecurrentDropout = args.RecurrentDropout, + KernelInitializer = args.KernelInitializer, + RecurrentInitializer = args.RecurrentInitializer, + BiasInitializer = args.BiasInitializer + }; + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs new file mode 100644 index 000000000..2b9c01e31 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs @@ -0,0 +1,281 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Extensions; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Cell class for the GRU layer. + /// + public class GRUCell : DropoutRNNCellMixin + { + GRUCellArgs _args; + IVariableV1 _kernel; + IVariableV1 _recurrent_kernel; + IInitializer _bias_initializer; + IVariableV1 _bias; + INestStructure _state_size; + INestStructure _output_size; + int Units; + public override INestStructure StateSize => _state_size; + + public override INestStructure OutputSize => _output_size; + + public override bool SupportOptionalArgs => false; + public GRUCell(GRUCellArgs args) : base(args) + { + _args = args; + if (_args.Units <= 0) + { + throw new ValueError( + $"units must be a positive integer, got {args.Units}"); + } + _args.Dropout = Math.Min(1f, Math.Max(0f, _args.Dropout)); + _args.RecurrentDropout = Math.Min(1f, Math.Max(0f, this._args.RecurrentDropout)); + if (_args.RecurrentDropout != 0f && _args.Implementation != 1) + { + Debug.WriteLine("RNN `implementation=2` is not supported when `recurrent_dropout` is set." 
+ + "Using `implementation=1`."); + _args.Implementation = 1; + } + Units = _args.Units; + _state_size = new NestList(Units); + _output_size = new NestNode(Units); + } + + public override void build(KerasShapesWrapper input_shape) + { + //base.build(input_shape); + + var single_shape = input_shape.ToSingleShape(); + var input_dim = single_shape[-1]; + + _kernel = add_weight("kernel", (input_dim, _args.Units * 3), + initializer: _args.KernelInitializer + ); + + _recurrent_kernel = add_weight("recurrent_kernel", (Units, Units * 3), + initializer: _args.RecurrentInitializer + ); + if (_args.UseBias) + { + Shape bias_shape; + if (!_args.ResetAfter) + { + bias_shape = new Shape(3 * Units); + } + else + { + bias_shape = (2, 3 * Units); + } + _bias = add_weight("bias", bias_shape, + initializer: _bias_initializer + ); + } + built = true; + } + + protected override Tensors Call(Tensors inputs, Tensors states = null, bool? training = null, IOptionalArgs? optional_args = null) + { + var h_tm1 = states.IsNested() ? states[0] : states.Single(); + var dp_mask = get_dropout_mask_for_cell(inputs, training.Value, count: 3); + var rec_dp_mask = get_recurrent_dropout_mask_for_cell(h_tm1, training.Value, count: 3); + + IVariableV1 input_bias = _bias; + IVariableV1 recurrent_bias = _bias; + if (_args.UseBias) + { + if (!_args.ResetAfter) + { + input_bias = _bias; + recurrent_bias = null; + } + else + { + input_bias = tf.Variable(tf.unstack(_bias.AsTensor())[0]); + recurrent_bias = tf.Variable(tf.unstack(_bias.AsTensor())[1]); + } + } + + + Tensor hh; + Tensor z; + if ( _args.Implementation == 1) + { + Tensor inputs_z; + Tensor inputs_r; + Tensor inputs_h; + if (0f < _args.Dropout && _args.Dropout < 1f) + { + inputs_z = inputs * dp_mask[0]; + inputs_r = inputs * dp_mask[1]; + inputs_h = inputs * dp_mask[2]; + } + else + { + inputs_z = inputs.Single(); + inputs_r = inputs.Single(); + inputs_h = inputs.Single(); + } + + + int startIndex = (int)_kernel.AsTensor().shape[0]; + var _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, 0 }, new[] { startIndex, Units }); + var x_z = math_ops.matmul(inputs_z, _kernel_slice); + _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, Units }, new[] { Units, Units}); + var x_r = math_ops.matmul( + inputs_r, _kernel_slice); + int endIndex = (int)_kernel.AsTensor().shape[1]; + _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 }); + var x_h = math_ops.matmul(inputs_h, _kernel_slice); + + if(_args.UseBias) + { + x_z = tf.nn.bias_add( + x_z, tf.Variable(input_bias.AsTensor()[$":{Units}"])); + x_r = tf.nn.bias_add( + x_r, tf.Variable(input_bias.AsTensor()[$"{Units}:{Units * 2}"])); + x_h = tf.nn.bias_add( + x_h, tf.Variable(input_bias.AsTensor()[$"{Units * 2}:"])); + } + + Tensor h_tm1_z; + Tensor h_tm1_r; + Tensor h_tm1_h; + if (0f < _args.RecurrentDropout && _args.RecurrentDropout < 1f) + { + h_tm1_z = h_tm1 * rec_dp_mask[0]; + h_tm1_r = h_tm1 * rec_dp_mask[1]; + h_tm1_h = h_tm1 * rec_dp_mask[2]; + } + else + { + h_tm1_z = h_tm1; + h_tm1_r = h_tm1; + h_tm1_h = h_tm1; + } + + startIndex = (int)_recurrent_kernel.AsTensor().shape[0]; + var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(), + new[] { 0, 0 }, new[] { startIndex, Units }); + var recurrent_z = math_ops.matmul( + h_tm1_z, _recurrent_kernel_slice); + _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(), + new[] { 0, Units }, new[] { startIndex, Units}); + var recurrent_r = math_ops.matmul( + h_tm1_r, _recurrent_kernel_slice); 
+                if (_args.ResetAfter && _args.UseBias)
+                {
+                    recurrent_z = tf.nn.bias_add(
+                        recurrent_z, tf.Variable(recurrent_bias.AsTensor()[$":{Units}"]));
+                    recurrent_r = tf.nn.bias_add(
+                        recurrent_r, tf.Variable(recurrent_bias.AsTensor()[$"{Units}:{Units * 2}"]));
+                }
+                z = _args.RecurrentActivation.Apply(x_z + recurrent_z);
+                var r = _args.RecurrentActivation.Apply(x_r + recurrent_r);
+
+                Tensor recurrent_h;
+                if (_args.ResetAfter)
+                {
+                    endIndex = (int)_recurrent_kernel.AsTensor().shape[1];
+                    _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 });
+                    recurrent_h = math_ops.matmul(
+                        h_tm1_h, _recurrent_kernel_slice);
+                    if (_args.UseBias)
+                    {
+                        recurrent_h = tf.nn.bias_add(
+                            recurrent_h, tf.Variable(recurrent_bias.AsTensor()[$"{Units * 2}:"]));
+                    }
+                    recurrent_h *= r;
+                }
+                else
+                {
+                    _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 });
+                    recurrent_h = math_ops.matmul(
+                        r * h_tm1_h, _recurrent_kernel_slice);
+                }
+                hh = _args.Activation.Apply(x_h + recurrent_h);
+            }
+            else
+            {
+                if (0f < _args.Dropout && _args.Dropout < 1f)
+                {
+                    inputs = inputs * dp_mask[0];
+                }
+
+                var matrix_x = math_ops.matmul(inputs, _kernel.AsTensor());
+                if (_args.UseBias)
+                {
+                    matrix_x = tf.nn.bias_add(matrix_x, input_bias);
+                }
+                var matrix_x_splitted = tf.split(matrix_x, 3, axis: -1);
+                var x_z = matrix_x_splitted[0];
+                var x_r = matrix_x_splitted[1];
+                var x_h = matrix_x_splitted[2];
+
+                Tensor matrix_inner;
+                if (_args.ResetAfter)
+                {
+                    matrix_inner = math_ops.matmul(h_tm1, _recurrent_kernel.AsTensor());
+                    if (_args.UseBias)
+                    {
+                        matrix_inner = tf.nn.bias_add(
+                            matrix_inner, recurrent_bias);
+                    }
+                }
+                else
+                {
+                    var startIndex = (int)_recurrent_kernel.AsTensor().shape[0];
+                    var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, 0 }, new[] { startIndex, Units * 2 });
+                    matrix_inner = math_ops.matmul(
+                        h_tm1, _recurrent_kernel_slice);
+                }
+
+                // Split the fused recurrent pre-activations into the z, r and h parts.
+                var matrix_inner_splitted = tf.split(matrix_inner, new int[] { Units, Units, -1 }, axis: -1);
+                var recurrent_z = matrix_inner_splitted[0];
+                var recurrent_r = matrix_inner_splitted[1];
+                var recurrent_h = matrix_inner_splitted[2];
+
+                z = _args.RecurrentActivation.Apply(x_z + recurrent_z);
+                var r = _args.RecurrentActivation.Apply(x_r + recurrent_r);
+
+                if (_args.ResetAfter)
+                {
+                    recurrent_h = r * recurrent_h;
+                }
+                else
+                {
+                    var startIndex = (int)_recurrent_kernel.AsTensor().shape[0];
+                    var endIndex = (int)_recurrent_kernel.AsTensor().shape[1];
+                    var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, 2 * Units }, new[] { startIndex, endIndex - 2 * Units });
+                    recurrent_h = math_ops.matmul(
+                        r * h_tm1, _recurrent_kernel_slice);
+                }
+                hh = _args.Activation.Apply(x_h + recurrent_h);
+            }
+            var h = z * h_tm1 + (1 - z) * hh;
+            if (states.IsNested())
+            {
+                var new_state = new NestList<Tensor>(h);
+                return new Nest<Tensor>(new INestStructure<Tensor>[] { new NestNode<Tensor>(h), new_state }).ToTensors();
+            }
+            else
+            {
+                return new Nest<Tensor>(new INestStructure<Tensor>[] { new NestNode<Tensor>(h), new NestNode<Tensor>(h) }).ToTensors();
+            }
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
index 025465fd6..c766e8d69 100644
--- a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
+++ b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
@@ -1,10 +1,11 @@
 using System.Linq;
-using Tensorflow.Keras.ArgsDefinition.Rnn;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using
Tensorflow.Common.Types; using Tensorflow.Common.Extensions; +using Tensorflow.Keras.Saving; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Long Short-Term Memory layer - Hochreiter 1997. @@ -14,15 +15,15 @@ namespace Tensorflow.Keras.Layers.Rnn /// public class LSTM : RNN { - LSTMArgs args; + LSTMArgs _args; InputSpec[] _state_spec; InputSpec _input_spec; bool _could_use_gpu_kernel; - + public LSTMArgs Args { get => _args; } public LSTM(LSTMArgs args) : base(CreateCell(args), args) { - this.args = args; + _args = args; _input_spec = new InputSpec(ndim: 3); _state_spec = new[] { args.Units, args.Units }.Select(dim => new InputSpec(shape: (-1, dim))).ToArray(); _could_use_gpu_kernel = args.Activation == keras.activations.Tanh @@ -71,7 +72,7 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo var single_input = inputs.Single; var input_shape = single_input.shape; - var timesteps = args.TimeMajor ? input_shape[0] : input_shape[1]; + var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1]; _maybe_reset_cell_dropout_mask(Cell); @@ -87,26 +88,26 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo inputs, initial_state, constants: null, - go_backwards: args.GoBackwards, + go_backwards: _args.GoBackwards, mask: mask, - unroll: args.Unroll, + unroll: _args.Unroll, input_length: ops.convert_to_tensor(timesteps), - time_major: args.TimeMajor, - zero_output_for_mask: args.ZeroOutputForMask, - return_all_outputs: args.ReturnSequences + time_major: _args.TimeMajor, + zero_output_for_mask: _args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences ); Tensor output; - if (args.ReturnSequences) + if (_args.ReturnSequences) { - output = keras.backend.maybe_convert_to_ragged(false, outputs, (int)timesteps, args.GoBackwards); + output = keras.backend.maybe_convert_to_ragged(false, outputs, (int)timesteps, _args.GoBackwards); } else { output = last_output; } - if (args.ReturnState) + if (_args.ReturnState) { return new Tensor[] { output }.Concat(states).ToArray().ToTensors(); } @@ -115,5 +116,11 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo return output; } } + + public override IKerasConfig get_config() + { + return _args; + } + } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs index 284a2b778..e4fc6dd22 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs @@ -3,12 +3,12 @@ using System.Diagnostics; using Tensorflow.Common.Extensions; using Tensorflow.Common.Types; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Keras.Utils; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Cell class for the LSTM layer. 
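The `get_config` override added to `LSTM` above is what the `Bidirectional` wrapper relies on: `_recreate_layer_from_config` casts the returned config back to `LSTMArgs`, flips `go_backwards`, and builds an independent backward copy. A minimal sketch of that round-trip (the `forward` layer is illustrative):

    var config = forward.get_config() as LSTMArgs; // LSTM.get_config() now returns its LSTMArgs
    config.GoBackwards = !config.GoBackwards;      // reverse the time direction
    var backward = new LSTM(config);               // fresh layer, no shared state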
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs index f86de8a85..fec75559c 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.Reflection; using Tensorflow.Keras.ArgsDefinition; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Util; @@ -14,7 +13,7 @@ using System.Runtime.CompilerServices; // from tensorflow.python.distribute import distribution_strategy_context as ds_context; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Base class for recurrent layers. @@ -26,13 +25,15 @@ public class RNN : RnnBase private RNNArgs _args; private object _input_spec = null; // or NoneValue?? private object _state_spec = null; - private Tensors _states = null; private object _constants_spec = null; + private Tensors _states = null; private int _num_constants; protected IVariableV1 _kernel; protected IVariableV1 _bias; private IRnnCell _cell; - protected IRnnCell Cell + + public RNNArgs Args { get => _args; } + public IRnnCell Cell { get { @@ -185,6 +186,7 @@ private Tensors compute_mask(Tensors inputs, Tensors mask) public override void build(KerasShapesWrapper input_shape) { + _buildInputShape = input_shape; input_shape = new KerasShapesWrapper(input_shape.Shapes[0]); InputSpec get_input_spec(Shape shape) @@ -242,10 +244,9 @@ object get_state_spec(Shape shape) /// /// /// - /// Binary tensor of shape [batch_size, timesteps] indicating whether a given timestep should be masked - /// /// List of initial state tensors to be passed to the first call of the cell - /// List of constant tensors to be passed to the cell at each timestep + /// + /// /// /// /// @@ -393,7 +394,7 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo } } - public override Tensors Apply(Tensors inputs, Tensors initial_states = null, bool training = false, IOptionalArgs? optional_args = null) + public override Tensors Apply(Tensors inputs, Tensors initial_states = null, bool? training = false, IOptionalArgs? optional_args = null) { RnnOptionalArgs? 
rnn_optional_args = optional_args as RnnOptionalArgs; if (optional_args is not null && rnn_optional_args is null) @@ -468,7 +469,7 @@ public override Tensors Apply(Tensors inputs, Tensors initial_states = null, boo return (inputs, initial_state, constants); } - private void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) + protected void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) { if (!is_ragged_input) { @@ -527,54 +528,19 @@ public Tensors __call__(Tensors inputs, Tensor state = null, Tensor training = n throw new NotImplementedException(); } - // 好像不能cell不能传接口类型 - //public RNN New(IRnnArgCell cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - //public RNN New(List cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - protected Tensors get_initial_state(Tensors inputs) { var input = inputs[0]; var input_shape = array_ops.shape(inputs); var batch_size = _args.TimeMajor ? input_shape[1] : input_shape[0]; var dtype = input.dtype; - Tensors init_state = Cell.GetInitialState(null, batch_size, dtype); - return init_state; } + + public override IKerasConfig get_config() + { + return _args; + } } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs index 018b17780..1419da4b2 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs @@ -4,7 +4,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public abstract class RnnBase: Layer { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs index a22f31c7d..9c199eb43 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs @@ -1,11 +1,11 @@ using System.Data; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving; using Tensorflow.Operations.Activation; using static HDF.PInvoke.H5Z; using static Tensorflow.ApiDef.Types; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public class SimpleRNN : RNN { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs index c77f77790..e74b56925 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs @@ -1,7 +1,7 @@ using System; using System.Collections.Generic; using System.Text; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Common.Types; @@ -9,7 +9,7 @@ using Tensorflow.Keras.Utils; 
using Tensorflow.Graphs;

-namespace Tensorflow.Keras.Layers.Rnn
+namespace Tensorflow.Keras.Layers
 {
     ///
     /// Cell class for SimpleRNN.
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
index 8799bfb23..ece2bc5bf 100644
--- a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
+++ b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
@@ -3,12 +3,12 @@
 using System.Linq;
 using Tensorflow.Common.Extensions;
 using Tensorflow.Common.Types;
-using Tensorflow.Keras.ArgsDefinition.Rnn;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Keras.Utils;

-namespace Tensorflow.Keras.Layers.Rnn
+namespace Tensorflow.Keras.Layers
 {
     public class StackedRNNCells : Layer, IRnnCell
     {
diff --git a/src/TensorFlowNET.Keras/Models/ModelsApi.cs b/src/TensorFlowNET.Keras/Models/ModelsApi.cs
index 44dca58d0..2605c41e3 100644
--- a/src/TensorFlowNET.Keras/Models/ModelsApi.cs
+++ b/src/TensorFlowNET.Keras/Models/ModelsApi.cs
@@ -1,22 +1,15 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Saving;
+using Tensorflow.Keras.Saving;
 using Tensorflow.Keras.Saving.SavedModel;
-using ThirdParty.Tensorflow.Python.Keras.Protobuf;

-namespace Tensorflow.Keras.Models
+namespace Tensorflow.Keras.Models;
+
+public class ModelsApi: IModelsApi
 {
-    public class ModelsApi: IModelsApi
-    {
-        public Functional from_config(FunctionalConfig config)
-            => Functional.from_config(config);
+    public Functional from_config(FunctionalConfig config)
+        => Functional.from_config(config);

-        public IModel load_model(string filepath, bool compile = true, LoadOptions? options = null)
-        {
-            return KerasLoadModelUtils.load_model(filepath, compile: compile, options: options) as Model;
-        }
+    public IModel load_model(string filepath, bool compile = true, LoadOptions? options = null)
+    {
+        return KerasLoadModelUtils.load_model(filepath, compile: compile, options: options) as Model;
     }
 }
diff --git a/src/TensorFlowNET.Keras/Optimizers/AdamW.cs b/src/TensorFlowNET.Keras/Optimizers/AdamW.cs
new file mode 100644
index 000000000..d111b5d3a
--- /dev/null
+++ b/src/TensorFlowNET.Keras/Optimizers/AdamW.cs
@@ -0,0 +1,64 @@
+namespace Tensorflow.Keras.Optimizers
+{
+    public class AdamW : Adam
+    {
+        string name;
+        float weight_decay;
+        DeviceDType deType;
+        List<string> no_decay_params = null;
+
+        public AdamW(float learning_rate = 0.001f,
+            float weight_decay = 0.004f,
+            float beta_1 = 0.9f,
+            float beta_2 = 0.999f,
+            float epsilon = 1e-7f,
+            bool amsgrad = false,
+            List<string> no_decay_params = null,
+            string name = "AdamW") : base(learning_rate, beta_1, beta_2, epsilon, amsgrad)
+        {
+            this.name = name;
+            this.weight_decay = weight_decay;
+            this.no_decay_params = no_decay_params;
+        }
+
+        protected Operation _decay_weights_op(IVariableV1 var, float learning_rate, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            bool do_decay = _do_use_weight_decay(var.Name);
+            if (do_decay) return var.assign_add(
+                -learning_rate * var.AsTensor() * apply_state[deType]["weight_decay"]);
+            return tf.no_op();
+        }
+
+        protected bool _do_use_weight_decay(string param_name)
+        {
+            // Whether to use L2 weight decay for `param_name`.
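+            // Decoupled weight decay (Loshchilov & Hutter, arXiv:1711.05101):
+            // _decay_weights_op shrinks the variable by
+            //     var -= learning_rate * weight_decay * var
+            // and _resource_apply_dense sequences that shrink before the regular
+            // Adam step, rather than folding an L2 term into the loss.
+            // Parameters whose names contain any entry of `no_decay_params`
+            // (e.g. biases or normalization scales) are exempted by the checks below.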
+            if (this.weight_decay == 0)
+                return false;
+
+            if (this.no_decay_params != null)
+            {
+                foreach (var name in no_decay_params)
+                {
+                    if (param_name.Contains(name)) return false;
+                }
+            }
+            return true;
+        }
+
+        protected override Operation _resource_apply_dense(IVariableV1 var, Tensor grad, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            var decay = _decay_weights_op(var, _hyper["learning_rate"], apply_state);
+            tf.control_dependencies(new[] { decay });
+            return base._resource_apply_dense(var, grad, apply_state);
+        }
+
+        protected override void _prepare_local(DeviceDType device_dtype, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            this.deType = device_dtype;
+            base._prepare_local(device_dtype, apply_state);
+            apply_state[device_dtype]["weight_decay"] = tf.constant(
+                weight_decay, name: "adam_weight_decay_rate");
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
index 31eb88be7..a237499f9 100644
--- a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
+++ b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
@@ -29,6 +29,22 @@ public IOptimizer Adam(float learning_rate = 0.001f,
             amsgrad: amsgrad,
             name: name);

+        public IOptimizer AdamW(float learning_rate = 0.001f,
+            float weight_decay = 0.004f,
+            float beta_1 = 0.9f,
+            float beta_2 = 0.999f,
+            float epsilon = 1e-7f,
+            bool amsgrad = false,
+            List<string> no_decay_params = null,
+            string name = "AdamW") => new AdamW(learning_rate: learning_rate,
+                beta_1: beta_1,
+                beta_2: beta_2,
+                epsilon: epsilon,
+                amsgrad: amsgrad,
+                name: name,
+                weight_decay: weight_decay,
+                no_decay_params: no_decay_params);
+
         ///
         /// Construct a new RMSprop optimizer.
         ///
@@ -55,7 +71,7 @@ public IOptimizer RMSprop(float learning_rate = 0.001f,
                 Name = name
             });

-        public IOptimizer SGD(float learning_rate)
-            => new SGD(learning_rate);
+        public IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f)
+            => new SGD(learning_rate, momentum);
     }
 }
diff --git a/src/TensorFlowNET.Keras/Optimizers/SGD.cs b/src/TensorFlowNET.Keras/Optimizers/SGD.cs
index f97f4b15f..1d9ceb810 100644
--- a/src/TensorFlowNET.Keras/Optimizers/SGD.cs
+++ b/src/TensorFlowNET.Keras/Optimizers/SGD.cs
@@ -22,6 +22,8 @@ public SGD(float learning_rate,
             _set_hyper("decay", decay);

             _momentum = momentum > 0;
+            if (momentum < 0 || momentum > 1)
+                throw new ValueError($"momentum must be a number between 0 and 1, got {momentum}.");

             _set_hyper("momentum", momentum);

@@ -30,6 +32,13 @@ public SGD(float learning_rate,
 #pragma warning restore CS1717 // Assignment made to same variable
         }

+        protected override void _create_slots(IVariableV1[] var_list)
+        {
+            if (_momentum)
+                foreach (var var in var_list)
+                    add_slot(var, "momentum");
+        }
+
         protected override void _prepare_local(DeviceDType device_dtype,
             Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
         {
@@ -43,7 +52,15 @@ protected override Operation _resource_apply_dense(IVariableV1 var, Tensor grad,
         {
             if (_momentum)
             {
-                throw new NotImplementedException("_resource_apply_dense");
+                var momentum_var = get_slot(var, "momentum");
+                return gen_training_ops.resource_apply_keras_momentum(
+                    var.Handle,
+                    momentum_var.Handle,
+                    _get_hyper("learning_rate", var.dtype),
+                    grad,
+                    _get_hyper("momentum", var.dtype),
+                    use_locking: _use_locking,
+                    use_nesterov: nesterov);
             }

             var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device && x.DType == var.dtype.as_base_dtype());
diff --git a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
diff --git a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
index 2f3d8f527..18ca404ef 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
@@ -6,7 +6,7 @@ namespace Tensorflow.Keras.Preprocessings
     public partial class DatasetUtils
     {
         /// <summary>
-        /// Potentially restict samples & labels to a training or validation split.
+        /// Potentially restrict samples and labels to a training or validation split.
         /// </summary>
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
index 4acae4265..377ac4de7 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
@@ -8,6 +8,37 @@ public partial class Preprocessing
     {
         public static string[] WHITELIST_FORMATS = new[] { ".bmp", ".gif", ".jpeg", ".jpg", ".png" };
 
+        /// <summary>
+        /// Calculates classification statistics for a given array of classified data:
+        /// returns a dictionary containing the count and percentage of each class in the input array.
+        /// Can be used to analyze the class distribution of a dataset or to evaluate the
+        /// performance of a classification model.
+        /// </summary>
+        /// <remarks>code from copilot</remarks>
+        /// <param name="label_ids"></param>
+        /// <param name="label_class_names"></param>
+        Dictionary<string, double> get_classification_statistics(int[] label_ids, string[] label_class_names)
+        {
+            var countDict = label_ids.GroupBy(x => x)
+                .ToDictionary(g => g.Key, g => g.Count());
+            var totalCount = label_ids.Length;
+            var ratioDict = label_class_names.ToDictionary(name => name,
+                name =>
+                    (double)(countDict.ContainsKey(Array.IndexOf(label_class_names, name))
+                        ? countDict[Array.IndexOf(label_class_names, name)] : 0)
+                    / totalCount);
+
+            print("Classification statistics:");
+            foreach (string labelName in label_class_names)
+            {
+                double ratio = ratioDict[labelName];
+                print($"{labelName}: {ratio * 100:F2}%");
+            }
+
+            return ratioDict;
+        }
+
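For illustration, with hypothetical inputs label_ids = {0, 0, 1} and label_class_names = {"cat", "dog"}, the helper returns {cat: 0.6667, dog: 0.3333} and prints:

    // Classification statistics:
    // cat: 66.67%
    // dog: 33.33%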
         /// <summary>
         /// Generates a `tf.data.Dataset` from image files in a directory.
         /// https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory
@@ -53,11 +84,13 @@ public IDatasetV2 image_dataset_from_directory(string directory,
                 follow_links: follow_links);
 
             (image_paths, label_list) = keras.preprocessing.dataset_utils.get_training_or_validation_split(image_paths, label_list, validation_split, subset);
+            get_classification_statistics(label_list, class_name_list);
 
             var dataset = paths_and_labels_to_dataset(image_paths, image_size, num_channels, label_list, label_mode, class_name_list.Length, interpolation);
             if (shuffle)
                 dataset = dataset.shuffle(batch_size * 8, seed: seed);
             dataset = dataset.batch(batch_size);
+            dataset.class_names = class_name_list;
             return dataset;
         }
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
index b4d583878..232f81eb5 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
@@ -6,6 +6,32 @@ namespace Tensorflow.Keras
 {
     public partial class Preprocessing
     {
+
+        /// <summary>
+        /// Converts image paths into a dataset for processing;
+        /// typically used to read images for prediction.
+        /// </summary>
+        /// <param name="image_paths"></param>
+        /// <param name="image_size"></param>
+        /// <param name="num_channels"></param>
+        /// <param name="num_classes"></param>
+        /// <param name="interpolation">
+        /// The interpolation method used for resizing. Supports `bilinear`, `nearest`, `bicubic`,
+        /// `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`. Defaults to `'bilinear'`.
+        /// </param>
+        /// <returns></returns>
+        public IDatasetV2 paths_to_dataset(string[] image_paths,
+            Shape image_size,
+            int num_channels = 3,
+            int num_classes = 6,
+            string interpolation = "bilinear")
+        {
+            var path_ds = tf.data.Dataset.from_tensor_slices(image_paths);
+            var img_ds = path_ds.map(x => path_to_image(x, image_size, num_channels, interpolation));
+            var label_ds = dataset_utils.labels_to_dataset(new int[num_classes], "", num_classes);
+
+            return img_ds;
+        }
+
         public IDatasetV2 paths_and_labels_to_dataset(string[] image_paths,
             Shape image_size,
             int num_channels,
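A minimal usage sketch of the directory loader with the new statistics and class_names wiring (the directory path is a placeholder; argument names follow the hunk above):

    var dataset = keras.preprocessing.image_dataset_from_directory(
        "path/to/images",            // one sub-directory per class
        image_size: (256, 256),
        batch_size: 32);
    // Per-class percentages are printed during loading, and the discovered
    // class names now travel with the returned dataset.
    foreach (var name in dataset.class_names)
        Console.WriteLine(name);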
diff --git a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
index 396ad20eb..0bd816ccb 100644
--- a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
+++ b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
@@ -13,7 +13,6 @@
 using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Layers;
-using Tensorflow.Keras.Layers.Rnn;
 using Tensorflow.Keras.Losses;
 using Tensorflow.Keras.Metrics;
 using Tensorflow.Keras.Saving.SavedModel;
@@ -174,7 +173,7 @@ public void del_tracking()
             }
             if (node is Functional functional)
             {
-                foreach (var name in functional.UnconditionalDependencyNames.Keys)
+                foreach (var name in functional.UnconditionalDependencyNames.Keys.ToArray())
                 {
                     if (Regex.Match(name, @"^layer(_with_weights)?-[\d+]").Success)
                     {
@@ -693,7 +692,6 @@ private bool _try_build_layer(Layer obj, int node_id, KerasShapesWrapper build_i
         /// <summary>
         /// Infers input shape of layer from SavedModel functions.
         /// </summary>
         /// <param name="layer_node_id"></param>
-        ///
         /// <returns></returns>
         private TensorSpec _infer_inputs(int layer_node_id)
         {
diff --git a/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs b/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
index aa763fc2e..091dbb810 100644
--- a/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
+++ b/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
@@ -1,97 +1,89 @@
-using Google.Protobuf;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using Tensorflow.Keras.Engine;
+using System.IO;
 using Tensorflow.Train;
 using ThirdParty.Tensorflow.Python.Keras.Protobuf;
-using static Tensorflow.Binding;
-using static Tensorflow.KerasApi;
 
-namespace Tensorflow.Keras.Saving.SavedModel
+namespace Tensorflow.Keras.Saving.SavedModel;
+
+public class KerasLoadModelUtils
 {
-    public class KerasLoadModelUtils
+    /// <summary>
+    /// Corresponding to keras/saving/save.py/load_model
+    /// </summary>
+    /// <param name="filepath"></param>
+    /// <param name="custom_objects"></param>
+    /// <param name="compile"></param>
+    /// <param name="options"></param>
+    /// <returns></returns>
+    public static Trackable load_model(string filepath, IDictionary<string, object>? custom_objects = null,
+        bool compile = true, LoadOptions? options = null)
     {
-        /// <summary>
-        /// Corresponding to keras/saving/save.py/load_model
-        /// </summary>
-        /// <param name="filepath"></param>
-        /// <param name="custom_objects"></param>
-        /// <param name="compile"></param>
-        /// <param name="options"></param>
-        /// <returns></returns>
-        public static Trackable load_model(string filepath, IDictionary<string, object>? custom_objects = null,
-            bool compile = true, LoadOptions? options = null)
+        using var savingScope = SharedObjectSavingScope.Enter();
+
+        using var ctx = LoadContext.load_context(options);
+
+        if (!File.Exists(filepath) && !Directory.Exists(filepath))
         {
-            using (SharedObjectSavingScope.Enter())
-            {
-                using (LoadContext.load_context(options))
-                {
-                    if (!File.Exists(filepath) && !Directory.Exists(filepath))
-                    {
-                        throw new IOException($"No file or directory found at {filepath}.");
-                    }
-                    if (Directory.Exists(filepath))
-                    {
-                        return load(filepath, compile, options);
-                    }
-                    else
-                    {
-                        throw new NotImplementedException("Model load of h5 format has not been supported. Please submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues if it's needed.");
-                    }
-                }
-            }
+            throw new IOException($"No file or directory found at {filepath}.");
         }
 
-        private static Trackable load(string path, bool compile = true, LoadOptions? options = null)
+        if (Directory.Exists(filepath))
+        {
+            return load(filepath, compile, options);
+        }
+        else
         {
-            SavedMetadata metadata = new SavedMetadata();
-            var meta_graph_def = Loader.parse_saved_model(path).MetaGraphs[0];
-            var object_graph_def = meta_graph_def.ObjectGraphDef;
-            string path_to_metadata_pb = Path.Combine(path, Constants.SAVED_METADATA_PATH);
-            if (File.Exists(path_to_metadata_pb))
-            {
-                metadata.MergeFrom(new FileStream(path_to_metadata_pb, FileMode.Open, FileAccess.Read));
-            }
-            else
-            {
-                throw new NotImplementedException("SavedModel saved prior to TF 2.5 detected when loading Keras model, please" +
-                    " use higher version or submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues. to let us know you need it.");
-            }
+            throw new NotImplementedException("Model load of h5 format has not been supported. Please submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues if it's needed.");
+        }
+    }
 
-            if (metadata.Nodes is null || metadata.Nodes.Count == 0)
-            {
-                return Loader.load(path, options: options) as Model;
-            }
+    private static Trackable load(string path, bool compile = true, LoadOptions? options = null)
+    {
+        SavedMetadata metadata;
+        var meta_graph_def = Loader.parse_saved_model(path).MetaGraphs[0];
+        var object_graph_def = meta_graph_def.ObjectGraphDef;
+        string path_to_metadata_pb = Path.Combine(path, Constants.SAVED_METADATA_PATH);
+        if (File.Exists(path_to_metadata_pb))
+        {
+            using var stream = new FileStream(path_to_metadata_pb, FileMode.Open, FileAccess.Read);
+            metadata = SavedMetadata.Parser.ParseFrom(stream);
+        }
+        else
+        {
+            throw new NotImplementedException("SavedModel saved prior to TF 2.5 detected when loading a Keras model; please" +
+                " use a higher version or submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues to let us know you need it.");
+        }
 
-            var keras_loader = new KerasObjectLoader(metadata, object_graph_def);
-            keras_loader.load_layers(compile: compile);
+        if (metadata.Nodes is null || metadata.Nodes.Count == 0)
+        {
+            return Loader.load(path, options: options) as Model;
+        }
 
-            Dictionary<string, (Trackable, Action<object, object, object>)> nodes_to_load = new();
-            nodes_to_load["root"] = (null, null);
-            foreach (var item in keras_loader.LoadedNodes)
-            {
-                nodes_to_load[keras_loader.get_path(item.Key)] = item.Value;
-            }
-            var loaded = Loader.load_partial(path, nodes_to_load, options);
+        var keras_loader = new KerasObjectLoader(metadata, object_graph_def);
+        keras_loader.load_layers(compile: compile);
 
-            keras_loader.finalize_objects();
-            keras_loader.del_tracking();
+        Dictionary<string, (Trackable, Action<object, object, object>)> nodes_to_load = new();
+        nodes_to_load["root"] = (null, null);
+        foreach (var item in keras_loader.LoadedNodes)
+        {
+            nodes_to_load[keras_loader.get_path(item.Key)] = item.Value;
+        }
+        var loaded = Loader.load_partial(path, nodes_to_load, options);
 
-            var model = loaded["root"];
+        keras_loader.finalize_objects();
+        keras_loader.del_tracking();
 
-            if (model is Model && compile)
-            {
-                // TODO(Rinne): implement it.
-            }
+        var model = loaded["root"];
 
-            if (!tf.Context.executing_eagerly())
-            {
-                // TODO(Rinne): implement it.
-            }
+        if (model is Model && compile)
+        {
+            // TODO(Rinne): implement it.
+        }
 
-            return model;
+        if (!tf.Context.executing_eagerly())
+        {
+            // TODO(Rinne): implement it.
+        }
+
+        return model;
    }
 }
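The restructured loader keeps the same public entry point; a minimal sketch (the path is a placeholder; .h5 files still throw NotImplementedException, as above):

    var model = keras.models.load_model(@"path/to/saved_model");
    model.summary();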
diff --git a/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs b/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
index 0ec5d1a8c..325d3327a 100644
--- a/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
+++ b/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
@@ -3,7 +3,7 @@
 using System.Linq;
 using System.Text;
 using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Metrics;
 using Tensorflow.Train;
 
diff --git a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
index b04391be9..bab0efecf 100644
--- a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
+++ b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
@@ -7,6 +7,8 @@
 using static Tensorflow.Binding;
 using static Tensorflow.KerasApi;
 using System.Linq;
+using System.Text.RegularExpressions;
+
 namespace Tensorflow.Keras.Saving
 {
     public class hdf5_format
@@ -131,7 +133,7 @@ public static void load_weights_from_hdf5_group(long f, List<ILayer> layers)
             long g = H5G.open(f, name);
             var weight_names = load_attributes_from_hdf5_group(g, "weight_names");
             foreach (var i_ in weight_names)
-            {                
+            {
                 (success, Array result) = Hdf5.ReadDataset<float>(g, i_);
                 if (success)
                     weight_values.Add(np.array(result));
@@ -192,8 +194,13 @@ public static void save_weights_to_hdf5_group(long f, List<ILayer> layers)
             var tensor = val.AsTensor();
             if (name.IndexOf("/") > 1)
             {
-                var crDataGroup = Hdf5.CreateOrOpenGroup(g, Hdf5Utils.NormalizedName(name.Split('/')[0]));
-                WriteDataset(crDataGroup, name.Split('/')[1], tensor);
+                var crDataGroup = g;
+                string[] name_split = name.Split('/');
+                for (int i = 0; i < name_split.Length - 1; i++)
+                {
+                    crDataGroup = Hdf5.CreateOrOpenGroup(crDataGroup, Hdf5Utils.NormalizedName(name_split[i]));
+                }
+                WriteDataset(crDataGroup, name_split[name_split.Length - 1], tensor);
                 Hdf5.CloseGroup(crDataGroup);
             }
             else
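The nested-group fix above matters for weight names with more than one '/' segment; a sketch of the new mapping (the weight name is hypothetical):

    string name = "rnn/lstm_cell/kernel";
    string[] parts = name.Split('/');   // ["rnn", "lstm_cell", "kernel"]
    // Every segment but the last becomes a nested HDF5 group; the last names the
    // dataset. Previously only the first two segments were used, so deeper
    // segments of the name were silently dropped.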
diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
index 5dc46fe49..36d1bc1d4 100644
--- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
+++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
@@ -7,26 +7,30 @@
     enable
     Tensorflow.Keras
     AnyCPU;x64
-    0.11.0
+    0.11.3
     Haiping Chen
     Keras for .NET
-    Apache 2.0, Haiping Chen 2023
+    Apache 2.0, Haiping Chen since 2018
     TensorFlow.Keras
     https://github.com/SciSharp/TensorFlow.NET
     https://avatars3.githubusercontent.com/u/44989469?s=200&v=4
     https://github.com/SciSharp/TensorFlow.NET
-    Keras for .NET is a C# version of Keras ported from the python version.
-
-* Support CIFAR-10 dataset in keras.datasets.
-* Support Conv2D functional API.
-* Support BatchNormalization layer.
-* Building keras model in subclass, functional and sequential api
-* Implemented backward_function.
-* Support model.load_weights.
-* Add Subtract layer
-* Text preprocessing
-* Preprocessing.timeseries_dataset_from_array
-* Fixed memory leak for YOLOv3 model.
+
+    Keras for .NET is a C# version of Keras ported from the python version.
+
+    * Support CIFAR-10 dataset in keras.datasets.
+    * Support Conv2D functional API.
+    * Support BatchNormalization layer.
+    * Building keras model in subclass, functional and sequential api
+    * Implemented backward_function.
+    * Support model.load_weights.
+    * Add Subtract layer
+    * Text preprocessing
+    * Preprocessing.timeseries_dataset_from_array
+    * Fixed memory leak for YOLOv3 model.
+    * Support RNN and LSTM models
+    * Support Transformer model
+
     Keras for .NET
     Keras is an API designed for human beings, not machines. Keras follows best practices for reducing cognitive load: it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear & actionable error messages.
@@ -36,10 +40,10 @@ Keras is an API designed for human beings, not machines. Keras follows best prac
     true
     packages
     Git
-    true
+    False
     Open.snk
-    0.11.0.0
-    0.11.0.0
+    0.11.3.0
+    0.11.3.0
     LICENSE
     Debug;Release;GPU
@@ -70,9 +74,77 @@ Keras is an API designed for human beings, not machines. Keras follows best prac
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
 
-
-
+
+
diff --git a/src/TensorFlowNET.Keras/Utils/RnnUtils.cs b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
index e8700c1f2..1e9f6d845 100644
--- a/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
+++ b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
@@ -3,7 +3,7 @@
 using System.Diagnostics;
 using System.Text;
 using Tensorflow.Common.Types;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Common.Extensions;
 
 namespace Tensorflow.Keras.Utils
diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs
index 5b84c601f..b0bc15540 100644
--- a/src/TensorFlowNET.Keras/Utils/data_utils.cs
+++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs
@@ -39,5 +39,54 @@ public static string get_file(string fname, string origin,
 
             return datadir;
         }
+
+        public static (int[,], long[]) _remove_long_seq(int maxlen, int[,] seq, long[] label)
+        {
+            /* Removes sequences that exceed the maximum length.
+
+               Args:
+                   maxlen: Int, maximum length of the output sequences.
+                   seq: 2-D array where each row is a sequence.
+                   label: Array where each element is a label.
+
+               Returns:
+                   new_seq, new_label: shortened arrays for `seq` and `label`.
+            */
+            var nRow = seq.GetLength(0);
+            var nCol = seq.GetLength(1);
+            List<int[]> new_seq = new List<int[]>();
+            List<long> new_label = new List<long>();
+
+            for (var i = 0; i < nRow; i++)
+            {
+                if (maxlen < nCol && seq[i, maxlen] != 0)
+                    continue;
+                int[] sentence = new int[maxlen];
+                for (var j = 0; j < maxlen && j < nCol; j++)
+                {
+                    sentence[j] = seq[i, j];
+                }
+                new_seq.Add(sentence);
+                new_label.Add(label[i]);
+            }
+
+            int[,] new_seq_array = new int[new_seq.Count, maxlen];
+            long[] new_label_array = new long[new_label.Count];
+
+            for (var i = 0; i < new_seq.Count; i++)
+            {
+                for (var j = 0; j < maxlen; j++)
+                {
+                    new_seq_array[i, j] = new_seq[i][j];
+                }
+            }
+
+            for (var i = 0; i < new_label.Count; i++)
+            {
+                new_label_array[i] = new_label[i];
+            }
+            return (new_seq_array, new_label_array);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Utils/generic_utils.cs b/src/TensorFlowNET.Keras/Utils/generic_utils.cs
index 6a59fb880..5402f4995 100644
--- a/src/TensorFlowNET.Keras/Utils/generic_utils.cs
+++ b/src/TensorFlowNET.Keras/Utils/generic_utils.cs
@@ -29,6 +29,7 @@ limitations under the License.
 using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Train;
+using System.Text.RegularExpressions;
 
 namespace Tensorflow.Keras.Utils
 {
@@ -126,12 +127,15 @@ public static FunctionalConfig deserialize_model_config(JToken json)
 
         public static string to_snake_case(string name)
         {
-            return string.Concat(name.Select((x, i) =>
+            string intermediate = Regex.Replace(name, "(.)([A-Z][a-z0-9]+)", "$1_$2");
+            string insecure = Regex.Replace(intermediate, "([a-z])([A-Z])", "$1_$2").ToLower();
+
+            if (insecure[0] != '_')
             {
-                return i > 0 && char.IsUpper(x) && !Char.IsDigit(name[i - 1]) ?
-                    "_" + x.ToString() :
-                    x.ToString();
-            })).ToLower();
+                return insecure;
+            }
+
+            return "private" + insecure;
         }
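The regex-based to_snake_case now mirrors Keras' Python layer naming; for example (values derived from the two regexes above, and asserted by InitLayerNameTest later in this patch):

    // to_snake_case("Conv2DTranspose") => "conv2d_transpose"
    // to_snake_case("SimpleRNN")       => "simple_rnn"
    // A result that would start with '_' is prefixed with "private".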
diff --git a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
index c637cf858..e93324f3e 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
@@ -1,5 +1,6 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System;
+using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
 
@@ -23,7 +24,7 @@ public void SimpleWhileLoop()
         private void _testWhileContextHelper(int maximum_iterations)
         {
             // TODO: implement missing code dependencies
-            var sess = this.cached_session();
+            using var sess = this.cached_session();
             var i = constant_op.constant(0, name: "i");
             var c = new Func<Tensor, Tensor>(x => gen_math_ops.less(x, ops.convert_to_tensor(10), name: "c"));
             var b = new Func<Tensor, Tensor>(x => math_ops.add(x, 1, name: "c"));
diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index f240817b4..fc2280051 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -5,6 +5,7 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using Tensorflow.Framework;
 
 namespace TensorFlowNET.UnitTest.Gradient
 {
@@ -388,81 +389,77 @@ public void testBoundaryStop()
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testBoundaryContinue()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testBoundaryContinue(self):
-            //    # Test that we differentiate both 'x' and 'y' correctly when x is a
-            //    # predecessor of y.
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y * 3.0
-            //        grads = gradients.gradients(z, [x, y])
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(6.0, grads[0].eval())
+            // Test that we differentiate both 'x' and 'y' correctly when x is a
+            // predecessor of y.
+            //TODO: @test_util.run_v1_only("b/120545219")
+
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y * 3.0;
+                var grads = tf.gradients(z, new[] { x, y });
+                self.assertTrue(all(grads.Select(x => x != null)));
+                self.assertEqual(6.0, grads[0].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodAccumulateN()
         {
+            //TODO: @test_util.run_v1_only("b/120545219")
 
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodAccumulateN(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y],
-            //            aggregation_method=gradients.AggregationMethod.
-            //            EXPERIMENTAL_ACCUMULATE_N)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
-
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.EXPERIMENTAL_ACCUMULATE_N);
+                self.assertTrue(all(grads.Select(x => x != null)));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodAddN()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodAddN(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y], aggregation_method=gradients.AggregationMethod.ADD_N)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
-
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.ADD_N);
+                self.assertTrue(grads.All(x => x != null));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodTree()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodTree(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y],
-            //            aggregation_method=gradients.AggregationMethod.EXPERIMENTAL_TREE)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.EXPERIMENTAL_TREE);
+                self.assertTrue(grads.All(x => x != null));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
         [Ignore("TODO")]
@@ -491,24 +488,37 @@ public void testNoGradientForStringOutputs()
             //    self.assertTrue(isinstance(grads[0], ops.Tensor))
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: CompositeTensors are not supported yet.")]
         [TestMethod]
         public void testSingletonIndexedSlices()
         {
+            tf.Graph().as_default();
+
+            // TODO: uncomment when CompositeTensors are supported.
+            /*
+            var x = tf.placeholder(TF_DataType.TF_FLOAT);
+            var y = tf.identity(x);
+            var dy_indices = tf.placeholder(TF_DataType.TF_INT32);
+            var dy_values = tf.placeholder(TF_DataType.TF_FLOAT);
+            var dy = new IndexedSlices(dy_values, dy_indices);
+
+            var dx = tf.gradients(new[] { y }, new[] { x }, grad_ys: new[] { dy })[0];
+            // The IndexedSlices gradient of tf.identity is the identity map.
+            using (var sess = self.cached_session())
+            {
+                var feed_dict = new FeedItem[]
+                {
+                    (x, new Tensor(new float[] { 1.0f })),
+                    (dy_indices, new Tensor(new int[] { 0 })),
+                    (dy_values, new Tensor(new float[] { 2.0f }))
+                };
+                var result = sess.run(new[] { dx, dy }, feed_dict);
+                var vdx = result[0];
+                var vdy = result[1];
+                self.assertEqual(vdx, vdy);
+            }
+            */
-
-            //def testSingletonIndexedSlices(self):
-            //    with ops.Graph().as_default():
-            //        x = array_ops.placeholder(dtypes.float32)
-            //        y = array_ops.identity(x)
-            //        dy = ops.IndexedSlices(
-            //            array_ops.placeholder(dtypes.float32),
-            //            array_ops.placeholder(dtypes.int32))
-            //        dx, = gradients.gradients(y, x, grad_ys=dy)
-            //        # The IndexedSlices gradient of tf.identity is the identity map.
-            //        with self.cached_session() as sess:
-            //            vdx, vdy = sess.run(
-            //                [dx, dy], feed_dict={x: [1.0], dy.indices: [0], dy.values: [2.0]})
-            //            self.assertEqual(vdx, vdy)
         }
 
         [Ignore("TODO")]
@@ -576,26 +586,25 @@ public void testVariableRefGradient()
             //    self.assertIsNotNone(gradient)
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testDependentYs()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testDependentYs(self):
-            //    with self.cached_session():
-            //        x = constant_op.constant(3.0)
-            //        y = math_ops.square(x)
-            //        y1 = math_ops.square(y)
-            //        y2 = math_ops.square(y1)
-            //        g = gradients.gradients([y, y2], x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-            //        g = gradients.gradients(y + y2, x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-            //        z = array_ops.identity(y)
-            //        z2 = array_ops.identity(y2)
-            //        g = gradients.gradients([z, z2], x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = constant_op.constant(3.0);
+                var y = math_ops.square(x);
+                var y1 = math_ops.square(y);
+                var y2 = math_ops.square(y1);
+                var g = tf.gradients(new[] { y, y2 }, new[] { x });
+                self.assertAllClose(17502.0, g[0].eval());
+                g = tf.gradients(y + y2, x);
+                self.assertAllClose(17502.0, g[0].eval());
+                var z = array_ops.identity(y);
+                var z2 = array_ops.identity(y2);
+                g = tf.gradients(new[] { z, z2 }, new[] { x });
+                self.assertAllClose(17502.0, g[0].eval());
+            }
        }
 
         [Ignore("TODO")]
@@ -603,75 +612,152 @@ public void testDependentYs()
         [TestMethod]
         public void testPartialDerivatives()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testPartialDerivatives(self):
-            //    with self.cached_session():
-            //        x = constant_op.constant(1.)
-            //        y = 2 * x
-            //        z = x + y
-            //        totalg = gradients.gradients(z, [x, y])
-            //        self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
-            //        partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
-            //        self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = 2 * x;
+                var z = x + y;
+                var totalg = tf.gradients(z, new[] { x, y });
+                self.assertEqual(new[] { 3.0, 1.0 }, totalg.Select(g => g.eval()));
+                var partialg = tf.gradients(z, new[] { x, y }, stop_gradients: new[] { x, y });
+                self.assertEqual(new[] { 1.0, 1.0 }, partialg.Select(g => g.eval()));
+            }
         }
 
-        [Ignore("TODO")]
+        // TODO: remove when np.testing.assert_allclose(a, b) is implemented
+        private class CollectionComparer : System.Collections.IComparer
+        {
+            private readonly double _epsilon = 1e-07;
+
+            public int Compare(object x, object y)
+            {
+                var a = (double)x;
+                var b = (double)y;
+
+                double delta = Math.Abs(a - b);
+                if (delta < _epsilon)
+                {
+                    return 0;
+                }
+                return a.CompareTo(b);
+            }
+        }
+
+        private struct Case
+        {
+            public Tensor[] grad1;
+            public Tensor[] grad2;
+            public string constants;
+            public string variables;
+        }
+
+        [Ignore("FIXME")]
         [TestMethod]
         public void testStopGradients()
         {
+            //TODO: @test_util.run_v1_only("b/120545219")
+            Dictionary<char, Tensor> makeGraph(RandomizedImpl rng, string stop_gradients)
+            {
+                Tensor functionOf(Tensor[] xs, int k)
+                {
+                    var shape = new Shape(k, k);
+                    // TODO: replace by DefaultIfEmpty() before Aggregate().
+                    if (!xs.Any())
+                    {
+                        return rng.random(shape).astype(np.float32);
+                    }
+                    return xs.Select(x => gen_math_ops.mat_mul(rng.random(shape).astype(np.float32), x))
+                        .Aggregate((t1, t2) => t1 + t2)
+                        + rng.random(shape).astype(np.float32);
+                }
+
+                var a = functionOf(Array.Empty<Tensor>(), 3);
+                if (stop_gradients.Contains('a')) a = array_ops.stop_gradient(a);
+                var b = functionOf(new Tensor[] { a }, 3);
+                if (stop_gradients.Contains('b')) b = array_ops.stop_gradient(b);
+                var c = functionOf(new Tensor[] { a, b }, 3);
+                if (stop_gradients.Contains('c')) c = array_ops.stop_gradient(c);
+                var d = functionOf(new Tensor[] { b, c }, 3);
+                if (stop_gradients.Contains('d')) d = array_ops.stop_gradient(d);
 
-            //@test_util.run_v1_only("b/120545219")
-            //def testStopGradients(self):
-            //    def _MakeGraph(rng, stop_gradients=()):
-            //        def _FunctionOf(xs, k=3):
-            //            return ops.convert_to_tensor(
-            //                sum(math_ops.matmul(rng.rand(k, k), x) for x in xs)
-            //                + rng.rand(k, k))
-            //        a = _FunctionOf([])
-            //        if "a" in stop_gradients: a = array_ops.stop_gradient(a)
-            //        b = _FunctionOf([a])
-            //        if "b" in stop_gradients: b = array_ops.stop_gradient(b)
-            //        c = _FunctionOf([a, b])
-            //        if "c" in stop_gradients: c = array_ops.stop_gradient(c)
-            //        d = _FunctionOf([b, c])
-            //        if "d" in stop_gradients: d = array_ops.stop_gradient(d)
-            //        return dict(a=a, b=b, c=c, d=d)
-            //    def _Gradients(ys, xs, **kwargs):
-            //        dydxs = gradients.gradients(ys, xs, **kwargs)
-            //        dydxs = [0. * x if dydx is None else dydx
-            //                 for x, dydx in zip(xs, dydxs)]
-            //        return dydxs
-            //    seed = np.random.randint(1000)
-            //    cases = []
-            //    subsets = [""] + "a b c d ab ac ad bc bd cd abc abd acd bcd abcd".split()
-            //    graph = _MakeGraph(np.random.RandomState(seed))
-            //    for constants in subsets:
-            //        graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants)
-            //        for variables_ in subsets:
-            //            # compute the gradient when stopped using tf.stop_gradients
-            //            grad1 = _Gradients([graph_with_stops["d"]],
-            //                               [graph_with_stops[v] for v in variables_])
-            //            # compute the gradient when stopped using the stop_gradients kwarg
-            //            grad2 = _Gradients([graph["d"]],
-            //                               [graph[v] for v in variables_],
-            //                               stop_gradients=[graph[v] for v in constants])
-            //            cases.append(dict(grad1=grad1, grad2=grad2,
-            //                              constants=constants, variables=variables_))
-            //    # evaluate all tensors in one call to session.run for speed
-            //    with self.cached_session() as sess:
-            //        results = sess.run([(case["grad1"], case["grad2"]) for case in cases])
-            //    for (npgrad1, npgrad2), case in zip(results, cases):
-            //        for a, b in zip(npgrad1, npgrad2):
-            //            np.testing.assert_allclose(a, b)
+                return new Dictionary<char, Tensor>
+                {
+                    { 'a', a },
+                    { 'b', b },
+                    { 'c', c },
+                    { 'd', d }
+                };
+            }
+
+            Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] stop_gradients = null)
+            {
+                var dydxs = tf.gradients(ys, xs, stop_gradients);
+                dydxs = dydxs.Select((dydx, i) => dydx == null ? xs[i] * 0 : dydx).ToArray();
+                return dydxs;
+            }
+
+            var seed = np.random.randint(1000);
+            // TODO: remove next line when np.random.RandomState implemented.
+            tf.set_random_seed(seed);
+            var cases = new List<Case>();
+            // TODO: add "" case.
+            var subsets = new List<string> { "" }.Concat("a b c d ab ac ad bc bd cd abc abd acd bcd abcd".Split());
+            // TODO: pass np.random.RandomState(seed) instead of np.random
+            var graph = makeGraph(np.random, string.Empty);
+            foreach (var constants in subsets)
+            {
+                var graphWithStops = makeGraph(np.random, constants);
+                foreach (var variables_ in subsets)
+                {
+                    // compute the gradient when stopped using tf.stop_gradients
+                    var grad1 = gradients(
+                        new[] { graphWithStops['d'] },
+                        variables_.ToCharArray().Select(v => graphWithStops[v]).ToArray()
+                    );
+                    // compute the gradient when stopped using the stop_gradients from args
+                    var grad2 = gradients(
+                        new[] { graph['d'] },
+                        variables_.ToCharArray().Select(v => graph[v]).ToArray(),
+                        constants.ToCharArray().Select(c => graph[c]).DefaultIfEmpty(null)?.ToArray()
+                    );
+                    cases.Add(new Case
+                    {
+                        grad1 = grad1,
+                        grad2 = grad2,
+                        variables = variables_,
+                        constants = constants,
+                    });
+                }
+            }
+
+            // evaluate all tensors in one call to session.run for speed
+            using (var sess = self.cached_session())
+            {
+                var results = sess.run(
+                    cases.Select(case_ => (
+                        case_.grad1,
+                        case_.grad2
+                    )).ToArray()
+                );
+
+                foreach (var (result, case_) in results.Zip(cases))
+                {
+                    var npgrad1 = result[0];
+                    var npgrad2 = result[1];
+                    foreach (var (a, b) in npgrad1.Zip(npgrad2))
+                    {
+                        // TODO: np.testing.assert_allclose(a, b);
+                        CollectionAssert.AreEqual(a.ToArray(), b.ToArray(), new CollectionComparer());
+                    }
+                }
+            }
        }
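For readers of the port above, the semantics under test: stop_gradients makes tf.gradients treat the listed tensors as constants during differentiation. A condensed sketch of what testPartialDerivatives asserts:

    var x = tf.constant(1.0);
    var y = 2 * x;
    var z = x + y;
    var total = tf.gradients(z, new[] { x, y });    // dz/dx = 3.0 (x also feeds y), dz/dy = 1.0
    var partial = tf.gradients(z, new[] { x, y },
        stop_gradients: new[] { x, y });            // both partial gradients are 1.0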
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsNoneUnconnectedGradients()
         {
@@ -686,7 +772,7 @@ public void testUnconnectedGradientsNoneUnconnectedGradients()
             //        self.assertIsNone(grad[0])
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsZerosUnconnectedGradients()
         {
@@ -700,15 +786,21 @@ public void testUnconnectedGradientsZerosUnconnectedGradients()
             //        [y], [x], unconnected_gradients="zero")
             //    with self.cached_session() as sess:
             //        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(grads)[0])
+
+            // tf.Graph().as_default();
+            // var x = tf.constant(1.0, shape: new long[] { 2, 2 });
+            // var y = tf.constant(3.0, shape: new long[] { 3, 1 });
+            // var grads = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero");
+            // using (self.cached_session())
+            // {
+            //     self.assertAllEqual(new[,] { { 0.0, 0.0 }, { 0.0, 0.0 } }, self.evaluate(grads)[0]);
+            // }
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsZeroConnectedGradients()
         {
-
-
             //def testUnconnectedGradientsZeroConnectedGradients(self):
             //    with ops.Graph().as_default():
             //        x = constant(1.0)
@@ -717,9 +809,19 @@ public void testUnconnectedGradientsZeroConnectedGradients()
             //    with self.cached_session() as sess:
             //        self.assertEquals(3.0, self.evaluate(grad)[0])
+
+            // tf.Graph().as_default();
+
+            // var x = tf.constant(1.0f);
+            // var y = x * 3.0f;
+            // var grad = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero");
+            // using (var sess = tf.Session())
+            // {
+            //     self.assertEquals(3.0, self.evaluate(grad)[0]);
+            // }
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnknownUnconnectedGradientsValueGiven()
         {
             //    with self.assertRaisesRegexp(
             //        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
             //        gradients.gradients([y], [x], unconnected_gradients="nonsense")
-        }
-
-
-
-        /*
-
-
-
-        */
     }
 }
diff --git a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
index c42445cf1..d671b6096 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
@@ -3,6 +3,7 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System;
 
 namespace TensorFlowNET.UnitTest
 {
@@ -22,13 +23,86 @@ public void Initialize()
             contents = tf.io.read_file(imgPath);
         }
 
+        [TestMethod]
+        public void adjust_contrast()
+        {
+            var input = np.array(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f);
+            var image = tf.reshape(input, new int[] { 3, 3, 1 });
+
+            var init = tf.global_variables_initializer();
+            var sess = tf.Session();
+            sess.run(init);
+            var adjust_contrast = tf.image.adjust_contrast(image, 2.0f);
+            var result = sess.run(adjust_contrast);
+            var res = np.array(-4f, -2f, 0f, 2f, 4f, 6f, 8f, 10f, 12f).reshape((3, 3, 1));
+            Assert.AreEqual(result.numpy(), res);
+        }
+
+        [Ignore]
+        [TestMethod]
+        public void adjust_hue()
+        {
+            var image = tf.constant(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 });
+            image = tf.reshape(image, new int[] { 3, 2, 3 });
+            var adjusted_image = tf.image.adjust_hue(image, 0.2f);
+            var res = tf.constant(new int[] { 2, 1, 3, 4, 5, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 18 });
+            res = tf.reshape(res, (3, 2, 3));
+            Assert.AreEqual(adjusted_image, res);
+        }
+
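For context on the expected values in the adjust_contrast test above, the op rescales pixels around the per-channel mean:

    // output = (x - mean(x)) * contrast_factor + mean(x)
    // mean(0..8) = 4, factor = 2  =>  2 * (0 - 4) + 4 = -4, ..., 2 * (8 - 4) + 4 = 12

which matches the asserted -4..12 ramp.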
+        [TestMethod]
+        public void combined_non_max_suppression()
+        {
+            var boxesX = tf.constant(new float[,] { { 200, 100, 150, 100 }, { 220, 120, 150, 100 }, { 190, 110, 150, 100 }, { 210, 112, 150, 100 } });
+            var boxes1 = tf.reshape(boxesX, (1, 4, 1, 4));
+            var scoresX = tf.constant(new float[,] { { 0.2f, 0.7f, 0.1f }, { 0.1f, 0.8f, 0.1f }, { 0.3f, 0.6f, 0.1f }, { 0.05f, 0.9f, 0.05f } });
+            var scores1 = tf.reshape(scoresX, (1, 4, 3));
+
+            var init = tf.global_variables_initializer();
+            var sess = tf.Session();
+            sess.run(init);
+
+            var (boxes, scores, classes, valid_detections) = tf.image.combined_non_max_suppression(boxes1, scores1, 10, 10, 0.5f, 0.2f, clip_boxes: false);
+            var result = sess.run((boxes, scores, classes, valid_detections));
+
+            var boxes_gt = tf.constant(new float[,] { { 210f, 112f, 150f, 100f }, { 200f, 100f, 150f, 100f }, { 190f, 110f, 150f, 100f },
+                { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f } });
+            boxes_gt = tf.reshape(boxes_gt, (1, 10, 4));
+            Assert.AreEqual(result.Item1.numpy(), boxes_gt.numpy());
+            var scores_gt = tf.constant(new float[,] { { 0.9f, 0.7f, 0.3f, 0f, 0f, 0f, 0f, 0f, 0f, 0f } });
+            scores_gt = tf.reshape(scores_gt, (1, 10));
+            Assert.AreEqual(result.Item2.numpy(), scores_gt.numpy());
+            var classes_gt = tf.constant(new float[,] { { 1f, 1f, 0f, 0f, 0f, 0f, 0f, 0f, 0f, 0f } });
+            classes_gt = tf.reshape(classes_gt, (1, 10));
+            Assert.AreEqual(result.Item3.numpy(), classes_gt.numpy());
+            var valid_detections_gt = tf.constant(new int[,] { { 3 } });
+            valid_detections_gt = tf.reshape(valid_detections_gt, (1));
+            Assert.AreEqual(result.Item4.numpy(), valid_detections_gt.numpy());
+        }
+
+        [TestMethod]
+        public void crop_and_resize()
+        {
+            int BATCH_SIZE = 1;
+            int NUM_BOXES = 5;
+            int IMAGE_HEIGHT = 256;
+            int IMAGE_WIDTH = 256;
+            int CHANNELS = 3;
+            var crop_size = tf.constant(new int[] { 24, 24 });
+            var image = tf.random.uniform((BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS));
+            var boxes = tf.random.uniform((NUM_BOXES, 4));
+            var box_ind = tf.random.uniform((NUM_BOXES), minval: 0, maxval: BATCH_SIZE, dtype: TF_DataType.TF_INT32);
+            var output = tf.image.crop_and_resize(image, boxes, box_ind, crop_size);
+            Assert.AreEqual((5, 24, 24, 3), output.shape);
+        }
+
         [TestMethod]
         public void decode_image()
         {
             var img = tf.image.decode_image(contents);
             Assert.AreEqual(img.name, "decode_image/DecodeImage:0");
         }
-        
+
         [TestMethod]
         public void resize_image()
         {
diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
index 513791933..ccf59f5ae 100644
--- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
@@ -6,6 +6,8 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using OneOf.Types;
+using System.Collections.Generic;
 
 namespace TensorFlowNET.UnitTest
 {
@@ -139,6 +141,21 @@ public void assertProtoEquals(object toProto, object o)
 
         #region tensor evaluation and test session
 
+        private Session _cached_session = null;
+        private Graph _cached_graph = null;
+        private object _cached_config = null;
+        private bool _cached_force_gpu = false;
+
+        private void _ClearCachedSession()
+        {
+            if (self._cached_session != null)
+            {
+                self._cached_session.Dispose();
+                self._cached_session = null;
+            }
+        }
+
         //protected object _eval_helper(Tensor[] tensors)
         //{
         //    if (tensors == null)
@@ -203,10 +220,56 @@ public T evaluate<T>(Tensor tensor)
             }
         }
 
-
-        public Session cached_session()
+        /// <summary>Returns a TensorFlow Session for use in executing tests.</summary>
+        public Session cached_session(
+            Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false)
         {
-            throw new NotImplementedException();
+            // This method behaves differently than self.session(): for performance reasons
+            // `cached_session` will by default reuse the same session within the same
+            // test. The session returned by this function will only be closed at the end
+            // of the test (in the TearDown function).
+
+            // Use the `use_gpu` and `force_gpu` options to control where ops are run. If
+            // `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if
+            // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as
+            // possible. If both `force_gpu` and `use_gpu` are False, all ops are pinned to
+            // the CPU.
+
+            // Example:
+            // python
+            // class MyOperatorTest(test_util.TensorFlowTestCase):
+            //     def testMyOperator(self):
+            //         with self.cached_session() as sess:
+            //             valid_input = [1.0, 2.0, 3.0, 4.0, 5.0]
+            //             result = MyOperator(valid_input).eval()
+            //             self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0])
+            //             invalid_input = [-1.0, 2.0, 7.0]
+            //             with self.assertRaisesOpError("negative input not supported"):
+            //                 MyOperator(invalid_input).eval()
+
+            // Args:
+            //     graph: Optional graph to use during the returned session.
+            //     config: An optional config_pb2.ConfigProto to use to configure the
+            //         session.
+            //     use_gpu: If True, attempt to run as many ops as possible on GPU.
+            //     force_gpu: If True, pin all ops to `/device:GPU:0`.
+
+            // Yields:
+            //     A Session object that should be used as a context manager to surround
+            //     the graph building and execution code in a test case.
+
+            // TODO:
+            // if context.executing_eagerly():
+            //     return self._eval_helper(tensors)
+            // else:
+            {
+                var sess = self._get_cached_session(
+                    graph, config, force_gpu, crash_if_inconsistent_args: true);
+                return self._constrain_devices_and_set_default(sess, use_gpu, force_gpu);
+            }
         }
 
         //Returns a TensorFlow Session for use in executing tests.
@@ -254,6 +317,39 @@ public Session session(Graph graph = null, object config = null, bool use_gpu =
             return s.as_default();
         }
 
+        private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu)
+        {
+            // Set the session and its graph to global default and constrain devices.
+            if (tf.executing_eagerly())
+                return null;
+            else
+            {
+                sess.graph.as_default();
+                sess.as_default();
+                if (force_gpu)
+                {
+                    // TODO:
+                    // Use the name of an actual device if one is detected, or
+                    // '/device:GPU:0' otherwise
+                    /* var gpu_name = gpu_device_name();
+                       if (!gpu_name)
+                           gpu_name = "/device:GPU:0";
+                       using (sess.graph.device(gpu_name))
+                       {
+                           yield return sess;
+                       } */
+                    return sess;
+                }
+                else if (use_gpu)
+                    return sess;
+                else
+                    using (sess.graph.device("/device:CPU:0"))
+                        return sess;
+            }
+        }
+
         // See session() for details.
         private Session _create_session(Graph graph, object cfg, bool forceGpu)
         {
@@ -298,6 +394,50 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu)
             return new Session(graph);//, config = prepare_config(config))
         }
 
+        private Session _get_cached_session(
+            Graph graph = null,
+            object config = null,
+            bool force_gpu = false,
+            bool crash_if_inconsistent_args = true)
+        {
+            // See cached_session() for documentation.
+            if (self._cached_session == null)
+            {
+                var sess = self._create_session(graph, config, force_gpu);
+                self._cached_session = sess;
+                self._cached_graph = graph;
+                self._cached_config = config;
+                self._cached_force_gpu = force_gpu;
+                return sess;
+            }
+            else
+            {
+                if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph))
+                    throw new ValueError(@"The graph used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                if (crash_if_inconsistent_args && !self._cached_config.Equals(config))
+                {
+                    throw new ValueError(@"The config used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu))
+                {
+                    throw new ValueError(@"The force_gpu value used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                return _cached_session;
+            }
+        }
+
+        [TestCleanup]
+        public void Cleanup()
+        {
+            _ClearCachedSession();
+        }
+
         #endregion
 
         public void AssetSequenceEqual<T>(T[] a, T[] b)
diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb
new file mode 100644
index 000000000..c37cc37bd
--- /dev/null
+++ b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb
@@ -0,0 +1 @@
+̟땐͉ Σ(ռ2
\ No newline at end of file
diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb
new file mode 100644
index 000000000..5fe8f1a65
--- /dev/null
+++ b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb
@@ -0,0 +1,7 @@
+
+&root"_tf_keras_sequential*&{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config":
{"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 9, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 5, 3]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 5, 3]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 5, 3]}, "float32", "input_1"]}, "keras_version": "2.12.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "shared_object_id": 0}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8}]}}, "training_config": {"loss": "binary_crossentropy", "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 11}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": false, "is_legacy_optimizer": false, "learning_rate": 0.0010000000474974513, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 + root.layer_with_weights-0"_tf_keras_rnn_layer* {"name": "lstm", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, 
"preserve_input_structure_in_config": false, "autocast": true, "class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 3]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 12}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 5, 3]}}2 +root.layer_with_weights-1"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 32}}, "shared_object_id": 13}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 32]}}2 +root.layer_with_weights-0.cell"_tf_keras_layer*{"name": "lstm_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "LSTMCell", "config": {"name": "lstm_cell", "trainable": true, "dtype": "float32", "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 4, "build_input_shape": {"class_name": "__tuple__", "items": [null, 3]}}2 +Rroot.keras_api.metrics.0"_tf_keras_metric*{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": 
"float32"}, "shared_object_id": 14}2 +Sroot.keras_api.metrics.1"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "accuracy", "dtype": "float32", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 11}2 \ No newline at end of file diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb new file mode 100644 index 000000000..618c800eb Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb differ diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 new file mode 100644 index 000000000..ea67db4f4 Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 differ diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index new file mode 100644 index 000000000..11f13d165 Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index differ diff --git a/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs b/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs new file mode 100644 index 000000000..256eb69c1 --- /dev/null +++ b/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs @@ -0,0 +1,33 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow.Keras.Layers; +using static Tensorflow.Binding; +using static Tensorflow.KerasApi; + +namespace Tensorflow.Keras.UnitTest +{ + [TestClass] + public class InitLayerNameTest + { + [TestMethod] + public void RNNLayerNameTest() + { + var simpleRnnCell = keras.layers.SimpleRNNCell(1); + Assert.AreEqual("simple_rnn_cell", simpleRnnCell.Name); + var simpleRnn = keras.layers.SimpleRNN(2); + Assert.AreEqual("simple_rnn", simpleRnn.Name); + var lstmCell = keras.layers.LSTMCell(2); + Assert.AreEqual("lstm_cell", lstmCell.Name); + var lstm = keras.layers.LSTM(3); + Assert.AreEqual("lstm", lstm.Name); + } + + [TestMethod] + public void ConvLayerNameTest() + { + var conv2d = keras.layers.Conv2D(8, activation: "linear"); + Assert.AreEqual("conv2d", conv2d.Name); + var conv2dTranspose = keras.layers.Conv2DTranspose(8); + Assert.AreEqual("conv2d_transpose", conv2dTranspose.Name); + } + } +} diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs index 98d909668..7ebb53db3 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs @@ -110,6 +110,17 @@ public void Embedding() var output_array = model.predict(input_array); Assert.AreEqual((32, 10, 64), output_array.shape); } + [TestMethod] + public void EmbeddingGrad() + { + var inputs = keras.layers.Input(shape: new[] { 32, 10 }); + var outputs = keras.layers.Embedding(1000, 64, input_length: 10).Apply(inputs); + var model = keras.Model(inputs: inputs, outputs: outputs); + var input_array = np.random.randint(1000, size: (1, 32, 10)); + var output_array = np.random.random(size: (1, 32, 10, 64)); + model.compile("rmsprop", "mse", new[] { "accuracy" }); + model.fit(input_array, output_array); + } /// /// 
+            var inputs = keras.layers.Input(shape: new[] { 32, 10 });
+            var outputs = keras.layers.Embedding(1000, 64, input_length: 10).Apply(inputs);
+            var model = keras.Model(inputs: inputs, outputs: outputs);
+            var input_array = np.random.randint(1000, size: (1, 32, 10));
+            var output_array = np.random.random(size: (1, 32, 10, 64));
+            model.compile("rmsprop", "mse", new[] { "accuracy" });
+            model.fit(input_array, output_array);
+        }
 
         /// <summary>
         /// https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense
diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
index 8eeee7a88..dbf5cae1e 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
@@ -5,8 +5,9 @@
 using System.Text;
 using System.Threading.Tasks;
 using Tensorflow.Common.Types;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Saving;
 using Tensorflow.NumPy;
 using Tensorflow.Train;
@@ -38,8 +39,6 @@ public void StackedRNNCell()
             var cells = new IRnnCell[] { tf.keras.layers.SimpleRNNCell(4), tf.keras.layers.SimpleRNNCell(5) };
             var stackedRNNCell = tf.keras.layers.StackedRNNCells(cells);
             var (output, state) = stackedRNNCell.Apply(inputs, states);
-            Console.WriteLine(output);
-            Console.WriteLine(state.shape);
             Assert.AreEqual((32, 5), output.shape);
             Assert.AreEqual((32, 4), state[0].shape);
         }
@@ -108,6 +107,7 @@ public void RNNForSimpleRNNCell()
             var inputs = tf.random.normal((32, 10, 8));
             var cell = tf.keras.layers.SimpleRNNCell(10, dropout: 0.5f, recurrent_dropout: 0.5f);
             var rnn = tf.keras.layers.RNN(cell: cell);
+            var cfg = rnn.get_config();
             var output = rnn.Apply(inputs);
 
             Assert.AreEqual((32, 10), output.shape);
@@ -132,5 +132,36 @@ public void RNNForLSTMCell()
             Console.WriteLine($"output: {output}");
             Assert.AreEqual((5, 4), output.shape);
         }
+
+        [TestMethod]
+        public void GRUCell()
+        {
+            var inputs = tf.random.normal((32, 10, 8));
+            var rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4));
+            var output = rnn.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+            rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4, reset_after: false, use_bias: false));
+            output = rnn.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+        }
+
+        [TestMethod]
+        public void GRU()
+        {
+            var inputs = tf.ones((32, 10, 8));
+            var gru = tf.keras.layers.GRU(4);
+            var output = gru.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+        }
+
+        [TestMethod]
+        public void Bidirectional()
+        {
+            var bi = tf.keras.layers.Bidirectional(keras.layers.LSTM(10, return_sequences: true));
+            var inputs = tf.random.normal((32, 10, 8));
+            var outputs = bi.Apply(inputs);
+            Assert.AreEqual((32, 10, 20), outputs.shape);
+        }
     }
}
diff --git a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
index 10db2bd11..cb570fc0c 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
@@ -1,5 +1,7 @@
-using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Microsoft.VisualStudio.TestPlatform.Utilities;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System.Linq;
+using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Optimizers;
 using Tensorflow.Keras.UnitTest.Helpers;
 using Tensorflow.NumPy;
@@ -79,6 +81,18 @@ public void ModelWithSelfDefinedModule()
             model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size, num_epochs);
         }
 
+        [Ignore]
+        [TestMethod]
+        public void LSTMLoad()
+        {
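+            // Round-trip check for SavedModel support: reload the LSTM model
+            // shipped under Assets/lstm_from_sequential, then re-compile and
+            // fit it on random data to confirm the restored graph is trainable.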
+            var model = tf.keras.models.load_model(@"Assets/lstm_from_sequential");
+            model.summary();
+            model.compile(tf.keras.optimizers.Adam(), tf.keras.losses.MeanSquaredError(), new string[] { "accuracy" });
+            var inputs = tf.random.normal(shape: (10, 5, 3));
+            var outputs = tf.random.normal(shape: (10, 1));
+            model.fit(inputs.numpy(), outputs.numpy(), batch_size: 10, epochs: 5, workers: 16, use_multiprocessing: true);
+        }
+
         [Ignore]
         [TestMethod]
         public void VGG19()
diff --git a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
index 58c176e82..3910eba1c 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
+++ b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
@@ -65,6 +65,22 @@
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Include="Assets\lstm_from_sequential\fingerprint.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\keras_metadata.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\saved_model.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\variables\variables.data-00000-of-00001">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\variables\variables.index">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
index 8317346ea..183544ab6 100644
--- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
+++ b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
@@ -1,7 +1,10 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System;
+using System.Collections.Generic;
 using System.Linq;
+using Tensorflow.NumPy;
 using static Tensorflow.Binding;
+using static Tensorflow.KerasApi;
 
 namespace TensorFlowNET.UnitTest.Dataset
 {
@@ -195,5 +198,40 @@ public void Shuffle()
 
             Assert.IsFalse(allEqual);
         }
+        [Ignore]
+        [TestMethod]
+        public void GetData()
+        {
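+            // Smoke test for the IMDB loader: load the top-20k-word vocabulary,
+            // strip the zero padding from each review (RemoveZeros below), and
+            // re-pad every sequence to a fixed length of `maxlen` tokens.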
+            var vocab_size = 20000; // Only consider the top 20k words
+            var maxlen = 200; // Only consider the first 200 words of each movie review
+            var dataset = keras.datasets.imdb.load_data(num_words: vocab_size, maxlen: maxlen);
+            var x_train = dataset.Train.Item1;
+            var y_train = dataset.Train.Item2;
+            var x_val = dataset.Test.Item1;
+            var y_val = dataset.Test.Item2;
+
+            x_train = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_train), maxlen: maxlen);
+            x_val = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_val), maxlen: maxlen);
+            print(len(x_train) + " Training sequences");
+            print(len(x_val) + " Validation sequences");
+        }
+        // Truncates each row at its first zero, i.e. drops the trailing
+        // zero padding from each ragged review sequence.
+        IEnumerable<int[]> RemoveZeros(NDArray data)
+        {
+            var data_array = (int[,])data.ToMultiDimArray<int>();
+            List<int[]> new_data = new List<int[]>();
+            for (var i = 0; i < data_array.GetLength(0); i++)
+            {
+                List<int> new_array = new List<int>();
+                for (var j = 0; j < data_array.GetLength(1); j++)
+                {
+                    if (data_array[i, j] == 0)
+                        break;
+                    else
+                        new_array.Add(data_array[i, j]);
+                }
+                new_data.Add(new_array.ToArray());
+            }
+            return new_data;
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
index 72f598e46..675689bb1 100644
--- a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
@@ -2,6 +2,7 @@
 using Tensorflow.NumPy;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System.Linq;
 
 namespace TensorFlowNET.UnitTest.ManagedAPI
 {
@@ -92,5 +93,17 @@ public void TensorArray()
             Assert.AreEqual(ta.read(1).numpy(), 20f);
             Assert.AreEqual(ta.read(2).numpy(), 30f);
         }
+
+        /// <summary>
+        /// https://www.tensorflow.org/api_docs/python/tf/reverse
+        /// </summary>
+        [TestMethod]
+        public void ReverseArray()
+        {
+            var a = tf.random.normal((2, 3));
+            var b = tf.reverse(a, -1);
+            Assert.IsTrue(Equal(a[0].ToArray<float>().Reverse().ToArray(), b[0].ToArray<float>()));
+            Assert.IsTrue(Equal(a[1].ToArray<float>().Reverse().ToArray(), b[1].ToArray<float>()));
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
index 42ac641b1..411deb18f 100644
--- a/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
@@ -1,6 +1,8 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
 using System.Linq;
 using Tensorflow;
+using Tensorflow.NumPy;
 using static Tensorflow.Binding;
 
 namespace TensorFlowNET.UnitTest.ManagedAPI
@@ -57,5 +59,26 @@ public void Erf()
             var actual = erf.ToArray<float>();
             Assert.IsTrue(Equal(expected, actual));
         }
+
+        [TestMethod]
+        public void ReduceEuclideanNorm()
+        {
+            var x = tf.constant(new[,] { { 1, 2, 3 }, { 1, 1, 1 } });
+            Assert.AreEqual(tf.math.reduce_euclidean_norm(x).numpy(), 4);
+
+            var y = tf.constant(new[,] { { 1, 2, 3 }, { 1, 1, 1 } }, dtype: tf.float32);
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y).numpy(), 4.1231055f));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 0).ToArray<float>(),
+                new float[] { np.sqrt(2f), np.sqrt(5f), np.sqrt(10f) }));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 1).ToArray<float>(),
+                new float[] { np.sqrt(14f), np.sqrt(3f) }));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 1, keepdims: true).ToArray<float>(),
+                new float[] { np.sqrt(14f), np.sqrt(3f) }));
+
+            Assert.AreEqual(tf.math.reduce_euclidean_norm(y, (0, 1)).numpy(), np.sqrt(17f));
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
index 240960c91..7a6a7f92c 100644
--- a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
+++ b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
@@ -41,8 +41,8 @@
-
-
+
+