diff --git a/src/TensorFlowNET.Core/APIs/tf.array.cs b/src/TensorFlowNET.Core/APIs/tf.array.cs index 6a646512a..4d9c3da58 100644 --- a/src/TensorFlowNET.Core/APIs/tf.array.cs +++ b/src/TensorFlowNET.Core/APIs/tf.array.cs @@ -91,8 +91,7 @@ public Tensor concat(IEnumerable values, int axis, string name = "concat return identity(values.First(), name: scope); }); } - - return gen_array_ops.concat_v2(values.ToArray(), ops.convert_to_tensor(axis), name: name); + return array_ops.concat(values.ToArray(), axis, name: name); } /// @@ -163,14 +162,17 @@ public Tensor transpose(T1 a, Axis perm = null, string name = "transpose", b /// Reverses specific dimensions of a tensor. /// /// - /// + /// The indices of the dimensions to reverse. Must be in the range [-rank(tensor), rank(tensor)). /// /// - public Tensor reverse(Tensor tensor, int[] axis, string name = null) - => gen_array_ops.reverse(tensor, ops.convert_to_tensor(axis), name: name); - - public Tensor reverse(Tensor tensor, Tensor axis, string name = null) - => gen_array_ops.reverse(tensor, axis, name: name); + public Tensor reverse(Tensor tensor, Axis axis, string name = null) + { + if (axis.IsScalar) + { + axis = new Axis(axis.axis); + } + return array_ops.reverse(tensor, axis, name: name); + } /// /// Returns the rank of a tensor. diff --git a/src/TensorFlowNET.Core/APIs/tf.image.cs b/src/TensorFlowNET.Core/APIs/tf.image.cs index 9230b50dc..ac9cbc60d 100644 --- a/src/TensorFlowNET.Core/APIs/tf.image.cs +++ b/src/TensorFlowNET.Core/APIs/tf.image.cs @@ -14,6 +14,10 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using OneOf.Types; +using System; +using System.Buffers.Text; +using Tensorflow.Contexts; using static Tensorflow.Binding; namespace Tensorflow @@ -162,17 +166,108 @@ public Tensor ssim_multiscale(Tensor img1, Tensor img2, float max_val, float[] p public Tensor sobel_edges(Tensor image) => image_ops_impl.sobel_edges(image); - public Tensor decode_jpeg(Tensor contents, - int channels = 0, - int ratio = 1, - bool fancy_upscaling = true, - bool try_recover_truncated = false, - int acceptable_fraction = 1, - string dct_method = "", - string name = null) - => gen_image_ops.decode_jpeg(contents, channels: channels, ratio: ratio, - fancy_upscaling: fancy_upscaling, try_recover_truncated: try_recover_truncated, - acceptable_fraction: acceptable_fraction, dct_method: dct_method); + /// + /// Adjust contrast of RGB or grayscale images. + /// + /// Images to adjust. At least 3-D. + /// + /// A float multiplier for adjusting contrast. + /// The contrast-adjusted image or images. + public Tensor adjust_contrast(Tensor images, float contrast_factor, string name = null) + => gen_image_ops.adjust_contrastv2(images, contrast_factor, name); + + /// + /// Adjust hue of RGB images. + /// + /// RGB image or images. The size of the last dimension must be 3. + /// float. How much to add to the hue channel. + /// A name for this operation (optional). + /// Adjusted image(s), same shape and DType as `image`. + /// if `delta` is not in the interval of `[-1, 1]`. + public Tensor adjust_hue(Tensor images, float delta, string name = null) + { + if (tf.Context.executing_eagerly()) + { + if (delta < -1f || delta > 1f) + throw new ValueError("delta must be in the interval [-1, 1]"); + } + return gen_image_ops.adjust_hue(images, delta, name: name); + } + + /// + /// Adjust saturation of RGB images. + /// + /// RGB image or images. 
The size of the last dimension must be 3.
+ /// float. Factor to multiply the saturation by.
+ /// A name for this operation (optional).
+ /// Adjusted image(s), same shape and DType as `image`.
+ public Tensor adjust_saturation(Tensor image, float saturation_factor, string name = null)
+ => gen_image_ops.adjust_saturation(image, saturation_factor, name);
+
+ ///
+ /// Greedily selects a subset of bounding boxes in descending order of score.
+ ///
+ ///
+ /// A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
+ /// is 1 then the same boxes are used for all classes; otherwise, if `q` is equal
+ /// to the number of classes, class-specific boxes are used.
+ ///
+ ///
+ /// A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
+ /// representing a single score corresponding to each box (each row of boxes).
+ ///
+ ///
+ /// A scalar integer `Tensor` representing the
+ /// maximum number of boxes to be selected by non-max suppression per class.
+ ///
+ ///
+ /// An int32 scalar representing the maximum number of boxes retained
+ /// over all classes. Note that setting this value to a large number may
+ /// result in an OOM error depending on the system workload.
+ ///
+ ///
+ /// A float representing the threshold for deciding whether boxes
+ /// overlap too much with respect to IOU.
+ ///
+ ///
+ /// A float representing the threshold for deciding when to
+ /// remove boxes based on score.
+ ///
+ ///
+ /// If false, the output nmsed boxes, scores and classes are
+ /// padded/clipped to `max_total_size`. If true, they are padded to be of length
+ /// `max_size_per_class`*`num_classes`, unless that exceeds `max_total_size`,
+ /// in which case they are clipped to `max_total_size`. Defaults to false.
+ ///
+ ///
+ /// If true, the coordinates of the output nmsed boxes will be clipped
+ /// to [0, 1]. If false, the box coordinates are output as they are. Defaults to true.
+ ///
+ ///
+ /// 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor containing the non-max suppressed boxes.
+ /// 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing the scores for the boxes.
+ /// 'nmsed_classes': A [batch_size, max_detections] float32 tensor containing the class for boxes.
+ /// 'valid_detections': A [batch_size] int32 tensor indicating the number of
+ /// valid detections per batch item. Only the top valid_detections[i] entries
+ /// in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
+ /// entries are zero paddings.
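+ /// For example (hypothetical shapes): with `boxes` of shape `[1, 100, 1, 4]`,
+ /// `scores` of shape `[1, 100, 90]`, `max_output_size_per_class = 10` and
+ /// `max_total_size = 50`, `nmsed_boxes` comes back as `[1, 50, 4]` and
+ /// `valid_detections` reports how many of those 50 rows are real detections.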
+ /// + public (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression( + Tensor boxes, + Tensor scores, + int max_output_size_per_class, + int max_total_size, + float iou_threshold, + float score_threshold, + bool pad_per_class = false, + bool clip_boxes = true) + { + var iou_threshold_t = ops.convert_to_tensor(iou_threshold, TF_DataType.TF_FLOAT, name: "iou_threshold"); + var score_threshold_t = ops.convert_to_tensor(score_threshold, TF_DataType.TF_FLOAT, name: "score_threshold"); + var max_total_size_t = ops.convert_to_tensor(max_total_size); + var max_output_size_per_class_t = ops.convert_to_tensor(max_output_size_per_class); + return gen_image_ops.combined_non_max_suppression(boxes, scores, max_output_size_per_class_t, max_total_size_t, + iou_threshold_t, score_threshold_t, pad_per_class, clip_boxes); + } /// /// Extracts crops from the input image tensor and resizes them using bilinear sampling or nearest neighbor sampling (possibly with aspect ratio change) to a common output size specified by crop_size. This is more general than the crop_to_bounding_box op which extracts a fixed size slice from the input image and does not allow resizing or aspect ratio change. @@ -187,7 +282,19 @@ public Tensor decode_jpeg(Tensor contents, /// A name for the operation (optional). /// A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. public Tensor crop_and_resize(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method = "bilinear", float extrapolation_value = 0f, string name = null) => - image_ops_impl.crop_and_resize(image, boxes, box_ind, crop_size, method, extrapolation_value, name); + gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method, extrapolation_value, name); + + public Tensor decode_jpeg(Tensor contents, + int channels = 0, + int ratio = 1, + bool fancy_upscaling = true, + bool try_recover_truncated = false, + int acceptable_fraction = 1, + string dct_method = "", + string name = null) + => gen_image_ops.decode_jpeg(contents, channels: channels, ratio: ratio, + fancy_upscaling: fancy_upscaling, try_recover_truncated: try_recover_truncated, + acceptable_fraction: acceptable_fraction, dct_method: dct_method); public Tensor extract_glimpse(Tensor input, Tensor size, Tensor offsets, bool centered = true, bool normalized = true, bool uniform_noise = true, string name = null) diff --git a/src/TensorFlowNET.Core/APIs/tf.math.cs b/src/TensorFlowNET.Core/APIs/tf.math.cs index ffbc43738..da54a9dd7 100644 --- a/src/TensorFlowNET.Core/APIs/tf.math.cs +++ b/src/TensorFlowNET.Core/APIs/tf.math.cs @@ -46,6 +46,17 @@ public Tensor multiply(Tensor x, Tensor y, string name = null) public Tensor divide_no_nan(Tensor a, Tensor b, string name = null) => math_ops.div_no_nan(a, b); + /// + /// Computes the Euclidean norm of elements across dimensions of a tensor. + /// + /// The tensor to reduce. Should have numeric type. + /// The dimensions to reduce. If `None` (the default), reduces all dimensions.Must be in the range `[-rank(input_tensor), rank(input_tensor))` + /// If true, retains reduced dimensions with length 1. + /// A name for the operation (optional). + /// The reduced tensor, of the same dtype as the input_tensor. + public Tensor reduce_euclidean_norm(Tensor input_tensor, Axis? 
axis = null, bool keepdims = false, string name = null) + => math_ops.reduce_euclidean_norm(input_tensor, axis: axis, keepdims: keepdims, name); + public Tensor square(Tensor x, string name = null) => math_ops.square(x, name: name); @@ -611,5 +622,7 @@ public Tensor squared_difference(Tensor x, Tensor y, string name = null) => gen_math_ops.squared_difference(x: x, y: y, name: name); public Tensor complex(Tensor real, Tensor imag, Tensorflow.TF_DataType? dtype = null, string name = null) => gen_ops.complex(real, imag, dtype, name); + public Tensor exp(Tensor x, + string name = null) => gen_math_ops.exp(x, name); } } diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs index e5cd4e569..397c68c7c 100644 --- a/src/TensorFlowNET.Core/APIs/tf.nn.cs +++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs @@ -144,16 +144,8 @@ public Tensor batch_normalization(Tensor x, Tensor offset, Tensor scale, float variance_epsilon, - string name = null) - { - var inv = math_ops.rsqrt(variance + variance_epsilon); - tf_with(ops.name_scope(name, "batchnorm", (x, mean, variance, scale, offset)), scope => - { - if (scale != null) inv *= scale; - }); - if (offset != null) return x * math_ops.cast(inv, x.dtype) + math_ops.cast(offset - mean * inv, dtype: x.dtype); - else return x * math_ops.cast(inv, x.dtype) + math_ops.cast(-mean * inv, dtype: x.dtype); - } + string name = null) => nn_impl.batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name); + public Tensor max_pool(Tensor value, int[] ksize, int[] strides, string padding, string data_format = "NHWC", string name = null) => nn_ops.max_pool(value, ksize, strides, padding, data_format: data_format, name: name); diff --git a/src/TensorFlowNET.Core/APIs/tf.reshape.cs b/src/TensorFlowNET.Core/APIs/tf.reshape.cs index 5da7b795f..102a81323 100644 --- a/src/TensorFlowNET.Core/APIs/tf.reshape.cs +++ b/src/TensorFlowNET.Core/APIs/tf.reshape.cs @@ -31,6 +31,6 @@ public Tensor reshape(Tensor tensor, public Tensor reshape(Tensor tensor, object[] shape, string name = null) - => gen_array_ops.reshape(tensor, ops.convert_to_tensor(shape), name); + => array_ops.reshape(tensor, shape, name); } } diff --git a/src/TensorFlowNET.Core/APIs/tf.tensor.cs b/src/TensorFlowNET.Core/APIs/tf.tensor.cs index 45aebc0cd..b03168ab3 100644 --- a/src/TensorFlowNET.Core/APIs/tf.tensor.cs +++ b/src/TensorFlowNET.Core/APIs/tf.tensor.cs @@ -68,20 +68,27 @@ public Tensor strided_slice(Tensor input, T[] begin, T[] end, T[] strides = n /// A name for the operation (optional) /// if num_or_size_splits is a scalar returns num_or_size_splits Tensor objects; /// if num_or_size_splits is a 1-D Tensor returns num_or_size_splits.get_shape[0] Tensor objects resulting from splitting value. 
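 /// For example, splitting a tensor of shape [4, 6] with num_split = 3 along
 /// axis 1 yields three tensors of shape [4, 2].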
- public Tensor[] split(Tensor value, int num_split, Tensor axis, string name = null) + public Tensor[] split(Tensor value, int num_split, Axis axis, string name = null) => array_ops.split( value: value, num_or_size_splits: num_split, axis: axis, name: name); - public Tensor[] split(Tensor value, int num_split, int axis, string name = null) + public Tensor[] split(Tensor value, int[] num_split, Axis axis, string name = null) => array_ops.split( value: value, num_or_size_splits: num_split, - axis: ops.convert_to_tensor(axis), + axis: axis, name: name); + //public Tensor[] split(Tensor value, int num_split, Axis axis, string name = null) + // => array_ops.split( + // value: value, + // num_or_size_splits: num_split, + // axis: axis, + // name: name); + public Tensor ensure_shape(Tensor x, Shape shape, string name = null) { return gen_ops.ensure_shape(x, shape, name); diff --git a/src/TensorFlowNET.Core/APIs/tf.tile.cs b/src/TensorFlowNET.Core/APIs/tf.tile.cs index 65975ac83..a3b497e8a 100644 --- a/src/TensorFlowNET.Core/APIs/tf.tile.cs +++ b/src/TensorFlowNET.Core/APIs/tf.tile.cs @@ -23,7 +23,7 @@ public Tensor tile(Tensor input, Tensor multiples, string name = null) => gen_array_ops.tile(input, multiples, name); public Tensor tile(Tensor input, object[] multiples, string name = null) - => gen_array_ops.tile(input, ops.convert_to_tensor(multiples), name); + => array_ops.tile(input, constant_op.constant(shape_utils.from_object_array(multiples).dims), name); public Tensor tile(Tensor input, Shape multiples, string name = null) { diff --git a/src/TensorFlowNET.Core/Framework/IndexedSlices.cs b/src/TensorFlowNET.Core/Framework/IndexedSlices.cs index 24d356fbb..bac5e6fb1 100644 --- a/src/TensorFlowNET.Core/Framework/IndexedSlices.cs +++ b/src/TensorFlowNET.Core/Framework/IndexedSlices.cs @@ -49,12 +49,25 @@ public IndexedSlices(Tensor values, Tensor indices, Tensor dense_shape = null) public static implicit operator Tensor(IndexedSlices indexedSlices) { - return indexedSlices.values; + return _indexed_slices_to_tensor(indexedSlices); } public static implicit operator IndexedSlices(Tensor tensor) { return tensor.Tag as IndexedSlices; } + + /// + /// Converts an IndexedSlices object `value` to a Tensor. 
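+ /// Duplicate entries in `indices` contribute to the same output row, which is
+ /// why the conversion below uses unsorted_segment_sum rather than a plain scatter.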
+ /// The IndexedSlices to convert.
+ /// The optional destination dtype; DtInvalid keeps the dtype of the values.
+ /// An optional name for the resulting tensor.
+ /// Unused; kept for signature parity with the Python implementation.
+ /// A dense Tensor with the same values, with duplicate indices summed.
+ public static Tensor _indexed_slices_to_tensor(IndexedSlices indexedSlices, TF_DataType dtype = TF_DataType.DtInvalid, String name = "", bool as_ref = false)
+ {
+ return gen_math_ops.unsorted_segment_sum(indexedSlices.values, indexedSlices.indices, indexedSlices.dense_shape.slice(0));
+ }
 }
}
diff --git a/src/TensorFlowNET.Core/GlobalUsing.cs b/src/TensorFlowNET.Core/GlobalUsing.cs
index 2fd5b437b..7e02c9083 100644
--- a/src/TensorFlowNET.Core/GlobalUsing.cs
+++ b/src/TensorFlowNET.Core/GlobalUsing.cs
@@ -3,4 +3,7 @@
 global using System.Text;
 global using System.Collections;
 global using System.Data;
-global using System.Linq;
\ No newline at end of file
+global using System.Linq;
+global using Tensorflow.Keras.Engine;
+global using Tensorflow.Framework.Models;
+global using static Tensorflow.Binding;
\ No newline at end of file
diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs
index 1b6bc95ee..4b7027992 100644
--- a/src/TensorFlowNET.Core/Gradients/array_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs
@@ -373,5 +373,13 @@ public static Tensor[] _TransposeGrad(Operation op, Tensor[] grads)
 var p = op.inputs[1];
 return new Tensor[] { array_ops.transpose(grads[0], array_ops.invert_permutation(p)), null };
 }
+
+ [RegisterGradient("ReverseV2")]
+ public static Tensor[] _ReverseV2Grad(Operation op, Tensor[] grads)
+ {
+ var grad = grads[0];
+ var axis = op.inputs[1];
+ return new Tensor[] { array_ops.reverse(grad, axis), null };
+ }
 }
}
diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs
index be1fbbba7..8c3f0f8bd 100644
--- a/src/TensorFlowNET.Core/Gradients/math_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs
@@ -117,6 +117,137 @@ public static Tensor[] _DivNoNanGrad(Operation op, Tensor[] grads)
 };
 }
+ public static string ellipsis = "...";
+ [RegisterGradient("Einsum")]
+ public static Tensor[] _EinsumGrad(Operation op, Tensor[] grads)
+ {
+ // Gradient for Einsum.
+ string equation = (string)op.get_attr("equation");
+ string[] split_equation = equation.Split(new string[] { "->" }, StringSplitOptions.None);
+ var input_subs = split_equation[0];
+ var output_subs = split_equation[1];
+
+ if (op.inputs.Length == 1)
+ {
+ var input_shape = array_ops.shape(op.inputs[0]);
+ var reduced_label_set = new HashSet<char>(new HashSet<char>(input_subs).Except(new HashSet<char>(output_subs + ellipsis)));
+ if (reduced_label_set.Count == 0)
+ return new Tensor[] { math_ops.einsum(string.Format("{0}->{1}", output_subs, input_subs), new Tensors(grads)) };
+ return new Tensor[] { _GetGradReduced(new Tensors(grads), output_subs, input_subs, input_shape, reduced_label_set) };
+ }
+
+ string[] split_input_subs = input_subs.Split(new string[] { "," }, StringSplitOptions.None);
+ var x_subs = split_input_subs[0];
+ var y_subs = split_input_subs[1];
+ // Add ellipsis for broadcasted dimensions if any operand does not have it.
+ // This is because the equation "...ij,jk->ik" may be valid if the 0th input's
+ // batch shape is empty, but the VJP equation "jk,ik->...ij" is not valid
+ // because only the output subscripts contain ellipsis.
+ if (output_subs.Contains(ellipsis))
+ {
+ if (!x_subs.Contains(ellipsis))
+ x_subs += ellipsis;
+ if (!y_subs.Contains(ellipsis))
+ y_subs += ellipsis;
+ }
+ // Obtain the gradients wrt the inputs x and y, without taking into account
+ // the unbroadcasting.
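+ // As a sketch of the two-operand case: for the equation "ij,jk->ik" the
+ // vector-Jacobian products are
+ //   grad_x = einsum("ik,jk->ij", output_grad, y)
+ //   grad_y = einsum("ij,ik->jk", x, output_grad)
+ // i.e. the subscripts of the differentiated input swap places with the
+ // output subscripts, which is exactly what _GetGradWrt constructs below.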
+ var x = op.inputs[0];
+ var y = op.inputs[1];
+ if (grads.GetDataType().is_complex())
+ {
+ x = math_ops.conj(x);
+ y = math_ops.conj(y);
+ }
+
+ var x_shape = array_ops.shape(x);
+ var y_shape = array_ops.shape(y);
+ var grad_x = _GetGradWrt(grads, y, x_shape, x_subs, y_subs, output_subs);
+ var grad_y = _GetGradWrt(grads, x, y_shape, y_subs, x_subs, output_subs);
+
+ if (!output_subs.Contains(ellipsis))
+ return new Tensor[] { grad_x, grad_y };
+ var bx = _GetBcastSubshape(x_subs);
+ int bx_start = bx[0], bx_end = bx[1];
+ var by = _GetBcastSubshape(y_subs);
+ int by_start = by[0], by_end = by[1];
+
+ var x_shape_static = x.shape;
+ var y_shape_static = y.shape;
+ if (x_shape_static.IsFullyDefined &&
+ y_shape_static.IsFullyDefined &&
+ x_shape_static[string.Format("{0}:{1}", bx_start, bx_end)] == y_shape_static[string.Format("{0}:{1}", by_start, by_end)])
+ return new Tensor[] { grad_x, grad_y };
+
+ var r = gen_array_ops.broadcast_gradient_args(x_shape[string.Format("{0}:{1}", bx_start, bx_end)],
+ y_shape[string.Format("{0}:{1}", by_start, by_end)]);
+ var rx = r[0];
+ var ry = r[1];
+ grad_x = array_ops.reshape(math_ops.reduce_sum(grad_x, bx_start + rx), x_shape);
+ grad_y = array_ops.reshape(math_ops.reduce_sum(grad_y, by_start + ry), y_shape);
+ return new Tensor[] { grad_x, grad_y };
+ }
+ protected static Tensor _GetGradWrt(Tensor[] output_grads, Tensor other_operand, Tensor input_shape,
+ string input_subs, string other_subs, string output_subs)
+ {
+ var reduced_label_set = new HashSet<char>(new HashSet<char>(input_subs).Except(new HashSet<char>(output_subs + other_subs + ".")));
+ var left_subs = string.Join("", input_subs.Where(s => !reduced_label_set.Contains(s)));
+ var grad_reduced = math_ops.einsum(string.Format("{0},{1}->{2}", output_subs, other_subs, left_subs), new Tensors((Tensors)output_grads, other_operand));
+ if (reduced_label_set.Count == 0)
+ return grad_reduced;
+ return _GetGradReduced(grad_reduced, left_subs, input_subs, input_shape, reduced_label_set);
+ }
+ protected static Tensor _GetGradReduced(Tensor output_grad, string output_subs, string input_subs, Tensor input_shape, HashSet<char> reduced_label_set)
+ {
+ string reduced_subs;
+ Tensor reduced_dims;
+ List<int> reduced_axes;
+ _GetReducedSubscripts(reduced_label_set, input_shape, input_subs, out reduced_subs, out reduced_dims, out reduced_axes);
+ bool has_repeated_labels = (
+ new HashSet<char>(input_subs).Count + new HashSet<char>(output_subs).Count <
+ input_subs.Length + output_subs.Length);
+ var input_subs_without_reduced_labels = string.Join("", input_subs.Where(s => !reduced_label_set.Contains(s)));
+
+ if (!has_repeated_labels && input_subs_without_reduced_labels == output_subs)
+ {
+ var reduced_shape = math_ops.reduced_shape(input_shape, ops.convert_to_tensor(reduced_axes));
+ return gen_array_ops.broadcast_to(array_ops.reshape(output_grad, reduced_shape), input_shape);
+ }
+ else
+ {
+ var grad_shape_with_reduced_labels = array_ops.concat(new Tensor[] { reduced_dims, array_ops.shape(new Tensors(output_grad)) }, axis: 0);
+ var reduced_shape = array_ops.concat(new Tensor[] { array_ops.ones(reduced_label_set.Count, dtype: dtypes.int32), array_ops.shape(new Tensors(output_grad)) }, axis: 0);
+ var broadcasted_grad = gen_array_ops.broadcast_to(array_ops.reshape(output_grad, reduced_shape), grad_shape_with_reduced_labels);
+ return math_ops.einsum(string.Format("{0}->{1}", reduced_subs + output_subs, input_subs), new Tensors(broadcasted_grad));
+ }
+ }
+ protected static void _GetReducedSubscripts(HashSet<char> reduced_label_set, Tensor input_shape, string subscripts, out string reduced_subs, out Tensor reduced_dims, out List<int> reduced_axes)
+ {
+ reduced_subs = string.Join("", reduced_label_set.Select(c => c.ToString()));
+ reduced_axes = reduced_subs.Select(s => _GetAxisFromLabel(subscripts, s)).ToList();
+ reduced_dims = array_ops.stack(reduced_axes.Select(ax => input_shape[ax]).ToList());
+ }
+ protected static int _GetAxisFromLabel(string subscripts, char label)
+ {
+ var splits = subscripts.Split(new string[] { ellipsis }, StringSplitOptions.None);
+ var index = splits[0].IndexOf(label);
+ if (index != -1) return index;
+ if (splits.Length < 2) throw new OutOfRangeError();
+ index = splits[1].IndexOf(label);
+ if (index != -1) return index;
+ throw new ValueError();
+ }
+ protected static int[] _GetBcastSubshape(string subscripts)
+ {
+ int start = subscripts.IndexOf(ellipsis);
+ if (start == -1) return new int[] { 0, 0 };
+ int remaining = subscripts.Length - (start + ellipsis.Length);
+ int end;
+ if (remaining > 0) end = remaining;
+ else throw new Exception();
+ return new int[] { start, end };
+ }
+
 ///
 /// Returns grad * exp(x).
 ///
diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
index a1ac97a97..a43a91b9a 100644
--- a/src/TensorFlowNET.Core/Gradients/nn_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
@@ -365,6 +365,23 @@ public static Tensor[] _MaxPoolGrad(Operation op, Tensor[] grads)
 };
 }
+ [RegisterGradient("AvgPool")]
+ public static Tensor[] _AvgPoolGrad(Operation op, Tensor[] grads)
+ {
+ Tensor grad = grads[0];
+
+ return new Tensor[]
+ {
+ gen_nn_ops.avg_pool_grad(
+ array_ops.shape(op.inputs[0]),
+ grad,
+ op.get_attr_list<int>("ksize"),
+ op.get_attr_list<int>("strides"),
+ op.get_attr("padding"),
+ op.get_attr("data_format"))
+ };
+ }
+
 ///
 /// Return the gradients for TopK.
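 /// The gradient is nonzero only at the positions that were selected: the
 /// incoming gradient for the values output is scattered back into the shape
 /// of the original input at the returned indices, and the indices output
 /// itself receives no gradient.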
/// diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs new file mode 100644 index 000000000..ef024971d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ExponentialArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs new file mode 100644 index 000000000..788e0f36d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class HardSigmoidArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs new file mode 100644 index 000000000..eb0e18446 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SELUArgs : LayerArgs + { + + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs new file mode 100644 index 000000000..7b4f20795 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftplusArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs new file mode 100644 index 000000000..4e23d261d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftsignArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs new file mode 100644 index 000000000..3dea06a23 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SwishArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs new file mode 100644 index 000000000..5df41b71b --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class TanhArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs new file mode 100644 
index 000000000..3daba9465 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class Conv2DTransposeArgs : Conv2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs new file mode 100644 index 000000000..016d58203 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class AddArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs new file mode 100644 index 000000000..4a81d139d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ConcatenateArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs new file mode 100644 index 000000000..1e3621cb6 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SubtractArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs new file mode 100644 index 000000000..e73aff766 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs new file mode 100644 index 000000000..d143cf471 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs new file mode 100644 index 000000000..e03227feb --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs new file mode 100644 index 000000000..a95cac836 --- /dev/null +++ 
b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs new file mode 100644 index 000000000..4cfff2c15 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class MaxPooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs new file mode 100644 index 000000000..d658a82e9 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/BidirectionalArgs.cs @@ -0,0 +1,20 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.NumPy; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class BidirectionalArgs : AutoSerializeLayerArgs + { + [JsonProperty("layer")] + public ILayer Layer { get; set; } + [JsonProperty("merge_mode")] + public string? MergeMode { get; set; } + [JsonProperty("backward_layer")] + public ILayer BackwardLayer { get; set; } + public NDArray Weights { get; set; } + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs new file mode 100644 index 000000000..cdc3097e9 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUArgs : AutoSerializeLayerArgs + { + public int Units { get; set; } + public Activation Activation { get; set; } + public Activation RecurrentActivation { get; set; } + public bool UseBias { get; set; } = true; + public float Dropout { get; set; } = .0f; + public float RecurrentDropout { get; set; } = .0f; + public IInitializer KernelInitializer { get; set; } + public IInitializer RecurrentInitializer { get; set; } + public IInitializer BiasInitializer { get; set; } + public bool ReturnSequences { get;set; } + public bool ReturnState { get;set; } + public bool GoBackwards { get;set; } + public bool Stateful { get;set; } + public bool Unroll { get;set; } + public bool TimeMajor { get;set; } + public bool ResetAfter { get;set; } + public int Implementation { get; set; } = 2; + + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs new file mode 100644 index 000000000..624756afe --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUCellArgs.cs @@ -0,0 +1,39 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUCellArgs : AutoSerializeLayerArgs + { + [JsonProperty("units")] + public int Units { get; set; } + // TODO(Rinne): lack of initialized value of Activation. Merging keras + // into tf.net could resolve it. 
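+ // Keras itself defaults these to "tanh" and "sigmoid" respectively, so a
+ // null value here is expected to be resolved to those defaults by the cell.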
+ [JsonProperty("activation")] + public Activation Activation { get; set; } + [JsonProperty("recurrent_activation")] + public Activation RecurrentActivation { get; set; } + [JsonProperty("use_bias")] + public bool UseBias { get; set; } = true; + [JsonProperty("dropout")] + public float Dropout { get; set; } = .0f; + [JsonProperty("recurrent_dropout")] + public float RecurrentDropout { get; set; } = .0f; + [JsonProperty("kernel_initializer")] + public IInitializer KernelInitializer { get; set; } + [JsonProperty("recurrent_initializer")] + public IInitializer RecurrentInitializer { get; set; } + [JsonProperty("bias_initializer")] + public IInitializer BiasInitializer { get; set; } + [JsonProperty("reset_after")] + public bool ResetAfter { get;set; } + [JsonProperty("implementation")] + public int Implementation { get; set; } = 2; + + + + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs new file mode 100644 index 000000000..d441dc828 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUOptionalArgs + { + public string Identifier => "GRU"; + + public Tensor Mask { get; set; } = null; + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs index db76fda06..a6beb77e8 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMArgs.cs @@ -1,9 +1,14 @@ -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class LSTMArgs : RNNArgs { // TODO: maybe change the `RNNArgs` and implement this class. public bool UnitForgetBias { get; set; } public int Implementation { get; set; } + + public LSTMArgs Clone() + { + return (LSTMArgs)MemberwiseClone(); + } } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs index 786236e4d..f45032312 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMCellArgs.cs @@ -1,7 +1,7 @@ using Newtonsoft.Json; using static Tensorflow.Binding; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { // TODO: complete the implementation public class LSTMCellArgs : AutoSerializeLayerArgs diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs index 2d7fb001a..d0b73ba44 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs @@ -1,8 +1,8 @@ using Newtonsoft.Json; using System.Collections.Generic; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { // TODO(Rinne): add regularizers. public class RNNArgs : AutoSerializeLayerArgs @@ -23,16 +23,27 @@ public class RNNArgs : AutoSerializeLayerArgs public int? InputDim { get; set; } public int? InputLength { get; set; } // TODO: Add `num_constants` and `zero_output_for_mask`. 
- + [JsonProperty("units")] public int Units { get; set; } + [JsonProperty("activation")] public Activation Activation { get; set; } + [JsonProperty("recurrent_activation")] public Activation RecurrentActivation { get; set; } + [JsonProperty("use_bias")] public bool UseBias { get; set; } = true; public IInitializer KernelInitializer { get; set; } public IInitializer RecurrentInitializer { get; set; } public IInitializer BiasInitializer { get; set; } + [JsonProperty("dropout")] public float Dropout { get; set; } = .0f; + [JsonProperty("zero_output_for_mask")] public bool ZeroOutputForMask { get; set; } = false; + [JsonProperty("recurrent_dropout")] public float RecurrentDropout { get; set; } = .0f; + + public RNNArgs Clone() + { + return (RNNArgs)MemberwiseClone(); + } } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs index 64b500bba..a6520589d 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs @@ -3,7 +3,7 @@ using System.Text; using Tensorflow.Common.Types; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class RnnOptionalArgs: IOptionalArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs index fcfd694d1..e45ef79d0 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNArgs.cs @@ -1,4 +1,4 @@ -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class SimpleRNNArgs : RNNArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs index d21d61905..b84ea21b3 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs @@ -1,6 +1,6 @@ using Newtonsoft.Json; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class SimpleRNNCellArgs: AutoSerializeLayerArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs index 50a6127df..2600f14ee 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/StackedRNNCellsArgs.cs @@ -1,7 +1,7 @@ using System.Collections.Generic; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; -namespace Tensorflow.Keras.ArgsDefinition.Rnn +namespace Tensorflow.Keras.ArgsDefinition { public class StackedRNNCellsArgs : LayerArgs { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs new file mode 100644 index 000000000..ec8e16d59 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/WrapperArgs.cs @@ -0,0 +1,24 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text; + + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class WrapperArgs : AutoSerializeLayerArgs + { + [JsonProperty("layer")] + public ILayer Layer { get; set; } + + public WrapperArgs(ILayer layer) + { + Layer = 
layer; + } + + public static implicit operator WrapperArgs(BidirectionalArgs args) + => new WrapperArgs(args.Layer); + } + +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs b/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs index 096dbd2ef..e114ca97f 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/ICallback.cs @@ -14,6 +14,9 @@ public interface ICallback void on_predict_batch_end(long end_step, Dictionary logs); void on_predict_end(); void on_test_begin(); + void on_test_end(Dictionary logs); void on_test_batch_begin(long step); void on_test_batch_end(long end_step, Dictionary logs); + + } diff --git a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs index ddc72aeec..19f3df9ba 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs @@ -60,7 +60,7 @@ void load_weights(string filepath, bool skip_mismatch = false, object options = null); - Dictionary evaluate(Tensor x, Tensor y, + Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, int steps = -1, diff --git a/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs b/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs new file mode 100644 index 000000000..5a264b631 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/KerasTensor.cs @@ -0,0 +1,75 @@ +namespace Tensorflow.Keras.Engine; + +/// +/// A representation of a Keras in/output during Functional API construction. +/// +public class KerasTensor +{ + private Tensors _original_tensors; + public Tensors original_tensors + { + get => _original_tensors; + set => _original_tensors = value; + } + + private Shape _inferred_value; + public Shape inferred_value => _inferred_value; + + private string _name; + private TensorSpec _type_spec; + public Shape shape => _type_spec.shape; + public TF_DataType dtype => _type_spec.dtype; + + public KerasTensor(TensorSpec type_spec, Shape inferred_value = null, string name = null) + { + _type_spec = type_spec; + _inferred_value = inferred_value; + _name = name; + } + + public static KerasTensor from_tensor(Tensor tensor) + { + var type_spec = tensor.ToTensorSpec(); + Shape? inferred_value = default; + if (tensor.dtype == TF_DataType.TF_INT32 && tensor.rank < 2) + { + inferred_value = tf.ones(tensor).shape; + } + var kt = new KerasTensor(type_spec, inferred_value: inferred_value, name: tensor.name); + kt.original_tensors = tensor; + return kt; + } + + public KerasTensor this[int idx] + => _original_tensors.First()[idx]; + + public KerasTensor this[params Slice[] slices] + => _original_tensors.First()[slices]; + + public override string ToString() + => _original_tensors.Length switch + { + > 1 => "[" + string.Join(", ", _original_tensors.Select(x => $"KerasTensor: shape={x.shape} dtype={x.dtype.as_numpy_name()}{GetInferredValueString()}")) + "]", + 1 => $"KerasTensor: shape={_original_tensors.shape} dtype={_original_tensors.dtype.as_numpy_name()}{GetInferredValueString()}", + _ => _original_tensors.ToString(), + }; + + private string GetInferredValueString() + => _inferred_value == null ? 
"" : $" inferred_value={_inferred_value}"; + + public static implicit operator Tensors(KerasTensor kt) + => kt._original_tensors; + + public static implicit operator Tensor(KerasTensor kt) + { + Tensor tensor = kt._original_tensors; + tensor.IsFromKerasTensor = true; + return tensor; + } + + public static implicit operator KerasTensor(Tensor tensor) + => from_tensor(tensor); + + public static implicit operator KerasTensor(Tensors tensors) + => from_tensor(tensors.First()); +} diff --git a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs index 961ce91ae..6c15fd469 100644 --- a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs +++ b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs @@ -25,6 +25,27 @@ IOptimizer Adam(float learning_rate = 0.001f, bool amsgrad = false, string name = "Adam"); + /// + /// Adam enables L2 weight decay on gradients. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + IOptimizer AdamW(float learning_rate = 0.001f, + float weight_decay = 0.004f, + float beta_1 = 0.9f, + float beta_2 = 0.999f, + float epsilon = 1e-7f, + bool amsgrad = false, + List no_decay_params = null, + string name = "AdamW"); + /// /// Construct a new RMSprop optimizer. /// @@ -42,6 +63,6 @@ IOptimizer RMSprop(float learning_rate = 0.001f, bool centered = false, string name = "RMSprop"); - IOptimizer SGD(float learning_rate); + IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f); } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs index e94c8bf10..2f92c4e57 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs @@ -15,7 +15,7 @@ public interface ILayer: IWithTrackable, IKerasConfigable List Layers { get; } List InboundNodes { get; } List OutboundNodes { get; } - Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? optional_args = null); + Tensors Apply(Tensors inputs, Tensors states = null, bool? training = false, IOptionalArgs? 
optional_args = null); List TrainableVariables { get; } List TrainableWeights { get; } List NonTrainableWeights { get; } diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs index a19508d42..5e08eadc4 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs @@ -1,6 +1,7 @@ using System; using Tensorflow.Framework.Models; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Layers; using Tensorflow.NumPy; using static Google.Protobuf.Reflection.FieldDescriptorProto.Types; @@ -135,7 +136,7 @@ public ILayer EinsumDense(string equation, public ILayer GlobalMaxPooling1D(string data_format = "channels_last"); public ILayer GlobalMaxPooling2D(string data_format = "channels_last"); - public Tensors Input(Shape shape = null, + public KerasTensor Input(Shape shape = null, int batch_size = -1, string name = null, TF_DataType dtype = TF_DataType.DtInvalid, @@ -246,6 +247,49 @@ public ILayer RNN( bool time_major = false ); + public IRnnCell GRUCell( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool reset_after = true); + + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ); + + /// + /// Bidirectional wrapper for RNNs. + /// + /// `keras.layers.RNN` instance, such as `keras.layers.LSTM` or `keras.layers.GRU` + /// automatically. 
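+ /// `merge_mode` is one of "sum", "mul", "concat", "ave" or null; with null the
+ /// forward and backward outputs are not combined and are returned as a list.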
+ /// + public ILayer Bidirectional( + ILayer layer, + string merge_mode = "concat", + NDArray weights = null, + ILayer backward_layer = null); + public ILayer Subtract(); } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs index 8d6fbc976..43df75b17 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs @@ -3,7 +3,7 @@ using System.Text; using Tensorflow.Common.Types; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public interface IRnnCell: ILayer { diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs index e73244a51..8cf6150d3 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public interface IStackedRnnCells : IRnnCell { diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index f29879b0f..c0f9e695d 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -4,6 +4,8 @@ using System.Linq; using System.Text; using Tensorflow.Util; +using Razorvine.Pickle; +using Tensorflow.NumPy.Pickle; using static Tensorflow.Binding; namespace Tensorflow.NumPy @@ -97,6 +99,14 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i return matrix; } + Array ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) + { + Stream deflateStream = reader.BaseStream; + BufferedStream bufferedStream = new BufferedStream(deflateStream); + var unpickler = new Unpickler(); + return (MultiArrayPickleWarpper)unpickler.load(bufferedStream); + } + public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) { var tensors = array_ops.meshgrid(array, copy: copy, sparse: sparse); diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs index 05f53d5e7..199e5ced3 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs @@ -27,8 +27,14 @@ public Array LoadMatrix(Stream stream) Array matrix = Array.CreateInstance(type, shape); //if (type == typeof(String)) - //return ReadStringMatrix(reader, matrix, bytes, type, shape); - return ReadValueMatrix(reader, matrix, bytes, type, shape); + //return ReadStringMatrix(reader, matrix, bytes, type, shape); + + if (type == typeof(Object)) + return ReadObjectMatrix(reader, matrix, shape); + else + { + return ReadValueMatrix(reader, matrix, bytes, type, shape); + } } } @@ -37,7 +43,7 @@ public T Load(Stream stream) ICloneable, IList, ICollection, IEnumerable, IStructuralComparable, IStructuralEquatable { // if (typeof(T).IsArray && (typeof(T).GetElementType().IsArray || typeof(T).GetElementType() == typeof(string))) - // return LoadJagged(stream) as T; + // return LoadJagged(stream) as T; return LoadMatrix(stream) as T; } @@ -93,7 +99,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape Type GetType(string dtype, out int bytes, out bool? 
isLittleEndian) { isLittleEndian = IsLittleEndian(dtype); - bytes = Int32.Parse(dtype.Substring(2)); + bytes = dtype.Length > 2 ? Int32.Parse(dtype.Substring(2)) : 0; string typeCode = dtype.Substring(1); @@ -121,6 +127,8 @@ Type GetType(string dtype, out int bytes, out bool? isLittleEndian) return typeof(Double); if (typeCode.StartsWith("S")) return typeof(String); + if (typeCode.StartsWith("O")) + return typeof(Object); throw new NotSupportedException(); } diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs index 064c7362f..a707e8aae 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs @@ -14,9 +14,9 @@ public class RandomizedImpl public NDArray permutation(NDArray x) => new NDArray(random_ops.random_shuffle(x)); [AutoNumPy] - public void shuffle(NDArray x) + public void shuffle(NDArray x, int? seed = null) { - var y = random_ops.random_shuffle(x); + var y = random_ops.random_shuffle(x, seed); Marshal.Copy(y.BufferToArray(), 0, x.TensorDataPointer, (int)x.bytesize); } diff --git a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs index c8c2d45fa..4c64eba74 100644 --- a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs +++ b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs @@ -10,6 +10,7 @@ public class NDArrayConverter public unsafe static T Scalar(NDArray nd) where T : unmanaged => nd.dtype switch { + TF_DataType.TF_BOOL => Scalar(*(bool*)nd.data), TF_DataType.TF_UINT8 => Scalar(*(byte*)nd.data), TF_DataType.TF_FLOAT => Scalar(*(float*)nd.data), TF_DataType.TF_INT32 => Scalar(*(int*)nd.data), diff --git a/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs b/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs index 5182d5726..4cad36e0b 100644 --- a/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs +++ b/src/TensorFlowNET.Core/NumPy/NumPy.Sorting.Searching.Counting.cs @@ -13,6 +13,10 @@ public partial class np public static NDArray argmax(NDArray a, Axis? axis = null) => new NDArray(math_ops.argmax(a, axis ?? 0)); + [AutoNumPy] + public static NDArray argmin(NDArray a, Axis? axis = null) + => new NDArray(math_ops.argmin(a, axis ?? 0)); + [AutoNumPy] public static NDArray argsort(NDArray a, Axis? axis = null) => new NDArray(sort_ops.argsort(a, axis: axis ?? -1)); diff --git a/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs b/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs index 5d86b1b39..bce16ec9f 100644 --- a/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs +++ b/src/TensorFlowNET.Core/NumPy/NumPy.Statistics.cs @@ -10,10 +10,10 @@ namespace Tensorflow.NumPy public partial class np { [AutoNumPy] - public static NDArray amin(NDArray x, int axis = 0) => new NDArray(tf.arg_min(x, axis)); + public static NDArray amin(NDArray x, int axis = 0) => new NDArray(tf.min(x, axis)); [AutoNumPy] - public static NDArray amax(NDArray x, int axis = 0) => new NDArray(tf.math.argmax(x, axis)); + public static NDArray amax(NDArray x, int axis = 0) => new NDArray(tf.max(x, axis)); [AutoNumPy] public static NDArray average(NDArray a, int axis = -1, NDArray? 
weights = null, bool returned = false) diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs index 5bc97952b..2559638b3 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs @@ -85,5 +85,11 @@ public static NDArray dot(NDArray x1, NDArray x2, NDArray? axes = null, string? [AutoNumPy] public static NDArray add(NDArray x, NDArray y) => new NDArray(math_ops.add(x, y)); + + [AutoNumPy] + public static NDArray greater(NDArray x, NDArray y) => new NDArray(tf.greater(x, y)); + + [AutoNumPy] + public static NDArray less(NDArray x, NDArray y) => new NDArray(tf.less(x, y)); } } diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs new file mode 100644 index 000000000..5dff6c16b --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy.Pickle +{ + public class DTypePickleWarpper + { + TF_DataType dtype { get; set; } + public DTypePickleWarpper(TF_DataType dtype) + { + this.dtype = dtype; + } + public void __setstate__(object[] args) { } + public static implicit operator TF_DataType(DTypePickleWarpper dTypeWarpper) + { + return dTypeWarpper.dtype; + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs new file mode 100644 index 000000000..160c7d4e9 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs @@ -0,0 +1,52 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; + +namespace Tensorflow.NumPy.Pickle +{ + /// + /// + /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + class DtypeConstructor : IObjectConstructor + { + public object construct(object[] args) + { + var typeCode = (string)args[0]; + TF_DataType dtype; + if (typeCode == "b1") + dtype = np.@bool; + else if (typeCode == "i1") + dtype = np.@byte; + else if (typeCode == "i2") + dtype = np.int16; + else if (typeCode == "i4") + dtype = np.int32; + else if (typeCode == "i8") + dtype = np.int64; + else if (typeCode == "u1") + dtype = np.ubyte; + else if (typeCode == "u2") + dtype = np.uint16; + else if (typeCode == "u4") + dtype = np.uint32; + else if (typeCode == "u8") + dtype = np.uint64; + else if (typeCode == "f4") + dtype = np.float32; + else if (typeCode == "f8") + dtype = np.float64; + else if (typeCode.StartsWith("S")) + dtype = np.@string; + else if (typeCode.StartsWith("O")) + dtype = np.@object; + else + throw new NotSupportedException(); + return new DTypePickleWarpper(dtype); + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs new file mode 100644 index 000000000..885f368c4 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; +using Razorvine.Pickle.Objects; + +namespace Tensorflow.NumPy.Pickle +{ + /// + /// Creates multiarrays of objects. Returns a primitive type multiarray such as int[][] if + /// the objects are ints, etc. 
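+ /// This constructor is meant to be registered with the unpickler for
+ /// "numpy.core.multiarray._reconstruct" (the registration site is assumed to
+ /// accompany the object-dtype branch of NumPyImpl.LoadMatrix above).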
+ /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + public class MultiArrayConstructor : IObjectConstructor + { + public object construct(object[] args) + { + if (args.Length != 3) + throw new InvalidArgumentError($"Invalid number of arguments in MultiArrayConstructor._reconstruct. Expected three arguments. Given {args.Length} arguments."); + + var types = (ClassDictConstructor)args[0]; + if (types.module != "numpy" || types.name != "ndarray") + throw new RuntimeError("_reconstruct: First argument must be a sub-type of ndarray"); + + var arg1 = (object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + var shape = new Shape(dims); + + TF_DataType dtype; + string identifier; + if (args[2].GetType() == typeof(string)) + identifier = (string)args[2]; + else + identifier = Encoding.UTF8.GetString((byte[])args[2]); + switch (identifier) + { + case "u": dtype = np.uint32; break; + case "c": dtype = np.complex_; break; + case "f": dtype = np.float32; break; + case "b": dtype = np.@bool; break; + default: throw new NotImplementedException($"Unsupported data type: {args[2]}"); + } + return new MultiArrayPickleWarpper(shape, dtype); + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs new file mode 100644 index 000000000..af8d1ecc2 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs @@ -0,0 +1,119 @@ +using Newtonsoft.Json.Linq; +using Serilog.Debugging; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy.Pickle +{ + public class MultiArrayPickleWarpper + { + public Shape reconstructedShape { get; set; } + public TF_DataType reconstructedDType { get; set; } + public NDArray reconstructedNDArray { get; set; } + public Array reconstructedMultiArray { get; set; } + public MultiArrayPickleWarpper(Shape shape, TF_DataType dtype) + { + reconstructedShape = shape; + reconstructedDType = dtype; + } + public void __setstate__(object[] args) + { + if (args.Length != 5) + throw new InvalidArgumentError($"Invalid number of arguments in NDArray.__setstate__. Expected five arguments. Given {args.Length} arguments."); + + var version = (int)args[0]; // version + + var arg1 = (object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + var _ShapeLike = new Shape(dims); // shape + + TF_DataType _DType_co = (DTypePickleWarpper)args[2]; // DType + + var F_continuous = (bool)args[3]; // F-continuous + if (F_continuous) + throw new InvalidArgumentError("Fortran Continuous memory layout is not supported. Please use C-continuous layout or check the data format."); + + var data = args[4]; // Data + /* + * If we ever need another pickle format, increment the version + * number. But we should still be able to handle the old versions. + */ + if (version < 0 || version > 4) + throw new ValueError($"can't handle version {version} of numpy.dtype pickle"); + + // TODO: Implement the missing details and checks from the official Numpy C code here. 
+ // https://github.com/numpy/numpy/blob/2f0bd6e86a77e4401d0384d9a75edf9470c5deb6/numpy/core/src/multiarray/descriptor.c#L2761 + + if (data.GetType() == typeof(ArrayList)) + { + Reconstruct((ArrayList)data); + } + else + throw new NotImplementedException(""); + } + private void Reconstruct(ArrayList arrayList) + { + int ndim = 1; + var subArrayList = arrayList; + while (subArrayList.Count > 0 && subArrayList[0] != null && subArrayList[0].GetType() == typeof(ArrayList)) + { + subArrayList = (ArrayList)subArrayList[0]; + ndim += 1; + } + var type = subArrayList[0].GetType(); + if (type == typeof(int)) + { + if (ndim == 1) + { + int[] list = (int[])arrayList.ToArray(typeof(int)); + Shape shape = new Shape(new int[] { arrayList.Count }); + reconstructedMultiArray = list; + reconstructedNDArray = new NDArray(list, shape); + } + if (ndim == 2) + { + int secondDim = 0; + foreach (ArrayList subArray in arrayList) + { + secondDim = subArray.Count > secondDim ? subArray.Count : secondDim; + } + int[,] list = new int[arrayList.Count, secondDim]; + for (int i = 0; i < arrayList.Count; i++) + { + var subArray = (ArrayList?)arrayList[i]; + if (subArray == null) + throw new NullReferenceException(""); + for (int j = 0; j < subArray.Count; j++) + { + var element = subArray[j]; + if (element == null) + throw new NoNullAllowedException("the element of ArrayList cannot be null."); + list[i, j] = (int)element; + } + } + Shape shape = new Shape(new int[] { arrayList.Count, secondDim }); + reconstructedMultiArray = list; + reconstructedNDArray = new NDArray(list, shape); + } + if (ndim > 2) + throw new NotImplementedException("can't handle ArrayList with more than two dimensions."); + } + else + throw new NotImplementedException(""); + } + public static implicit operator Array(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedMultiArray; + } + public static implicit operator NDArray(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedNDArray; + } + } +} diff --git a/src/TensorFlowNET.Core/Numpy/Numpy.cs b/src/TensorFlowNET.Core/Numpy/Numpy.cs index 72d2e981c..fee2d63fc 100644 --- a/src/TensorFlowNET.Core/Numpy/Numpy.cs +++ b/src/TensorFlowNET.Core/Numpy/Numpy.cs @@ -43,7 +43,9 @@ public partial class np public static readonly TF_DataType @decimal = TF_DataType.TF_DOUBLE; public static readonly TF_DataType complex_ = TF_DataType.TF_COMPLEX; public static readonly TF_DataType complex64 = TF_DataType.TF_COMPLEX64; - public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType @string = TF_DataType.TF_STRING; + public static readonly TF_DataType @object = TF_DataType.TF_VARIANT; #endregion public static double nan => double.NaN; diff --git a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs index e488c47e7..9905d39c8 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs @@ -19,9 +19,8 @@ limitations under the License. 
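Reconstruct above walks nested ArrayLists to infer the rank, then materializes an int[] or int[,] payload plus the matching NDArray. A hedged sketch of what the two-dimensional path produces (values are illustrative; the NDArray(Array, Shape) constructor is the one the wrapper itself uses):

using Tensorflow.NumPy;

// Equivalent of the reconstructed result for a 2x2 pickled payload
// of nested ArrayLists { {1, 2}, {3, 4} }:
var nd = new NDArray(new int[,] { { 1, 2 }, { 3, 4 } }, new Shape(2, 2));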
using Tensorflow.Common.Types; using Tensorflow.Keras; using Tensorflow.Keras.ArgsDefinition; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; using Tensorflow.Keras.Saving; using Tensorflow.NumPy; using Tensorflow.Operations; @@ -145,7 +144,7 @@ private Tensor _zero_state_tensors(object state_size, Tensor batch_size, TF_Data throw new NotImplementedException("_zero_state_tensors"); } - public Tensors Apply(Tensors inputs, Tensors state = null, bool is_training = false, IOptionalArgs? optional_args = null) + public Tensors Apply(Tensors inputs, Tensors state = null, bool? is_training = false, IOptionalArgs? optional_args = null) { throw new NotImplementedException(); } diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index d31b26d4a..e59c381cb 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -206,12 +206,11 @@ internal unsafe TF_DataType _get_attr_type(string name) return result; } - internal unsafe int _get_attr_int(string name) + internal unsafe long _get_attr_int(string name) { - Status status = new(); - int result; - c_api.TF_OperationGetAttrInt(_handle, name, new IntPtr(&result), status); - status.Check(true); + long result; + c_api.TF_OperationGetAttrInt(_handle, name, new IntPtr(&result), tf.Status); + tf.Status.Check(true); return result; } diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index 7f787533a..f80dcd2c4 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -137,7 +137,7 @@ public static Tensor zeros(Tensors shape, TF_DataType dtype = TF_DataType.TF_FLO if(shape.Length > 1) { shapeTensor = ops.convert_to_tensor(shape, dtypes.int32); - if(shapeTensor.ndim > 1) + if (shapeTensor.ndim > 1) { shapeTensor = array_ops.reshape(shapeTensor, new Shape(-1)); } @@ -304,6 +304,10 @@ public static Tensor _autopacking_helper(IEnumerable list_or_tuple, TF_D { elems_as_tensors.Add(tensor); } + else if (elem is KerasTensor kt) + { + elems_as_tensors.Add(kt); + } else { var elem_tensor = constant_op.constant(elem, dtype: dtype, name: i.ToString()); @@ -404,7 +408,20 @@ public static Tensor reshape(Tensor tensor, Shape shape, string name = null) => gen_array_ops.reshape(tensor, shape, name: name); public static Tensor reshape(Tensor tensor, object[] shape, string name = null) - => gen_array_ops.reshape(tensor, ops.convert_to_tensor(shape), name: name); + { + var dims = shape_utils.from_object_array(shape); + return gen_array_ops.reshape(tensor, dims, name: name); + } + + public static Tensor reverse(Tensor tensor, Tensor axis, string name = null) + => tf.Context.ExecuteOp("ReverseV2", name, new ExecuteOpArgs(tensor, axis) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Tidx = op.get_attr("Tidx") + } + }); private static Tensor ones_like_impl(T tensor, TF_DataType dtype, string name, bool optimize = true) { @@ -425,6 +442,10 @@ public static Tensor ones(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT return tf_with(ops.name_scope(name, "ones", new { shape }), scope => { name = scope; + if (shape._shape_tuple().Length == 0) + { + shape = reshape(shape, new Shape(-1)); + } var output = gen_array_ops.fill(shape, constant_op.constant(1.0f, dtype: dtype), name: name); return output; }); @@ -603,7 +624,17 @@ public static Tensor 
shape_internal(Tensor input, string name = null, bool optim } } - return gen_array_ops.shape(input, name: name, out_type: out_type); + return tf.Context.ExecuteOp("Shape", name, new ExecuteOpArgs(input) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + out_type = op.get_attr("out_type") + } + }.SetAttributes(new + { + out_type + })).First(); }); } @@ -637,6 +668,18 @@ public static Tensor tile(Tensor input, Tensor multiples, string name = null) } }); + /*public static Tensor tile(Tensor input, Shape multiples, string name = null) + { + return tf.Context.ExecuteOp("Tile", name, new ExecuteOpArgs(input, multiples) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Tmultiples = op.get_attr("Tmultiples") + } + }); + }*/ + public static Tensor zeros_like(Tensor tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true) { return tf_with(ops.name_scope(name, "zeros_like", new Tensor[] { tensor }), scope => @@ -678,7 +721,6 @@ public static Tensor stop_gradient(Tensor input, string name = null) var tape = tf.GradientTape().stop_recording(); var result = gen_array_ops.stop_gradient(input, name); tape.StartRecord(); - tf.GradientTape().PushTape(tape); return result; } @@ -704,23 +746,26 @@ public static Tensor strided_slice(Tensor input_, Tensor begin, Tensor end, int new_axis_mask = 0, int shrink_axis_mask = 0, string name = null) - { - var op = gen_array_ops.strided_slice( - input: input_, - begin: begin, - end: end, - strides: strides, - begin_mask: begin_mask, - end_mask: end_mask, - ellipsis_mask: ellipsis_mask, - new_axis_mask: new_axis_mask, - shrink_axis_mask: shrink_axis_mask, - name: name); - - string parent_name = name; - - return op; - } + => tf.Context.ExecuteOp("StridedSlice", name, new ExecuteOpArgs(input_, begin, end, strides) + { + GetGradientAttrs = (op) => new + { + T = op.get_attr("T"), + Index = op.get_attr("Index"), + begin_mask = op.get_attr("begin_mask"), + end_mask = op.get_attr("end_mask"), + ellipsis_mask = op.get_attr("ellipsis_mask"), + new_axis_mask = op.get_attr("new_axis_mask"), + shrink_axis_mask = op.get_attr("shrink_axis_mask") + } + }.SetAttributes(new + { + begin_mask, + end_mask, + ellipsis_mask, + new_axis_mask, + shrink_axis_mask + })); /// /// Returns the gradient of `StridedSlice`. @@ -893,23 +938,9 @@ public static Tensor broadcast_static_shape(Tensor shape_x, Tensor shape_y) /// /// /// - public static Tensor concat(Tensor[] values, int axis, string name = "concat") - { - if (values.Length == 1) // Degenerate case of one tensor. - { - return tf_with(ops.name_scope(name), scope => - { - var t = ops.convert_to_tensor(axis, name: "concat_dim", dtype: TF_DataType.TF_INT32); - return identity(values[0], name: scope); - }); - } - - return gen_array_ops.concat_v2(values, ops.convert_to_tensor(axis), name: name); - } - public static Tensor concat(Tensor[] values, Tensor axis, string name = "concat") { - return gen_array_ops.concat_v2(values, axis, name: name); + return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); } public static Tensor concat(object[] values, int axis, string name = "concat") diff --git a/src/TensorFlowNET.Core/Operations/gen_image_ops.cs b/src/TensorFlowNET.Core/Operations/gen_image_ops.cs index 9240b5905..cbe661ae5 100644 --- a/src/TensorFlowNET.Core/Operations/gen_image_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_image_ops.cs @@ -16,18 +16,312 @@ limitations under the License. 
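Several array_ops above (Shape, StridedSlice, ConcatV2, and ReverseV2 just before them) are migrated onto tf.Context.ExecuteOp, which handles eager dispatch and gradient-attr recording in one place. A hedged usage sketch of the reworked reverse, assuming the default eager context:

using Tensorflow;
using static Tensorflow.Binding;

var t = tf.constant(new[,] { { 1, 2, 3 }, { 4, 5, 6 } });
var axis = ops.convert_to_tensor(new[] { 1 });
var r = array_ops.reverse(t, axis); // each row reversed: { { 3, 2, 1 }, { 6, 5, 4 } }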
using System;
using System.Linq;
+using Tensorflow.Eager;
using static Tensorflow.Binding;
+using Tensorflow.Exceptions;
+using Tensorflow.Contexts;
+using System.Xml.Linq;
+using Google.Protobuf;
namespace Tensorflow
{
public class gen_image_ops
{
+ public static Tensor adjust_contrastv2(Tensor images, Tensor contrast_factor, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustContrastv2", name) {
+ args = new object[] { images, contrast_factor }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_contrastv2_eager_fallback(images, contrast_factor, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["contrast_factor"] = contrast_factor;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustContrastv2", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustContrastv2", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+ public static Tensor adjust_contrastv2(Tensor image, float contrast_factor, string name = null)
+ {
+ return adjust_contrastv2(image, tf.convert_to_tensor(contrast_factor), name: name);
+ }
+
+ public static Tensor adjust_contrastv2_eager_fallback(Tensor images, Tensor contrast_factor, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, contrast_factor };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustContrastv2", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustContrastv2", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_hue(Tensor images, Tensor delta, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustHue", name) {
+ args = new object[] { images, delta }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_hue_eager_fallback(images, delta, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["delta"] = delta;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustHue", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustHue", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_hue(Tensor images, float delta, string name = null)
+ => adjust_hue(images, tf.convert_to_tensor(delta), name: name);
+
+ public static Tensor adjust_hue_eager_fallback(Tensor images, Tensor delta, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, delta };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustHue", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustHue", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
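A hedged usage sketch of the two adjustment entry points above (the test image and factor values are assumptions, not from the PR):

using Tensorflow;
using static Tensorflow.Binding;

var img = tf.ones(new Shape(2, 8, 8, 3), TF_DataType.TF_FLOAT); // NHWC batch
var contrasted = gen_image_ops.adjust_contrastv2(img, 1.5f);
var hueShifted = gen_image_ops.adjust_hue(img, tf.convert_to_tensor(0.1f));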
+
+ public static Tensor adjust_saturation(Tensor images, Tensor scale, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "AdjustSaturation", name)
+ {
+ args = new object[] { images, scale },
+ attrs = new Dictionary<string, object>() { }
+ });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return adjust_saturation_eager_fallback(images, scale, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["images"] = images;
+ keywords["scale"] = scale;
+ var _op = tf.OpDefLib._apply_op_helper("AdjustSaturation", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T") };
+ _execute.record_gradient("AdjustSaturation", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor adjust_saturation(Tensor images, float scale, string name = null)
+ => adjust_saturation(images, ops.convert_to_tensor(scale), name: name);
+
+ public static Tensor adjust_saturation_eager_fallback(Tensor images, Tensor scale, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { images, scale };
+ object[] _attrs = new object[] { "T", images.dtype };
+ var _result = _execute.execute("AdjustSaturation", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("AdjustSaturation", _inputs_flat, _attrs, _result);
+ }
+ return _result[0];
+ }
+
 public static (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression(Tensor boxes, Tensor scores, Tensor max_output_size_per_class, Tensor max_total_size,
- Tensor iou_threshold, Tensor score_threshold, bool pad_per_class, bool clip_boxes)
+ Tensor iou_threshold, Tensor score_threshold, bool pad_per_class = false, bool clip_boxes = true, string name = null)
 {
- throw new NotImplementedException("combined_non_max_suppression");
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "CombinedNonMaxSuppression", name){
+ args = new object[] {
+ boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold,
+ "pad_per_class", pad_per_class, "clip_boxes", clip_boxes},
+ attrs = new Dictionary<string, object>() { }});
+ return (_fast_path_result[0], _fast_path_result[1], _fast_path_result[2], _fast_path_result[3]);
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return combined_non_max_suppression_eager_fallback(
+ boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
+ score_threshold, pad_per_class, clip_boxes, name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["boxes"] = boxes;
+ keywords["scores"] = scores;
+ keywords["max_output_size_per_class"] = max_output_size_per_class;
+ keywords["max_total_size"] = max_total_size;
+ keywords["iou_threshold"] = iou_threshold;
+ keywords["score_threshold"] = score_threshold;
+ keywords["pad_per_class"] = pad_per_class;
+ keywords["clip_boxes"] = clip_boxes;
+
+ var _op = tf.OpDefLib._apply_op_helper("CombinedNonMaxSuppression", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "pad_per_class", _op.get_attr("pad_per_class"), "clip_boxes", _op.get_attr("clip_boxes") };
+ _execute.record_gradient("CombinedNonMaxSuppression", _op.inputs, _attrs, _result);
+ }
+ return (_result[0], _result[1], _result[2], _result[3]);
 }
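A hedged sketch of calling the now-implemented kernel, using the shapes documented for it earlier in this PR (all values are placeholders):

using Tensorflow;
using static Tensorflow.Binding;

var boxes  = tf.zeros(new Shape(1, 10, 1, 4)); // [batch, num_boxes, q, 4]
var scores = tf.zeros(new Shape(1, 10, 2));    // [batch, num_boxes, num_classes]
var (nmsBoxes, nmsScores, nmsClasses, valid) =
    gen_image_ops.combined_non_max_suppression(
        boxes, scores,
        max_output_size_per_class: tf.constant(5),
        max_total_size: tf.constant(5),
        iou_threshold: tf.constant(0.5f),
        score_threshold: tf.constant(0.1f));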
+ public static (Tensor, Tensor, Tensor, Tensor) combined_non_max_suppression_eager_fallback(Tensor boxes, Tensor scores, Tensor max_output_size_per_class, Tensor max_total_size,
+ Tensor iou_threshold, Tensor score_threshold, bool pad_per_class, bool clip_boxes, string name, Context ctx)
+ {
+ Tensor[] _inputs_flat = new Tensor[] { boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold };
+ object[] _attrs = new object[] { "pad_per_class", pad_per_class, "clip_boxes", clip_boxes };
+ var _result = _execute.execute("CombinedNonMaxSuppression", 4, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name);
+ if (_execute.must_record_gradient())
+ {
+ _execute.record_gradient("CombinedNonMaxSuppression", _inputs_flat, _attrs, _result);
+ }
+ return (_result[0], _result[1], _result[2], _result[3]);
+ }
+
+ public static Tensor crop_and_resize(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method = "bilinear", float extrapolation_value = 0f, string name = null)
+ {
+ var _ctx = tf.Context;
+ if (_ctx.executing_eagerly())
+ {
+ try
+ {
+ var _fast_path_result = tf.Runner.TFE_FastPathExecute(new FastPathOpExecInfo(_ctx, "CropAndResize", name) {
+ args = new object[] {
+ image, boxes, box_ind, crop_size, "method", method, "extrapolation_value", extrapolation_value }, attrs = new Dictionary<string, object>() { } });
+ return _fast_path_result[0];
+ }
+ catch (NotOkStatusException)
+ {
+ throw;
+ }
+ catch (Exception)
+ {
+ }
+ try
+ {
+ return crop_and_resize_eager_fallback(
+ image, boxes, box_ind, crop_size, method: method, extrapolation_value: extrapolation_value, name: name, ctx: _ctx);
+ }
+ catch (Exception)
+ {
+ }
+ }
+ Dictionary<string, object> keywords = new();
+ keywords["image"] = image;
+ keywords["boxes"] = boxes;
+ keywords["box_ind"] = box_ind;
+ keywords["crop_size"] = crop_size;
+ keywords["method"] = method;
+ keywords["extrapolation_value"] = extrapolation_value;
+ var _op = tf.OpDefLib._apply_op_helper("CropAndResize", name, keywords);
+ var _result = _op.outputs;
+ if (_execute.must_record_gradient())
+ {
+ object[] _attrs = new object[] { "T", _op._get_attr_type("T"), "method", _op.get_attr("method"),
+ "extrapolation_value", _op.get_attr("extrapolation_value") };
+ _execute.record_gradient("CropAndResize", _op.inputs, _attrs, _result);
+ }
+ return _result[0];
+ }
+
+ public static Tensor crop_and_resize_eager_fallback(Tensor image, Tensor boxes, Tensor box_ind, Tensor crop_size, string method, float extrapolation_value, string name, Context ctx)
+ {
+ if (method is null)
+ method = "bilinear";
+ //var method_cpmpat = ByteString.CopyFromUtf8(method ?? 
string.Empty); + //var extrapolation_value_float = (float)extrapolation_value; + + Tensor[] _inputs_flat = new Tensor[] { image, boxes, box_ind, crop_size, tf.convert_to_tensor(method), tf.convert_to_tensor(extrapolation_value) }; + object[] _attrs = new object[] { "T", image.dtype }; + var _result = _execute.execute("CropAndResize", 1, inputs: _inputs_flat, attrs: _attrs, ctx: ctx, name: name); + if (_execute.must_record_gradient()) + { + _execute.record_gradient("CropAndResize", _inputs_flat, _attrs, _result); + } + return _result[0]; + } + + public static Tensor convert_image_dtype(Tensor image, TF_DataType dtype, bool saturate = false, string name = null) { if (dtype == image.dtype) diff --git a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs index 126df9e42..318b8b142 100644 --- a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs +++ b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs @@ -102,11 +102,12 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat { throw new ValueError("\'image\' must be fully defined."); } - for (int x = 1; x < 4; x++) + var dims = image_shape["-3:"]; + foreach (var dim in dims.dims) { - if (image_shape.dims[x] == 0) + if (dim == 0) { - throw new ValueError(String.Format("inner 3 dims of \'image.shape\' must be > 0: {0}", image_shape)); + throw new ValueError("inner 3 dimensions of \'image\' must be > 0: " + image_shape); } } @@ -208,7 +209,7 @@ internal static Tensor _random_flip(Tensor image, int flip_index, int seed, stri } public static Tensor flip_left_right(Tensor image) - => _flip(image, 1, "flip_left_right"); + => _flip(image, 0, "flip_left_right"); public static Tensor flip_up_down(Tensor image) => _flip(image, 1, "flip_up_down"); @@ -226,7 +227,7 @@ internal static Tensor _flip(Tensor image, int flip_index, string scope_name) } else if (shape.ndim == 4) { - return gen_array_ops.reverse(image, ops.convert_to_tensor(new[] { flip_index + 1 })); + return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { (flip_index + 1) % 2 })); } else { @@ -965,9 +966,9 @@ public static Tensor per_image_standardization(Tensor image) if (Array.Exists(new[] { dtypes.float16, dtypes.float32 }, orig_dtype => orig_dtype == orig_dtype)) image = convert_image_dtype(image, dtypes.float32); - var num_pixels_ = array_ops.shape(image).dims; - num_pixels_ = num_pixels_.Skip(num_pixels_.Length - 3).Take(num_pixels_.Length - (num_pixels_.Length - 3)).ToArray(); - Tensor num_pixels = math_ops.reduce_prod(new Tensor(num_pixels_)); + var x = image.shape["-3:"]; + var num_pixels = math_ops.reduce_prod(x); + Tensor image_mean = math_ops.reduce_mean(image, axis: new(-1, -2, -3), keepdims: true); var stddev = math_ops.reduce_std(image, axis: new(-1, -2, -3), keepdims: true); diff --git a/src/TensorFlowNET.Core/Operations/math_ops.cs b/src/TensorFlowNET.Core/Operations/math_ops.cs index d00a5d367..e77df702f 100644 --- a/src/TensorFlowNET.Core/Operations/math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/math_ops.cs @@ -77,6 +77,9 @@ public static Tensor add_n(Tensor[] inputs, string name = null) public static Tensor argmax(Tensor input, Axis dimension, TF_DataType output_type = TF_DataType.TF_INT64, string name = null) => gen_math_ops.arg_max(input, dimension, output_type: output_type, name: name); + public static Tensor argmin(Tensor input, Axis dimension, TF_DataType output_type = TF_DataType.TF_INT64, string name = null) + => gen_math_ops.arg_min(input, dimension, output_type: 
output_type, name: name); + public static Tensor round(Tensor x, string name = null) { x = ops.convert_to_tensor(x, name: "x"); @@ -587,6 +590,17 @@ public static Tensor reduce_any(Tensor input_tensor, Axis axis = null, bool keep return _may_reduce_to_scalar(keepdims, axis, max); } + public static Tensor reduce_euclidean_norm(Tensor input_tensor, Axis axis = null, bool keepdims = false, string name = null) + { + var r = _ReductionDims(input_tensor, axis); + var distance = tf.Context.ExecuteOp("EuclideanNorm", name, + new ExecuteOpArgs(input_tensor, r).SetAttributes(new + { + keep_dims = keepdims + })); + return _may_reduce_to_scalar(keepdims, axis, distance); + } + public static Tensor reduce_max(Tensor input_tensor, Axis axis = null, bool keepdims = false, string name = null) { var r = _ReductionDims(input_tensor, axis); @@ -780,10 +794,7 @@ public static Tensor matmul(Tensor a, Tensor b, bool adjoint_a = false, bool adjoint_b = false, bool a_is_sparse = false, bool b_is_sparse = false, string name = null) - { - Tensor result = null; - - tf_with(ops.name_scope(name, "MatMul", new Tensor[] { a, b }), scope => + => tf_with(ops.name_scope(name, "MatMul", (a, b)), scope => { name = scope; @@ -804,12 +815,10 @@ public static Tensor matmul(Tensor a, Tensor b, transpose_b = true; } - result = gen_math_ops.mat_mul(a, b, transpose_a, transpose_b, name); + return tf.Context.ExecuteOp("MatMul", name, new ExecuteOpArgs(a, b) + .SetAttributes(new { transpose_a, transpose_b })); }); - return result; - } - public static Tensor batch_matmul(Tensor x, Tensor y, bool adj_x = false, bool adj_y = false, string name = null) diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 3bc20289a..be714618d 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -4,14 +4,14 @@ netstandard2.0;net6.0 Tensorflow.Binding Tensorflow - 2.10.0 - 0.110.0 + 2.11.0 + 0.110.3 10.0 enable - Haiping Chen, Meinrad Recheis, Eli Belash + Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis SciSharp STACK False - Apache 2.0, Haiping Chen $([System.DateTime]::UtcNow.ToString(yyyy)) + Apache 2.0, Haiping Chen since 2018 https://github.com/SciSharp/TensorFlow.NET git http://scisharpstack.org @@ -20,10 +20,11 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.110.0.0 + 0.110.3.0 tf.net 0.110.x and above are based on tensorflow native 2.11.0 - * RNN, LSTM works. + * Support RNN, LSTM model. + * Support Transformer model. tf.net 0.100.x and above are based on tensorflow native 2.10.0 @@ -42,12 +43,11 @@ https://tensorflownet.readthedocs.io tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. 
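reduce_euclidean_norm above computes sqrt(sum(x_i^2)) over the reduction axes, so a [3, 4] input reduces to 5. A hedged sketch (the input values are illustrative):

using Tensorflow;
using static Tensorflow.Binding;

var x = tf.constant(new float[] { 3f, 4f });
var n = math_ops.reduce_euclidean_norm(x); // scalar tensor holding 5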
- 0.110.0.0 + 0.110.3.0 LICENSE true packages true - Open.snk AnyCPU;x64 TensorFlow.NET Debug;Release;GPU @@ -88,6 +88,66 @@ https://tensorflownet.readthedocs.io + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + + + 1 + $(NoWarn),1570,1573,1591,1712,8603,8604,8625,CS0612 + + @@ -112,10 +172,11 @@ https://tensorflownet.readthedocs.io - + + diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs index 18bdc1aaf..fdd62aeed 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs @@ -14,19 +14,10 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ -using Tensorflow.NumPy; -using System; -using System.Diagnostics.CodeAnalysis; -using System.Text; -using Tensorflow.Framework.Models; -using static Tensorflow.Binding; +namespace Tensorflow; -namespace Tensorflow +public partial class Tensor { - [SuppressMessage("ReSharper", "InvokeAsExtensionMethod")] - public partial class Tensor - { - public TensorSpec ToTensorSpec() - => new TensorSpec(shape, dtype, name); - } + public TensorSpec ToTensorSpec() + => new TensorSpec(shape, dtype, name); } \ No newline at end of file diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs index c8f47825c..51062cf3b 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Index.cs @@ -42,7 +42,7 @@ public Tensor this[params Slice[] slices] array_ops.stack(args.End), array_ops.stack(args.Strides)); - return gen_array_ops.strided_slice( + return array_ops.strided_slice( this, packed_begin, packed_end, @@ -180,8 +180,7 @@ public Tensor slice(int start) array_ops.stack(end.ToArray()), array_ops.stack(strides.ToArray())); - return gen_array_ops.strided_slice( - this, + return array_ops.strided_slice(this, packed_begin, packed_end, packed_strides, diff --git a/src/TensorFlowNET.Core/APIs/tf.exp.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs similarity index 75% rename from src/TensorFlowNET.Core/APIs/tf.exp.cs rename to src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs index 56ea1898e..ca946ca48 100644 --- a/src/TensorFlowNET.Core/APIs/tf.exp.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Keras.cs @@ -1,6 +1,6 @@ /***************************************************************************** Copyright 2018 The TensorFlow.NET Authors. All Rights Reserved. - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -14,12 +14,14 @@ You may obtain a copy of the License at limitations under the License. 
******************************************************************************/ -namespace Tensorflow +namespace Tensorflow; + +public partial class Tensor { - public partial class tensorflow - { - public Tensor exp(Tensor x, - string name = null) => gen_math_ops.exp(x, name); + public bool IsFromKerasTensor { get; set; } - } -} + /// + /// Keras History: (Layer, (node_index, tensor_index)) + /// + public KerasHistory KerasHistory { get; set; } +} \ No newline at end of file diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs index c0e5d4357..65e1c8576 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs @@ -146,11 +146,6 @@ public int[] _shape_tuple() return rank < 0 ? null : shape.dims.Select(x => (int)x).ToArray(); } - /// - /// Keras History: (Layer, (node_index, tensor_index)) - /// - public KerasHistory KerasHistory { get; set; } - /// /// Updates the shape of this tensor. /// diff --git a/src/TensorFlowNET.Core/Tensors/shape_utils.cs b/src/TensorFlowNET.Core/Tensors/shape_utils.cs index 254cdad89..a77dd34ce 100644 --- a/src/TensorFlowNET.Core/Tensors/shape_utils.cs +++ b/src/TensorFlowNET.Core/Tensors/shape_utils.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Tensorflow.Eager; using static Tensorflow.Binding; namespace Tensorflow @@ -13,5 +14,31 @@ public static Tensor static_or_dynamic_map_fn(Func fn, Tensor el throw new NotImplementedException(""); } + + public static Shape from_object_array(object[] shape) + { + var dims = shape.Select(x => + { + if (x is KerasTensor kt && kt.inferred_value != null) + { + return kt.inferred_value.as_int_list()[0]; + } + else if (x is EagerTensor et && et.dtype == TF_DataType.TF_INT32) + { + return et.ToArray()[0]; + } + else if (x is int i) + { + return i; + } + else if (x is long l) + { + return l; + } + throw new NotImplementedException(); + }).ToArray(); + + return new Shape(dims); + } } } diff --git a/src/TensorFlowNET.Core/Tensors/tf.constant.cs b/src/TensorFlowNET.Core/Tensors/tf.constant.cs index 6a62d34a5..ac26b3da3 100644 --- a/src/TensorFlowNET.Core/Tensors/tf.constant.cs +++ b/src/TensorFlowNET.Core/Tensors/tf.constant.cs @@ -46,6 +46,9 @@ public Tensor zeros(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT, stri public Tensor ones(Shape shape, TF_DataType dtype = TF_DataType.TF_FLOAT, string name = null) => array_ops.ones(shape, dtype, name); + public Tensor ones(Tensor shape, TF_DataType dtype = TF_DataType.TF_FLOAT, string name = null) + => array_ops.ones(shape, dtype, name); + public Tensor size(Tensor input, string name = null, TF_DataType out_type = TF_DataType.TF_INT32) => array_ops.size(input, diff --git a/src/TensorFlowNET.Core/Training/gen_training_ops.cs b/src/TensorFlowNET.Core/Training/gen_training_ops.cs index abe85a141..df7dd9e65 100644 --- a/src/TensorFlowNET.Core/Training/gen_training_ops.cs +++ b/src/TensorFlowNET.Core/Training/gen_training_ops.cs @@ -51,5 +51,9 @@ public static Tensor apply_gradient_descent(IVariableV1 var, Tensor alpha, Tenso public static Tensor resource_apply_gradient_descent(Tensor var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null) => tf.Context.ExecuteOp("ResourceApplyGradientDescent", name, new ExecuteOpArgs(var, alpha, delta).SetAttributes(new { use_locking })); + + public static Tensor resource_apply_keras_momentum(Tensor var, Tensor accum, Tensor lr, Tensor grad, Tensor momentum, bool use_locking = false, bool use_nesterov = false, string name = null) + 
=> tf.Context.ExecuteOp("ResourceApplyKerasMomentum", name, + new ExecuteOpArgs(var, accum, lr, grad, momentum).SetAttributes(new { use_locking, use_nesterov })); } } diff --git a/src/TensorFlowNET.Core/ops.cs b/src/TensorFlowNET.Core/ops.cs index 7bd78a79f..351fd18ff 100644 --- a/src/TensorFlowNET.Core/ops.cs +++ b/src/TensorFlowNET.Core/ops.cs @@ -138,9 +138,22 @@ public static Tensor convert_to_tensor(object value, else { var graph = get_default_graph(); + if (graph is FuncGraph funcGraph) + { + return funcGraph.capture(eager_tensor, name: name); + } if (!graph.building_function) - throw new RuntimeError("Attempting to capture an EagerTensor without building a function."); - return (graph as FuncGraph).capture(eager_tensor, name: name); + { + // throw new RuntimeError("Attempting to capture an EagerTensor without building a function."); + return eager_tensor.AsPlaceholder(name: name); + } + } + } + else if (value is KerasTensor kt) + { + if (kt.inferred_value != null) + { + return convert_to_tensor(kt.inferred_value, dtype: kt.dtype, name: name); } } @@ -565,7 +578,9 @@ public static bool executing_eagerly_outside_functions() if (tf.Context.executing_eagerly()) return true; else - throw new NotImplementedException(""); + // TODO(Wanglongzhi2001), implement the false case + return true; + //throw new NotImplementedException(""); } public static bool inside_function() diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs index dc4e48da8..e368b37cd 100644 --- a/src/TensorFlowNET.Core/tensorflow.cs +++ b/src/TensorFlowNET.Core/tensorflow.cs @@ -14,6 +14,7 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using Razorvine.Pickle; using Serilog; using Serilog.Core; using System.Reflection; @@ -22,6 +23,7 @@ limitations under the License. using Tensorflow.Eager; using Tensorflow.Gradients; using Tensorflow.Keras; +using Tensorflow.NumPy.Pickle; namespace Tensorflow { @@ -98,6 +100,10 @@ public tensorflow() "please visit https://github.com/SciSharp/TensorFlow.NET. If it still not work after installing the backend, please submit an " + "issue to https://github.com/SciSharp/TensorFlow.NET/issues"); } + + // register numpy reconstructor for pickle + Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); + Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); } public string VERSION => c_api.StringPiece(c_api.TF_Version()); diff --git a/src/TensorFlowNET.Keras/Activations.cs b/src/TensorFlowNET.Keras/Activations.cs index d6d8e3914..ce5b4eb13 100644 --- a/src/TensorFlowNET.Keras/Activations.cs +++ b/src/TensorFlowNET.Keras/Activations.cs @@ -44,7 +44,6 @@ public class Activations: IActivationsApi /// /// Register the name-activation mapping in this static class. 
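With the two constructors registered in the tensorflow() constructor above, Razorvine's Unpickler can decode a pickled numpy ndarray, and the wrapper types convert implicitly once __setstate__ has run. Hedged sketch; the file name is hypothetical:

using System.IO;
using Razorvine.Pickle;
using Tensorflow.NumPy;
using Tensorflow.NumPy.Pickle;

using var stream = File.OpenRead("array.pkl"); // hypothetical pickled int ndarray
var obj = new Unpickler().load(stream);
NDArray nd = (MultiArrayPickleWarpper)obj;     // implicit conversion defined above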
/// - /// /// private static void RegisterActivation(Activation activation) { diff --git a/src/TensorFlowNET.Keras/BackendImpl.cs b/src/TensorFlowNET.Keras/BackendImpl.cs index 364800ae5..574cf5990 100644 --- a/src/TensorFlowNET.Keras/BackendImpl.cs +++ b/src/TensorFlowNET.Keras/BackendImpl.cs @@ -76,7 +76,7 @@ public void track_variable(IVariableV1 v) _GRAPH_VARIABLES[graph.graph_key] = v; } - public Tensor placeholder(Shape shape = null, + public KerasTensor placeholder(Shape shape = null, int ndim = -1, TF_DataType dtype = TF_DataType.DtInvalid, bool sparse = false, diff --git a/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs b/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs index 362f2280c..cb16aafa3 100644 --- a/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs +++ b/src/TensorFlowNET.Keras/Callbacks/CallbackList.cs @@ -73,4 +73,9 @@ public void on_test_batch_end(long end_step, Dictionary logs) { callbacks.ForEach(x => x.on_test_batch_end(end_step, logs)); } + + public void on_test_end(Dictionary logs) + { + callbacks.ForEach(x => x.on_test_end(logs)); + } } diff --git a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs index 73ccc87b0..a2a2ecfe2 100644 --- a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs +++ b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs @@ -5,13 +5,10 @@ namespace Tensorflow.Keras.Callbacks; /// /// Stop training when a monitored metric has stopped improving. /// -/// -/// - public class EarlyStopping: ICallback { int _paitence; - int _min_delta; + float _min_delta; int _verbose; int _stopped_epoch; int _wait; @@ -22,11 +19,13 @@ public class EarlyStopping: ICallback string _monitor; string _mode; bool _restore_best_weights; - List? _best_weights; + List? _best_weights; CallbackParams _parameters; + Func _monitor_op; + public Dictionary>? history { get; set; } // user need to pass a CallbackParams to EarlyStopping, CallbackParams at least need the model - public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", int min_delta = 0, int patience = 0, + public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", float min_delta = 0f, int patience = 0, int verbose = 1, string mode = "auto", float baseline = 0f, bool restore_best_weights = false, int start_from_epoch = 0) { @@ -41,17 +40,49 @@ public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", int _min_delta = Math.Abs(min_delta); _restore_best_weights = restore_best_weights; _mode = mode; - if (mode != "auto" && mode != "min" && mode != "max") + + if (_mode != "auto" && _mode != "min" && _mode != "max") + { + Console.WriteLine($"EarlyStopping mode {_mode} is unknown, fallback to auto mode."); + _mode = "auto"; + } + + if (_mode == "min") + { + _monitor_op = np.less; + } + else if (_mode == "max") + { + _monitor_op = np.greater; + } + else + { + if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) + { + _monitor_op = np.greater; + } + else + { + _monitor_op = np.less; + } + } + + if (_monitor_op == np.greater) { - Console.WriteLine("EarlyStopping mode %s is unknown, fallback to auto mode.", mode); + _min_delta *= 1; + } + else + { + _min_delta *= -1; } } public void on_train_begin() { _wait = 0; _stopped_epoch = 0; + _best = _monitor_op == np.less ? 
(float)np.Inf : (float)-np.Inf; + _best_weights = null; _best_epoch = 0; - _best = (float)np.Inf; } public void on_epoch_begin(int epoch) @@ -77,7 +108,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) // Restore the weights after first epoch if no progress is ever made. if (_restore_best_weights && _best_weights == null) { - _best_weights = _parameters.Model.Weights; + _best_weights = _parameters.Model.get_weights(); } _wait += 1; @@ -86,7 +117,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) _best = current; _best_epoch = epoch; if (_restore_best_weights) - _best_weights = _parameters.Model.TrainableWeights; + _best_weights = _parameters.Model.get_weights(); // Only restart wait if we beat both the baseline and our previous best. if (_baseline == 0f || _is_improvement(current, _baseline)) _wait = 0; @@ -102,7 +133,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) { Console.WriteLine($"Restoring model weights from the end of the best epoch: {_best_epoch + 1}"); } - _parameters.Model.Weights = _best_weights; + _parameters.Model.set_weights(_best_weights); } } } @@ -134,20 +165,10 @@ float get_monitor_value(Dictionary logs) } public bool _is_improvement(float monitor_value, float reference_value) { - bool less_op = (monitor_value - _min_delta) < reference_value; - bool greater_op = (monitor_value - _min_delta) >= reference_value; - if (_mode == "min") - return less_op; - else if (_mode == "max") - return greater_op; - else - { - if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) - { - return greater_op; - } - else - return less_op; - } + return _monitor_op(monitor_value - _min_delta, reference_value); + } + + public void on_test_end(Dictionary logs) + { } } diff --git a/src/TensorFlowNET.Keras/Callbacks/History.cs b/src/TensorFlowNET.Keras/Callbacks/History.cs index c34f253d1..6d3ff6c38 100644 --- a/src/TensorFlowNET.Keras/Callbacks/History.cs +++ b/src/TensorFlowNET.Keras/Callbacks/History.cs @@ -81,4 +81,8 @@ public void on_test_batch_begin(long step) public void on_test_batch_end(long end_step, Dictionary logs) { } + + public void on_test_end(Dictionary logs) + { + } } diff --git a/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs b/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs index 9f2b1eb31..23b18cd47 100644 --- a/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs +++ b/src/TensorFlowNET.Keras/Callbacks/ProgbarLogger.cs @@ -118,5 +118,8 @@ public void on_test_batch_end(long end_step, Dictionary logs) } } + public void on_test_end(Dictionary logs) + { + } } } diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 56b0d2a77..4d6df913b 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -3,8 +3,6 @@ using System.IO; using System.Text; using Tensorflow.Keras.Utils; -using Tensorflow.NumPy; -using System.Linq; namespace Tensorflow.Keras.Datasets { @@ -12,11 +10,57 @@ namespace Tensorflow.Keras.Datasets /// This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment /// (positive/negative). Reviews have been preprocessed, and each review is /// encoded as a list of word indexes(integers). 
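A hedged usage sketch of the rewritten IMDB loader that follows (the keras.datasets.imdb accessor is assumed to match the rest of TF.NET's Keras surface):

using static Tensorflow.KerasApi;

var dataset = keras.datasets.imdb.load_data(num_words: 10000);
var (xTrain, yTrain) = dataset.Train;
var (xTest, yTest) = dataset.Test;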
+ /// For convenience, words are indexed by overall frequency in the dataset, + /// so that for instance the integer "3" encodes the 3rd most frequent word in + /// the data.This allows for quick filtering operations such as: + /// "only consider the top 10,000 most + /// common words, but eliminate the top 20 most common words". + /// As a convention, "0" does not stand for a specific word, but instead is used + /// to encode the pad token. + /// Args: + /// path: where to cache the data (relative to %TEMP%/imdb/imdb.npz). + /// num_words: integer or None.Words are + /// ranked by how often they occur(in the training set) and only + /// the `num_words` most frequent words are kept.Any less frequent word + /// will appear as `oov_char` value in the sequence data.If None, + /// all words are kept.Defaults to `None`. + /// skip_top: skip the top N most frequently occurring words + /// (which may not be informative). These words will appear as + /// `oov_char` value in the dataset.When 0, no words are + /// skipped. Defaults to `0`. + /// maxlen: int or None.Maximum sequence length. + /// Any longer sequence will be truncated. None, means no truncation. + /// Defaults to `None`. + /// seed: int. Seed for reproducible data shuffling. + /// start_char: int. The start of a sequence will be marked with this + /// character. 0 is usually the padding character. Defaults to `1`. + /// oov_char: int. The out-of-vocabulary character. + /// Words that were cut out because of the `num_words` or + /// `skip_top` limits will be replaced with this character. + /// index_from: int. Index actual words with this index and higher. + /// Returns: + /// Tuple of Numpy arrays: `(x_train, labels_train), (x_test, labels_test)`. + /// + /// ** x_train, x_test**: lists of sequences, which are lists of indexes + /// (integers). If the num_words argument was specific, the maximum + /// possible index value is `num_words - 1`. If the `maxlen` argument was + /// specified, the largest possible sequence length is `maxlen`. + /// + /// ** labels_train, labels_test**: lists of integer labels(1 or 0). + /// + /// Raises: + /// ValueError: in case `maxlen` is so low + /// that no input sequence could be kept. + /// Note that the 'out of vocabulary' character is only used for + /// words that were present in the training set but are not included + /// because they're not making the `num_words` cut here. + /// Words that were not seen in the training set but are in the test set + /// have simply been skipped. /// + /// """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; - string file_name = "imdb.npz"; string dest_folder = "imdb"; /// @@ -31,50 +75,163 @@ public class Imdb /// /// /// - public DatasetPass load_data(string path = "imdb.npz", - int num_words = -1, + public DatasetPass load_data( + string path = "imdb.npz", + int? num_words = null, int skip_top = 0, - int maxlen = -1, + int? maxlen = null, int seed = 113, - int start_char = 1, - int oov_char= 2, + int? start_char = 1, + int? 
oov_char = 2,
 int index_from = 3)
 {
- var dst = Download();
+ path = data_utils.get_file(
+ path,
+ origin: Path.Combine(origin_folder, "imdb.npz"),
+ file_hash: "69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f"
+ );
+ path = Path.Combine(path, "imdb.npz");
+ var fileBytes = File.ReadAllBytes(path);
+ var (x_train, x_test) = LoadX(fileBytes);
+ var (labels_train, labels_test) = LoadY(fileBytes);
- var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt"));
- var x_train_string = new string[lines.Length];
- var y_train = np.zeros(new int[] { lines.Length }, np.int64);
- for (int i = 0; i < lines.Length; i++)
+ var indices = np.arange(len(x_train));
+ np.random.shuffle(indices, seed);
+ x_train = x_train[indices];
+ labels_train = labels_train[indices];
+
+ indices = np.arange(len(x_test));
+ np.random.shuffle(indices, seed);
+ x_test = x_test[indices];
+ labels_test = labels_test[indices];
+
+ var x_train_array = (int[,])x_train.ToMultiDimArray();
+ var x_test_array = (int[,])x_test.ToMultiDimArray();
+ var labels_train_array = (long[])labels_train.ToArray();
+ var labels_test_array = (long[])labels_test.ToArray();
+
+ if (start_char != null)
+ {
+ var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+ int[,] new_x_train_array = new int[d1, d2 + 1];
+ for (var i = 0; i < d1; i++)
+ {
+ new_x_train_array[i, 0] = (int)start_char;
+ Array.Copy(x_train_array, i * d2, new_x_train_array, i * (d2 + 1) + 1, d2);
+ }
+ (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+ int[,] new_x_test_array = new int[d1, d2 + 1];
+ for (var i = 0; i < d1; i++)
+ {
+ new_x_test_array[i, 0] = (int)start_char;
+ Array.Copy(x_test_array, i * d2, new_x_test_array, i * (d2 + 1) + 1, d2);
+ }
+ x_train_array = new_x_train_array;
+ x_test_array = new_x_test_array;
+ }
+ else if (index_from != 0)
 {
- y_train[i] = long.Parse(lines[i].Substring(0, 1));
- x_train_string[i] = lines[i].Substring(2);
+ var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+ for (var i = 0; i < d1; i++)
+ {
+ for (var j = 0; j < d2; j++)
+ {
+ if (x_train_array[i, j] == 0)
+ break;
+ x_train_array[i, j] += index_from;
+ }
+ }
+ (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+ for (var i = 0; i < d1; i++)
+ {
+ for (var j = 0; j < d2; j++)
+ {
+ if (x_test_array[i, j] == 0)
+ break;
+ x_test_array[i, j] += index_from;
+ }
+ }
 }
- var x_train = np.array(x_train_string);
+ if (maxlen == null)
+ {
+ maxlen = max(x_train_array.GetLength(1), x_test_array.GetLength(1));
+ }
+ (x_train_array, labels_train_array) = data_utils._remove_long_seq((int)maxlen, x_train_array, labels_train_array);
+ (x_test_array, labels_test_array) = data_utils._remove_long_seq((int)maxlen, x_test_array, labels_test_array);
+ if (x_train_array.Length == 0 || x_test_array.Length == 0)
+ throw new ValueError("After filtering for sequences shorter than maxlen=" +
+ $"{maxlen}, no sequence was kept. 
Increase maxlen."); + + int[,] xs_array = new int[x_train_array.GetLength(0) + x_test_array.GetLength(0), (int)maxlen]; + Array.Copy(x_train_array, xs_array, x_train_array.Length); + Array.Copy(x_test_array, 0, xs_array, x_train_array.Length, x_train_array.Length); + + long[] labels_array = new long[labels_train_array.Length + labels_test_array.Length]; + Array.Copy(labels_train_array, labels_array, labels_train_array.Length); + Array.Copy(labels_test_array, 0, labels_array, labels_train_array.Length, labels_test_array.Length); + + if (num_words == null) + { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + num_words = 0; + for (var i = 0; i < d1; i++) + for (var j = 0; j < d2; j++) + num_words = max((int)num_words, (int)xs_array[i, j]); + } - File.ReadAllLines(Path.Combine(dst, "imdb_test.txt")); - var x_test_string = new string[lines.Length]; - var y_test = np.zeros(new int[] { lines.Length }, np.int64); - for (int i = 0; i < lines.Length; i++) + // by convention, use 2 as OOV word + // reserve 'index_from' (=3 by default) characters: + // 0 (padding), 1 (start), 2 (OOV) + if (oov_char != null) { - y_test[i] = long.Parse(lines[i].Substring(0, 1)); - x_test_string[i] = lines[i].Substring(2); + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) + { + for (var j = 0; j < d2; j++) + { + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, j] = xs_array[i, j]; + else + new_xs_array[i, j] = (int)oov_char; + } + } + xs_array = new_xs_array; } + else + { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) + { + int k = 0; + for (var j = 0; j < d2; j++) + { + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, k++] = xs_array[i, j]; + } + } + xs_array = new_xs_array; + } + + Array.Copy(xs_array, x_train_array, x_train_array.Length); + Array.Copy(xs_array, x_train_array.Length, x_test_array, 0, x_train_array.Length); - var x_test = np.array(x_test_string); + Array.Copy(labels_array, labels_train_array, labels_train_array.Length); + Array.Copy(labels_array, labels_train_array.Length, labels_test_array, 0, labels_test_array.Length); return new DatasetPass { - Train = (x_train, y_train), - Test = (x_test, y_test) + Train = (x_train_array, labels_train_array), + Test = (x_test_array, labels_test_array) }; } (NDArray, NDArray) LoadX(byte[] bytes) { - var y = np.Load_Npz(bytes); - return (y["x_train.npy"], y["x_test.npy"]); + var x = np.Load_Npz(bytes); + return (x["x_train.npy"], x["x_test.npy"]); } (NDArray, NDArray) LoadY(byte[] bytes) @@ -82,16 +239,5 @@ public DatasetPass load_data(string path = "imdb.npz", var y = np.Load_Npz(bytes); return (y["y_train.npy"], y["y_test.npy"]); } - - string Download() - { - var dst = Path.Combine(Path.GetTempPath(), dest_folder); - Directory.CreateDirectory(dst); - - Web.Download(origin_folder + file_name, dst, file_name); - - return dst; - // return Path.Combine(dst, file_name); - } } } diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs index b93c6aed7..16e646a35 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs @@ -52,7 +52,7 @@ Tensors permutation(Tensors tensor) /// /// 
Convert a Tensor of indices into a dataset of batched indices. /// - /// + /// /// IDatasetV2 slice_batch_indices(Tensor indices) { diff --git a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs index d52190fd3..a3831bffa 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs @@ -10,10 +10,10 @@ public partial class Layer /// Wraps `call`, applying pre- and post-processing steps. /// /// - /// + /// /// /// - public virtual Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? optional_args = null) + public virtual Tensors Apply(Tensors inputs, Tensors states = null, bool? training = false, IOptionalArgs? optional_args = null) { if (callContext.Value == null) callContext.Value = new CallContext(); diff --git a/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs b/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs index 1d96e5811..e4023c3fd 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.FunctionalConstructionCall.cs @@ -1,7 +1,5 @@ using System; using Tensorflow.Keras.Utils; -using static Tensorflow.Binding; -using static Tensorflow.KerasApi; namespace Tensorflow.Keras.Engine { @@ -9,14 +7,6 @@ public partial class Layer { Tensors FunctionalConstructionCall(Tensors inputs) { - bool mask_arg_passed_by_framework = false; - bool training_arg_passed_by_framework = false; - Tensor training_value = null; - if (training_value == null) - { - training_arg_passed_by_framework = true; - } - if (base_layer_utils.needs_keras_history(inputs)) base_layer_utils.create_keras_history(inputs); diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index eaa9eb23c..a74a77f18 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -15,7 +15,7 @@ namespace Tensorflow.Keras.Engine public partial class Model { /// - /// Returns the loss value & metrics values for the model in test mode. + /// Returns the loss value and metrics values for the model in test mode. /// /// /// @@ -27,7 +27,7 @@ public partial class Model /// /// /// - public Dictionary evaluate(Tensor x, Tensor y, + public Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, int steps = -1, @@ -115,62 +115,53 @@ public Dictionary evaluate(IDatasetV2 x, int verbose = 1, bool is /// The function to be called on each batch of data. /// Whether it is validation or test. 
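The evaluate overload above now takes NDArrays directly and returns a name-to-value metric dictionary. Hedged sketch; model, x_test and y_test are assumed to exist:

var results = model.evaluate(x_test, y_test, batch_size: 32);
foreach (var kv in results)
    Console.WriteLine($"{kv.Key}: {kv.Value}");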
        ///
-        Dictionary<string, float> evaluate(DataHandler data_handler, CallbackList callbacks, bool is_val, Func<DataHandler, Tensor[], Dictionary<string, float>> test_func)
+        Dictionary<string, float> evaluate(DataHandler data_handler, CallbackList callbacks, bool is_val, Func<DataHandler, OwnedIterator, Dictionary<string, float>> test_func)
         {
             callbacks.on_test_begin();
-            var results = new Dictionary<string, float>();
-            var logs = results;
+            var logs = new Dictionary<string, float>();
             foreach (var (epoch, iterator) in data_handler.enumerate_epochs())
             {
                 reset_metrics();
-                callbacks.on_epoch_begin(epoch);
-                // data_handler.catch_stop_iteration();
-
                 foreach (var step in data_handler.steps())
                 {
                     callbacks.on_test_batch_begin(step);
-
-                    logs = test_func(data_handler, iterator.next());
-
-                    tf_with(ops.control_dependencies(Array.Empty<object>()), ctl => _train_counter.assign_add(1));
-
+                    logs = test_func(data_handler, iterator);
                     var end_step = step + data_handler.StepIncrement;
                     if (!is_val)
                         callbacks.on_test_batch_end(end_step, logs);
                 }
-
-                if (!is_val)
-                    callbacks.on_epoch_end(epoch, logs);
             }
-
-            foreach (var log in logs)
-            {
-                results[log.Key] = log.Value;
-            }
-
+            callbacks.on_test_end(logs);
+            var results = new Dictionary<string, float>(logs);
             return results;
         }

-        Dictionary<string, float> test_function(DataHandler data_handler, Tensor[] data)
+        Dictionary<string, float> test_function(DataHandler data_handler, OwnedIterator iterator)
         {
-            var (x, y) = data_handler.DataAdapter.Expand1d(data[0], data[1]);
-
-            var y_pred = Apply(x, training: false);
-            var loss = compiled_loss.Call(y, y_pred);
-
-            compiled_metrics.update_state(y, y_pred);
-
-            var outputs = metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Name, x => (float)x.Item2);
+            var data = iterator.next();
+            var outputs = test_step(data_handler, data[0], data[1]);
+            tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1));
             return outputs;
         }

-        Dictionary<string, float> test_step_multi_inputs_function(DataHandler data_handler, Tensor[] data)
+        Dictionary<string, float> test_step_multi_inputs_function(DataHandler data_handler, OwnedIterator iterator)
         {
+            var data = iterator.next();
             var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount;
-            var outputs = train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray()));
-            tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1));
+            var outputs = test_step(data_handler, data.Take(x_size).ToArray(), data.Skip(x_size).ToArray());
+            tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1));
             return outputs;
         }
+
+        Dictionary<string, float> test_step(DataHandler data_handler, Tensors x, Tensors y)
+        {
+            (x, y) = data_handler.DataAdapter.Expand1d(x, y);
+            var y_pred = Apply(x, training: false);
+            var loss = compiled_loss.Call(y, y_pred);
+            compiled_metrics.update_state(y, y_pred);
+            return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
index 68dc5976c..d6f89d8be 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
@@ -142,6 +142,7 @@ public History fit(IDatasetV2 dataset,
             int verbose = 1,
             List<ICallback> callbacks = null,
             IDatasetV2 validation_data = null,
+            int validation_step = 10, // run validation once every `validation_step` epochs
             bool shuffle = true,
             int initial_epoch = 0,
             int max_queue_size = 10,
@@ -164,11 +165,11 @@ public History fit(IDatasetV2 dataset,
             });

-            return FitInternal(data_handler, epochs, verbose, callbacks, validation_data: validation_data,
+            return FitInternal(data_handler, epochs, validation_step, verbose, callbacks, validation_data: validation_data,
                 train_step_func: train_step_function);
         }

-        History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
+        History FitInternal(DataHandler data_handler, int epochs, int validation_step, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
             Func<DataHandler, OwnedIterator, Dictionary<string, float>> train_step_func)
         {
             stop_training = false;
@@ -207,6 +208,9 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
+                    if (validation_step > 0 && epoch == 0 || (epoch) % validation_step != 0)
+                        continue;
+
                     var val_logs = evaluate(validation_data);
                     foreach(var log in val_logs)
                     {
@@ -220,6 +224,10 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbackList, IDatasetV2 validation_data,
 Dictionary<string, float> train_step_multi_inputs_function(DataHandler data_hand
         ///
         /// The logic for one training step.
         ///
-        ///
+        ///
+        ///
+        ///
         ///
         Dictionary<string, float> train_step(DataHandler data_handler, Tensors x, Tensors y)
         {
diff --git a/src/TensorFlowNET.Keras/GlobalUsing.cs b/src/TensorFlowNET.Keras/GlobalUsing.cs
index bc0798ede..85cd9194c 100644
--- a/src/TensorFlowNET.Keras/GlobalUsing.cs
+++ b/src/TensorFlowNET.Keras/GlobalUsing.cs
@@ -4,4 +4,5 @@
 global using System.Linq;
 global using static Tensorflow.Binding;
 global using static Tensorflow.KerasApi;
-global using Tensorflow.NumPy;
\ No newline at end of file
+global using Tensorflow.NumPy;
+global using Tensorflow.Keras.Engine;
\ No newline at end of file
diff --git a/src/TensorFlowNET.Keras/KerasInterface.cs b/src/TensorFlowNET.Keras/KerasInterface.cs
index 159564aac..6bc381095 100644
--- a/src/TensorFlowNET.Keras/KerasInterface.cs
+++ b/src/TensorFlowNET.Keras/KerasInterface.cs
@@ -72,8 +72,8 @@ public Sequential Sequential(params ILayer[] layers)
         ///
         /// `Model` groups layers into an object with training and inference features.
         ///
-        ///
-        ///
+        ///
+        ///
         ///
         public IModel Model(Tensors inputs, Tensors outputs, string name = null)
             => new Functional(inputs, outputs, name: name);
diff --git a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
index 19b292727..970a938d2 100644
--- a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
+++ b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs
@@ -1,24 +1,18 @@
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.ArgsDefinition;
-using static Tensorflow.Binding;
-using static Tensorflow.KerasApi;
 using System;
 using System.Collections.Generic;
 using System.Linq;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Common.Types;

-///
-/// Base class for attention layers that can be used in sequence DNN/CNN models.
-///This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
-///Attention is formed by three tensors: Query, Key and Value.
-///
-
 namespace Tensorflow.Keras.Layers
 {
     ///
     /// Base Attention class for Dense networks.
+    /// This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
+    /// Attention is formed by three tensors: Query, Key and Value.
     /// This class is suitable for Dense or CNN networks, and not for RNN networks.
     /// Implementations of attention mechanisms should inherit from this class, and
     /// reuse the `apply_attention_scores()` method.
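A quick usage sketch for the new `validation_step` argument introduced above (illustrative only; it assumes a compiled model and two `IDatasetV2` pipelines named `train_ds` and `val_ds`):

    // Validation now runs only on epochs that are multiples of validation_step,
    // instead of after every epoch (see the FitInternal change above).
    model.fit(train_ds,
        epochs: 50,
        validation_data: val_ds,
        validation_step: 5);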
diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs index 280e91e2c..2c55f8fd5 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs @@ -10,14 +10,14 @@ public partial class LayersApi { public ILayer ELU ( float alpha = 0.1f ) => new ELU(new ELUArgs { Alpha = alpha }); public ILayer SELU () - => new SELU(new LayerArgs { }); + => new SELU(new SELUArgs { }); public ILayer Softmax(int axis = -1) => new Softmax(new SoftmaxArgs { axis = axis }); public ILayer Softmax ( Axis axis ) => new Softmax(new SoftmaxArgs { axis = axis }); - public ILayer Softplus () => new Softplus(new LayerArgs { }); - public ILayer HardSigmoid () => new HardSigmoid(new LayerArgs { }); - public ILayer Softsign () => new Softsign(new LayerArgs { }); - public ILayer Swish () => new Swish(new LayerArgs { }); - public ILayer Tanh () => new Tanh(new LayerArgs { }); - public ILayer Exponential () => new Exponential(new LayerArgs { }); + public ILayer Softplus () => new Softplus(new SoftplusArgs { }); + public ILayer HardSigmoid () => new HardSigmoid(new HardSigmoidArgs { }); + public ILayer Softsign () => new Softsign(new SoftsignArgs { }); + public ILayer Swish () => new Swish(new SwishArgs { }); + public ILayer Tanh () => new Tanh(new TanhArgs { }); + public ILayer Exponential () => new Exponential(new ExponentialArgs { }); } } diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs index d94bfb4d8..bf06b1418 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs @@ -14,7 +14,7 @@ public partial class LayersApi /// Axis along which to concatenate. /// public ILayer Concatenate(int axis = -1) - => new Concatenate(new MergeArgs + => new Concatenate(new ConcatenateArgs { Axis = axis }); diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 0bdcbc841..928e7e337 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -2,9 +2,8 @@ using Tensorflow.Framework.Models; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.ArgsDefinition.Core; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; -using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Keras.Layers; using Tensorflow.NumPy; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -183,9 +182,6 @@ public ILayer Conv2D(int filters, /// Boolean, whether the layer uses a bias vector. /// The name of the initializer for the kernel weights matrix (see keras.initializers). /// The name of the initializer for the bias vector (see keras.initializers). - /// The name of the regularizer function applied to the kernel weights matrix (see keras.regularizers). - /// The name of the regularizer function applied to the bias vector (see keras.regularizers). - /// The name of the regularizer function applied to the output of the layer (its "activation") (see keras.regularizers). /// A tensor of rank 4+ representing activation(conv2d(inputs, kernel) + bias). 
public ILayer Conv2D(int filters, Shape kernel_size = null, @@ -244,7 +240,7 @@ public ILayer Conv2DTranspose(int filters, string kernel_regularizer = null, string bias_regularizer = null, string activity_regularizer = null) - => new Conv2DTranspose(new Conv2DArgs + => new Conv2DTranspose(new Conv2DTransposeArgs { Rank = 2, Filters = filters, @@ -469,7 +465,7 @@ public ILayer Flatten(string data_format = null) /// In this case, values of 'None' in the 'shape' argument represent ragged dimensions. For more information about RaggedTensors, see this guide. /// /// A tensor. - public Tensors Input(Shape shape = null, + public KerasTensor Input(Shape shape = null, int batch_size = -1, string name = null, TF_DataType dtype = TF_DataType.DtInvalid, @@ -572,7 +568,7 @@ public ILayer MaxPooling1D(int? pool_size = null, int? strides = null, string padding = "valid", string data_format = null) - => new MaxPooling1D(new Pooling1DArgs + => new MaxPooling1D(new MaxPooling1DArgs { PoolSize = pool_size ?? 2, Strides = strides ?? (pool_size ?? 2), @@ -788,7 +784,7 @@ public IRnnCell LSTMCell(int uints, string recurrent_activation = "sigmoid", bool use_bias = true, string kernel_initializer = "glorot_uniform", - string recurrent_initializer = "orthogonal", // TODO(Wanglongzhi2001),glorot_uniform has not been developed. + string recurrent_initializer = "orthogonal", string bias_initializer = "zeros", bool unit_forget_bias = true, float dropout = 0f, @@ -873,6 +869,118 @@ public ILayer LSTM(int units, UnitForgetBias = unit_forget_bias }); + /// + /// Cell class for the GRU layer. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public IRnnCell GRUCell( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool reset_after = true) + => new GRUCell(new GRUCellArgs + { + Units = units, + Activation = keras.activations.GetActivationFromName(activation), + RecurrentActivation = keras.activations.GetActivationFromName(recurrent_activation), + KernelInitializer = GetInitializerByName(kernel_initializer), + RecurrentInitializer = GetInitializerByName(recurrent_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + UseBias = use_bias, + Dropout = dropout, + RecurrentDropout = recurrent_dropout, + ResetAfter = reset_after + }); + + /// + /// Gated Recurrent Unit - Cho et al. 2014. + /// + /// Positive integer, dimensionality of the output space. + /// Activation function to use. If you pass `None`, no activation is applied.(ie. "linear" activation: `a(x) = x`). + /// Activation function to use for the recurrent step. If you pass `None`, no activation is applied. (ie. "linear" activation: `a(x) = x`). + /// Boolean, (default `True`), whether the layer uses a bias vector. + /// Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. Default: `glorot_uniform`. + /// Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. Default: `orthogonal`. + /// Initializer for the bias vector. Default: `zeros`. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. Default: 0. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. Default: 0. 
+ /// + /// Boolean. Whether to return the last output in the output sequence, or the full sequence. Default: `False`. + /// Boolean. Whether to return the last state in addition to the output. Default: `False`. + /// Boolean (default `False`). If True, process the input sequence backwards and return the reversed sequence. + /// Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. + /// Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, + /// The shape format of the `inputs` and `outputs` tensors. + /// GRU convention (whether to apply reset gate after or before matrix multiplication). False = "before", True = "after" (default and cuDNN compatible). + /// + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ) + => new GRU(new GRUArgs + { + Units = units, + Activation = keras.activations.GetActivationFromName(activation), + RecurrentActivation = keras.activations.GetActivationFromName(recurrent_activation), + KernelInitializer = GetInitializerByName(kernel_initializer), + RecurrentInitializer = GetInitializerByName(recurrent_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + UseBias = use_bias, + Dropout = dropout, + RecurrentDropout = recurrent_dropout, + ReturnSequences = return_sequences, + ReturnState = return_state, + GoBackwards = go_backwards, + Stateful = stateful, + TimeMajor = time_major, + Unroll = unroll, + ResetAfter = reset_after + }); + + public ILayer Bidirectional( + ILayer layer, + string merge_mode = "concat", + NDArray weights = null, + ILayer backward_layer = null) + => new Bidirectional(new BidirectionalArgs + { + Layer = layer, + MergeMode = merge_mode, + Weights = weights, + BackwardLayer = backward_layer + }); + + /// /// /// @@ -895,21 +1003,21 @@ public ILayer Rescaling(float scale, /// /// public ILayer Add() - => new Add(new MergeArgs { }); + => new Add(new AddArgs { }); /// /// /// /// public ILayer Subtract() - => new Subtract(new MergeArgs { }); + => new Subtract(new SubtractArgs { }); /// /// Global max pooling operation for spatial data. /// /// public ILayer GlobalAveragePooling2D() - => new GlobalAveragePooling2D(new Pooling2DArgs { }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { }); /// /// Global average pooling operation for temporal data. @@ -919,7 +1027,7 @@ public ILayer GlobalAveragePooling2D() /// /// public ILayer GlobalAveragePooling1D(string data_format = "channels_last") - => new GlobalAveragePooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalAveragePooling1D(new GlobalAveragePooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -928,7 +1036,7 @@ public ILayer GlobalAveragePooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). 
/// public ILayer GlobalAveragePooling2D(string data_format = "channels_last") - => new GlobalAveragePooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { DataFormat = data_format }); /// /// Global max pooling operation for 1D temporal data. @@ -939,7 +1047,7 @@ public ILayer GlobalAveragePooling2D(string data_format = "channels_last") /// /// public ILayer GlobalMaxPooling1D(string data_format = "channels_last") - => new GlobalMaxPooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalMaxPooling1D(new GlobalMaxPooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -948,7 +1056,7 @@ public ILayer GlobalMaxPooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). /// public ILayer GlobalMaxPooling2D(string data_format = "channels_last") - => new GlobalMaxPooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalMaxPooling2D(new GlobalMaxPooling2DArgs { DataFormat = data_format }); /// /// Get an weights initializer from its name. @@ -983,5 +1091,9 @@ public ILayer Normalization(Shape? input_shape = null, int? axis = -1, float? me Variance = variance, Invert = invert }); + + + + } } diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs index 312854388..7d5385e6f 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs @@ -2,7 +2,6 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Common.Types; -using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Reshaping { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs b/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs new file mode 100644 index 000000000..737f88cd4 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/BaseWrapper.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Abstract wrapper base class. Wrappers take another layer and augment it in various ways. + /// Do not use this class as a layer, it is only an abstract base class. + /// Two usable wrappers are the `TimeDistributed` and `Bidirectional` wrappers. + /// + public abstract class Wrapper: Layer + { + public ILayer _layer; + public Wrapper(WrapperArgs args):base(args) + { + _layer = args.Layer; + } + + public virtual void Build(KerasShapesWrapper input_shape) + { + if (!_layer.Built) + { + _layer.build(input_shape); + } + built = true; + } + + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs b/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs new file mode 100644 index 000000000..0566b08ad --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/Bidirectional.cs @@ -0,0 +1,285 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Tensorflow.Common.Types; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Bidirectional wrapper for RNNs. 
+ /// + public class Bidirectional: Wrapper + { + int _num_constants = 0; + bool _support_masking = true; + bool _return_state; + bool _stateful; + bool _return_sequences; + BidirectionalArgs _args; + RNNArgs _layer_args_copy; + RNN _forward_layer; + RNN _backward_layer; + RNN _layer; + InputSpec _input_spec; + public Bidirectional(BidirectionalArgs args):base(args) + { + _args = args; + if (_args.Layer is not ILayer) + throw new ValueError( + "Please initialize `Bidirectional` layer with a " + + $"`tf.keras.layers.Layer` instance. Received: {_args.Layer}"); + + if (_args.BackwardLayer is not null && _args.BackwardLayer is not ILayer) + throw new ValueError( + "`backward_layer` need to be a `tf.keras.layers.Layer` " + + $"instance. Received: {_args.BackwardLayer}"); + if (!new List { "sum", "mul", "ave", "concat", null }.Contains(_args.MergeMode)) + { + throw new ValueError( + $"Invalid merge mode. Received: {_args.MergeMode}. " + + "Merge mode should be one of " + + "{\"sum\", \"mul\", \"ave\", \"concat\", null}" + ); + } + if (_args.Layer is RNN) + { + _layer = _args.Layer as RNN; + } + else + { + throw new ValueError( + "Bidirectional only support RNN instance such as LSTM or GRU"); + } + _return_state = _layer.Args.ReturnState; + _return_sequences = _layer.Args.ReturnSequences; + _stateful = _layer.Args.Stateful; + _layer_args_copy = _layer.Args.Clone(); + // We don't want to track `layer` since we're already tracking the two + // copies of it we actually run. + // TODO(Wanglongzhi2001), since the feature of setattr_tracking has not been implemented. + // _setattr_tracking = false; + // super().__init__(layer, **kwargs) + // _setattr_tracking = true; + + // Recreate the forward layer from the original layer config, so that it + // will not carry over any state from the layer. + if (_layer is LSTM) + { + var arg = _layer_args_copy as LSTMArgs; + _forward_layer = new LSTM(arg); + } + else if(_layer is SimpleRNN) + { + var arg = _layer_args_copy as SimpleRNNArgs; + _forward_layer = new SimpleRNN(arg); + } + // TODO(Wanglongzhi2001), add GRU if case. + else + { + _forward_layer = new RNN(_layer.Cell, _layer_args_copy); + } + //_forward_layer = _recreate_layer_from_config(_layer); + if (_args.BackwardLayer is null) + { + _backward_layer = _recreate_layer_from_config(_layer, go_backwards:true); + } + else + { + _backward_layer = _args.BackwardLayer as RNN; + } + _forward_layer.Name = "forward_" + _forward_layer.Name; + _backward_layer.Name = "backward_" + _backward_layer.Name; + _verify_layer_config(); + + void force_zero_output_for_mask(RNN layer) + { + layer.Args.ZeroOutputForMask = layer.Args.ReturnSequences; + } + + force_zero_output_for_mask(_forward_layer); + force_zero_output_for_mask(_backward_layer); + + if (_args.Weights is not null) + { + var nw = len(_args.Weights); + _forward_layer.set_weights(_args.Weights[$":,{nw / 2}"]); + _backward_layer.set_weights(_args.Weights[$"{nw / 2},:"]); + } + + _input_spec = _layer.InputSpec; + } + + private void _verify_layer_config() + { + if (_forward_layer.Args.GoBackwards == _backward_layer.Args.GoBackwards) + { + throw new ValueError( + "Forward layer and backward layer should have different " + + "`go_backwards` value." 
+ + "forward_layer.go_backwards = " + + $"{_forward_layer.Args.GoBackwards}," + + "backward_layer.go_backwards = " + + $"{_backward_layer.Args.GoBackwards}"); + } + if (_forward_layer.Args.Stateful != _backward_layer.Args.Stateful) + { + throw new ValueError( + "Forward layer and backward layer are expected to have "+ + $"the same value for attribute stateful, got "+ + $"{_forward_layer.Args.Stateful} for forward layer and "+ + $"{_backward_layer.Args.Stateful} for backward layer"); + } + if (_forward_layer.Args.ReturnState != _backward_layer.Args.ReturnState) + { + throw new ValueError( + "Forward layer and backward layer are expected to have " + + $"the same value for attribute return_state, got " + + $"{_forward_layer.Args.ReturnState} for forward layer and " + + $"{_backward_layer.Args.ReturnState} for backward layer"); + } + if (_forward_layer.Args.ReturnSequences != _backward_layer.Args.ReturnSequences) + { + throw new ValueError( + "Forward layer and backward layer are expected to have " + + $"the same value for attribute return_sequences, got " + + $"{_forward_layer.Args.ReturnSequences} for forward layer and " + + $"{_backward_layer.Args.ReturnSequences} for backward layer"); + } + } + + private RNN _recreate_layer_from_config(RNN layer, bool go_backwards = false) + { + var config = layer.get_config() as RNNArgs; + var cell = layer.Cell; + if (go_backwards) + { + config.GoBackwards = !config.GoBackwards; + } + + if (layer is LSTM) + { + var arg = config as LSTMArgs; + return new LSTM(arg); + } + else if(layer is SimpleRNN) + { + var arg = config as SimpleRNNArgs; + return new SimpleRNN(arg); + } + // TODO(Wanglongzhi2001), add GRU if case. + else + { + return new RNN(cell, config); + } + } + + public override void build(KerasShapesWrapper input_shape) + { + _buildInputShape = input_shape; + tf_with(ops.name_scope(_forward_layer.Name), scope=> + { + _forward_layer.build(input_shape); + }); + tf_with(ops.name_scope(_backward_layer.Name), scope => + { + _backward_layer.build(input_shape); + }); + built = true; + } + + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) + { + // `Bidirectional.call` implements the same API as the wrapped `RNN`. + Tensors forward_inputs; + Tensors backward_inputs; + Tensors forward_state; + Tensors backward_state; + // if isinstance(inputs, list) and len(inputs) > 1: + if (inputs.Length > 1) + { + // initial_states are keras tensors, which means they are passed + // in together with inputs as list. The initial_states need to be + // split into forward and backward section, and be feed to layers + // accordingly. + forward_inputs = new Tensors { inputs[0] }; + backward_inputs = new Tensors { inputs[0] }; + var pivot = (len(inputs) - _num_constants) / 2 + 1; + // add forward initial state + forward_inputs.Concat(new Tensors { inputs[$"1:{pivot}"] }); + if (_num_constants != 0) + // add backward initial state + backward_inputs.Concat(new Tensors { inputs[$"{pivot}:"] }); + else + { + // add backward initial state + backward_inputs.Concat(new Tensors { inputs[$"{pivot}:{-_num_constants}"] }); + // add constants for forward and backward layers + forward_inputs.Concat(new Tensors { inputs[$"{-_num_constants}:"] }); + backward_inputs.Concat(new Tensors { inputs[$"{-_num_constants}:"] }); + } + forward_state = null; + backward_state = null; + } + else if (state is not null) + { + // initial_states are not keras tensors, eg eager tensor from np + // array. 
They are only passed in from kwarg initial_state, and + // should be passed to forward/backward layer via kwarg + // initial_state as well. + forward_inputs = inputs; + backward_inputs = inputs; + var half = len(state) / 2; + forward_state = state[$":{half}"]; + backward_state = state[$"{half}:"]; + } + else + { + forward_inputs = inputs; + backward_inputs = inputs; + forward_state = null; + backward_state = null; + } + var y = _forward_layer.Apply(forward_inputs, forward_state); + var y_rev = _backward_layer.Apply(backward_inputs, backward_state); + + Tensors states = new(); + if (_return_state) + { + states = y["1:"] + y_rev["1:"]; + y = y[0]; + y_rev = y_rev[0]; + } + + if (_return_sequences) + { + int time_dim = _forward_layer.Args.TimeMajor ? 0 : 1; + y_rev = keras.backend.reverse(y_rev, time_dim); + } + Tensors output; + if (_args.MergeMode == "concat") + output = keras.backend.concatenate(new Tensors { y.Single(), y_rev.Single() }); + else if (_args.MergeMode == "sum") + output = y.Single() + y_rev.Single(); + else if (_args.MergeMode == "ave") + output = (y.Single() + y_rev.Single()) / 2; + else if (_args.MergeMode == "mul") + output = y.Single() * y_rev.Single(); + else if (_args.MergeMode is null) + output = new Tensors { y.Single(), y_rev.Single() }; + else + throw new ValueError( + "Unrecognized value for `merge_mode`. " + + $"Received: {_args.MergeMode}" + + "Expected values are [\"concat\", \"sum\", \"ave\", \"mul\"]"); + if (_return_state) + { + if (_args.MergeMode is not null) + return new Tensors { output.Single(), states.Single()}; + } + return output; + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs index 75feb8ea2..27c13f349 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs @@ -6,7 +6,7 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public abstract class DropoutRNNCellMixin: Layer, IRnnCell { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs new file mode 100644 index 000000000..0919883d2 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs @@ -0,0 +1,168 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Extensions; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Saving; + + +namespace Tensorflow.Keras.Layers +{ + public class GRU : RNN + { + GRUArgs _args; + private static GRUCell _cell; + + bool _return_runtime; + public GRUCell Cell { get => _cell; } + public int units { get => _args.Units; } + public Activation activation { get => _args.Activation; } + public Activation recurrent_activation { get => _args.RecurrentActivation; } + public bool use_bias { get => _args.UseBias; } + public float dropout { get => _args.Dropout; } + public float recurrent_dropout { get => _args.RecurrentDropout; } + public IInitializer kernel_initializer { get => _args.KernelInitializer; } + public IInitializer recurrent_initializer { get => _args.RecurrentInitializer; } + public IInitializer bias_initializer { get => _args.BiasInitializer; } + public int implementation { get => _args.Implementation; } + public bool reset_after { get => _args.ResetAfter; } + + public GRU(GRUArgs args) : base(CreateCell(args), PreConstruct(args)) + { + _args = args; + + if 
(_args.Implementation == 0)
+            {
+                // Use red console output as a warning that is also visible in release builds.
+                Console.ForegroundColor = ConsoleColor.Red;
+                Console.WriteLine("Warning: `implementation=0` has been deprecated, " +
+                    "and now defaults to `implementation=2`. " +
+                    "Please update your layer call.");
+                Console.ResetColor();
+            }
+
+            GRUCell cell = new GRUCell(new GRUCellArgs
+            {
+                Units = _args.Units,
+                Activation = _args.Activation,
+                RecurrentActivation = _args.RecurrentActivation,
+                UseBias = _args.UseBias,
+                Dropout = _args.Dropout,
+                RecurrentDropout = _args.RecurrentDropout,
+                KernelInitializer = _args.KernelInitializer,
+                RecurrentInitializer = _args.RecurrentInitializer,
+                BiasInitializer = _args.BiasInitializer,
+                ResetAfter = _args.ResetAfter,
+                Implementation = _args.Implementation
+            });
+            _cell = cell;
+        }
+
+        protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bool? training = null, IOptionalArgs? optional_args = null)
+        {
+            GRUOptionalArgs? gru_optional_args = optional_args as GRUOptionalArgs;
+            if (optional_args is not null && gru_optional_args is null)
+            {
+                throw new ArgumentException("The type of optional args should be `GRUOptionalArgs`.");
+            }
+            Tensors? mask = gru_optional_args?.Mask;
+
+            // Ragged input is not supported yet.
+            int row_length = 0;
+            bool is_ragged_input = false;
+
+            _validate_args_if_ragged(is_ragged_input, mask);
+
+            // GRU does not support constants; ignore them during processing.
+            (inputs, initial_state, _) = this._process_inputs(inputs, initial_state, null);
+
+            if (mask.Length > 1)
+            {
+                mask = mask[0];
+            }
+
+            var input_shape = inputs.shape;
+            var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1];
+
+            // TODO(Wanglongzhi2001), finish the _could_use_gpu_kernel part.
+            Func<Tensors, Tensors, (Tensors, Tensors)> step = (cell_inputs, cell_states) =>
+            {
+                var res = Cell.Apply(cell_inputs, cell_states, training is null ?
true : training.Value); + var (output, state) = res; + return (output, state); + }; + + var (last_output, outputs, states) = keras.backend.rnn( + step, + inputs, + initial_state, + constants: null, + go_backwards: _args.GoBackwards, + mask: mask, + unroll: _args.Unroll, + input_length: ops.convert_to_tensor(timesteps), + time_major: _args.TimeMajor, + zero_output_for_mask: base.Args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences + ); + + Tensors output; + if (_args.ReturnSequences) + { + output = outputs; + } + else + { + output = last_output; + } + + if (_args.ReturnState) + { + output = new Tensors { output, states }; + } + return output; + } + + private static IRnnCell CreateCell(GRUArgs gruArgs) + { + return new GRUCell(new GRUCellArgs + { + Units = gruArgs.Units, + Activation = gruArgs.Activation, + RecurrentActivation = gruArgs.RecurrentActivation, + UseBias = gruArgs.UseBias, + Dropout = gruArgs.Dropout, + RecurrentDropout = gruArgs.RecurrentDropout, + KernelInitializer = gruArgs.KernelInitializer, + RecurrentInitializer = gruArgs.RecurrentInitializer, + BiasInitializer = gruArgs.BiasInitializer, + ResetAfter = gruArgs.ResetAfter, + Implementation = gruArgs.Implementation + }); + } + + private static RNNArgs PreConstruct(GRUArgs args) + { + return new RNNArgs + { + ReturnSequences = args.ReturnSequences, + ReturnState = args.ReturnState, + GoBackwards = args.GoBackwards, + Stateful = args.Stateful, + Unroll = args.Unroll, + TimeMajor = args.TimeMajor, + Units = args.Units, + Activation = args.Activation, + RecurrentActivation = args.RecurrentActivation, + UseBias = args.UseBias, + Dropout = args.Dropout, + RecurrentDropout = args.RecurrentDropout, + KernelInitializer = args.KernelInitializer, + RecurrentInitializer = args.RecurrentInitializer, + BiasInitializer = args.BiasInitializer + }; + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs new file mode 100644 index 000000000..2b9c01e31 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/GRUCell.cs @@ -0,0 +1,281 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Extensions; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Saving; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Cell class for the GRU layer. + /// + public class GRUCell : DropoutRNNCellMixin + { + GRUCellArgs _args; + IVariableV1 _kernel; + IVariableV1 _recurrent_kernel; + IInitializer _bias_initializer; + IVariableV1 _bias; + INestStructure _state_size; + INestStructure _output_size; + int Units; + public override INestStructure StateSize => _state_size; + + public override INestStructure OutputSize => _output_size; + + public override bool SupportOptionalArgs => false; + public GRUCell(GRUCellArgs args) : base(args) + { + _args = args; + if (_args.Units <= 0) + { + throw new ValueError( + $"units must be a positive integer, got {args.Units}"); + } + _args.Dropout = Math.Min(1f, Math.Max(0f, _args.Dropout)); + _args.RecurrentDropout = Math.Min(1f, Math.Max(0f, this._args.RecurrentDropout)); + if (_args.RecurrentDropout != 0f && _args.Implementation != 1) + { + Debug.WriteLine("RNN `implementation=2` is not supported when `recurrent_dropout` is set." 
+ + "Using `implementation=1`."); + _args.Implementation = 1; + } + Units = _args.Units; + _state_size = new NestList(Units); + _output_size = new NestNode(Units); + } + + public override void build(KerasShapesWrapper input_shape) + { + //base.build(input_shape); + + var single_shape = input_shape.ToSingleShape(); + var input_dim = single_shape[-1]; + + _kernel = add_weight("kernel", (input_dim, _args.Units * 3), + initializer: _args.KernelInitializer + ); + + _recurrent_kernel = add_weight("recurrent_kernel", (Units, Units * 3), + initializer: _args.RecurrentInitializer + ); + if (_args.UseBias) + { + Shape bias_shape; + if (!_args.ResetAfter) + { + bias_shape = new Shape(3 * Units); + } + else + { + bias_shape = (2, 3 * Units); + } + _bias = add_weight("bias", bias_shape, + initializer: _bias_initializer + ); + } + built = true; + } + + protected override Tensors Call(Tensors inputs, Tensors states = null, bool? training = null, IOptionalArgs? optional_args = null) + { + var h_tm1 = states.IsNested() ? states[0] : states.Single(); + var dp_mask = get_dropout_mask_for_cell(inputs, training.Value, count: 3); + var rec_dp_mask = get_recurrent_dropout_mask_for_cell(h_tm1, training.Value, count: 3); + + IVariableV1 input_bias = _bias; + IVariableV1 recurrent_bias = _bias; + if (_args.UseBias) + { + if (!_args.ResetAfter) + { + input_bias = _bias; + recurrent_bias = null; + } + else + { + input_bias = tf.Variable(tf.unstack(_bias.AsTensor())[0]); + recurrent_bias = tf.Variable(tf.unstack(_bias.AsTensor())[1]); + } + } + + + Tensor hh; + Tensor z; + if ( _args.Implementation == 1) + { + Tensor inputs_z; + Tensor inputs_r; + Tensor inputs_h; + if (0f < _args.Dropout && _args.Dropout < 1f) + { + inputs_z = inputs * dp_mask[0]; + inputs_r = inputs * dp_mask[1]; + inputs_h = inputs * dp_mask[2]; + } + else + { + inputs_z = inputs.Single(); + inputs_r = inputs.Single(); + inputs_h = inputs.Single(); + } + + + int startIndex = (int)_kernel.AsTensor().shape[0]; + var _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, 0 }, new[] { startIndex, Units }); + var x_z = math_ops.matmul(inputs_z, _kernel_slice); + _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, Units }, new[] { Units, Units}); + var x_r = math_ops.matmul( + inputs_r, _kernel_slice); + int endIndex = (int)_kernel.AsTensor().shape[1]; + _kernel_slice = tf.slice(_kernel.AsTensor(), + new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 }); + var x_h = math_ops.matmul(inputs_h, _kernel_slice); + + if(_args.UseBias) + { + x_z = tf.nn.bias_add( + x_z, tf.Variable(input_bias.AsTensor()[$":{Units}"])); + x_r = tf.nn.bias_add( + x_r, tf.Variable(input_bias.AsTensor()[$"{Units}:{Units * 2}"])); + x_h = tf.nn.bias_add( + x_h, tf.Variable(input_bias.AsTensor()[$"{Units * 2}:"])); + } + + Tensor h_tm1_z; + Tensor h_tm1_r; + Tensor h_tm1_h; + if (0f < _args.RecurrentDropout && _args.RecurrentDropout < 1f) + { + h_tm1_z = h_tm1 * rec_dp_mask[0]; + h_tm1_r = h_tm1 * rec_dp_mask[1]; + h_tm1_h = h_tm1 * rec_dp_mask[2]; + } + else + { + h_tm1_z = h_tm1; + h_tm1_r = h_tm1; + h_tm1_h = h_tm1; + } + + startIndex = (int)_recurrent_kernel.AsTensor().shape[0]; + var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(), + new[] { 0, 0 }, new[] { startIndex, Units }); + var recurrent_z = math_ops.matmul( + h_tm1_z, _recurrent_kernel_slice); + _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(), + new[] { 0, Units }, new[] { startIndex, Units}); + var recurrent_r = math_ops.matmul( + h_tm1_r, _recurrent_kernel_slice); 
+                if (_args.ResetAfter && _args.UseBias)
+                {
+                    recurrent_z = tf.nn.bias_add(
+                        recurrent_z, tf.Variable(recurrent_bias.AsTensor()[$":{Units}"]));
+                    recurrent_r = tf.nn.bias_add(
+                        recurrent_r, tf.Variable(recurrent_bias.AsTensor()[$"{Units}:{Units * 2}"]));
+                }
+                z = _args.RecurrentActivation.Apply(x_z + recurrent_z);
+                var r = _args.RecurrentActivation.Apply(x_r + recurrent_r);
+
+                Tensor recurrent_h;
+                if (_args.ResetAfter)
+                {
+                    endIndex = (int)_recurrent_kernel.AsTensor().shape[1];
+                    _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 });
+                    recurrent_h = math_ops.matmul(
+                        h_tm1_h, _recurrent_kernel_slice);
+                    if (_args.UseBias)
+                    {
+                        recurrent_h = tf.nn.bias_add(
+                            recurrent_h, tf.Variable(recurrent_bias.AsTensor()[$"{Units * 2}:"]));
+                    }
+                    recurrent_h *= r;
+                }
+                else
+                {
+                    _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, Units * 2 }, new[] { startIndex, endIndex - Units * 2 });
+                    recurrent_h = math_ops.matmul(
+                        r * h_tm1_h, _recurrent_kernel_slice);
+                }
+                hh = _args.Activation.Apply(x_h + recurrent_h);
+            }
+            else
+            {
+                if (0f < _args.Dropout && _args.Dropout < 1f)
+                {
+                    inputs = inputs * dp_mask[0];
+                }
+
+                var matrix_x = math_ops.matmul(inputs, _kernel.AsTensor());
+                if (_args.UseBias)
+                {
+                    matrix_x = tf.nn.bias_add(matrix_x, input_bias);
+                }
+                var matrix_x_splitted = tf.split(matrix_x, 3, axis: -1);
+                var x_z = matrix_x_splitted[0];
+                var x_r = matrix_x_splitted[1];
+                var x_h = matrix_x_splitted[2];
+
+                Tensor matrix_inner;
+                if (_args.ResetAfter)
+                {
+                    matrix_inner = math_ops.matmul(h_tm1, _recurrent_kernel.AsTensor());
+                    if (_args.UseBias)
+                    {
+                        matrix_inner = tf.nn.bias_add(
+                            matrix_inner, recurrent_bias);
+                    }
+                }
+                else
+                {
+                    var startIndex = (int)_recurrent_kernel.AsTensor().shape[0];
+                    var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, 0 }, new[] { startIndex, Units * 2 });
+                    matrix_inner = math_ops.matmul(
+                        h_tm1, _recurrent_kernel_slice);
+                }
+
+                // Split the fused recurrent pre-activations into the z, r and h parts.
+                var matrix_inner_splitted = tf.split(matrix_inner, new int[] { Units, Units, -1 }, axis: -1);
+                var recurrent_z = matrix_inner_splitted[0];
+                var recurrent_r = matrix_inner_splitted[1];
+                var recurrent_h = matrix_inner_splitted[2];
+
+                z = _args.RecurrentActivation.Apply(x_z + recurrent_z);
+                var r = _args.RecurrentActivation.Apply(x_r + recurrent_r);
+
+                if (_args.ResetAfter)
+                {
+                    recurrent_h = r * recurrent_h;
+                }
+                else
+                {
+                    var startIndex = (int)_recurrent_kernel.AsTensor().shape[0];
+                    var endIndex = (int)_recurrent_kernel.AsTensor().shape[1];
+                    var _recurrent_kernel_slice = tf.slice(_recurrent_kernel.AsTensor(),
+                        new[] { 0, 2 * Units }, new[] { startIndex, endIndex - 2 * Units });
+                    recurrent_h = math_ops.matmul(
+                        r * h_tm1, _recurrent_kernel_slice);
+                }
+                hh = _args.Activation.Apply(x_h + recurrent_h);
+            }
+            var h = z * h_tm1 + (1 - z) * hh;
+            if (states.IsNested())
+            {
+                var new_state = new NestList<Tensor>(h);
+                return new Nest<Tensor>(new INestStructure<Tensor>[] { new NestNode<Tensor>(h), new_state }).ToTensors();
+            }
+            else
+            {
+                return new Nest<Tensor>(new INestStructure<Tensor>[] { new NestNode<Tensor>(h), new NestNode<Tensor>(h) }).ToTensors();
+            }
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
index 025465fd6..c766e8d69 100644
--- a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
+++ b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs
@@ -1,10 +1,11 @@
 using System.Linq;
-using Tensorflow.Keras.ArgsDefinition.Rnn;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using
Tensorflow.Common.Types; using Tensorflow.Common.Extensions; +using Tensorflow.Keras.Saving; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Long Short-Term Memory layer - Hochreiter 1997. @@ -14,15 +15,15 @@ namespace Tensorflow.Keras.Layers.Rnn /// public class LSTM : RNN { - LSTMArgs args; + LSTMArgs _args; InputSpec[] _state_spec; InputSpec _input_spec; bool _could_use_gpu_kernel; - + public LSTMArgs Args { get => _args; } public LSTM(LSTMArgs args) : base(CreateCell(args), args) { - this.args = args; + _args = args; _input_spec = new InputSpec(ndim: 3); _state_spec = new[] { args.Units, args.Units }.Select(dim => new InputSpec(shape: (-1, dim))).ToArray(); _could_use_gpu_kernel = args.Activation == keras.activations.Tanh @@ -71,7 +72,7 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo var single_input = inputs.Single; var input_shape = single_input.shape; - var timesteps = args.TimeMajor ? input_shape[0] : input_shape[1]; + var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1]; _maybe_reset_cell_dropout_mask(Cell); @@ -87,26 +88,26 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo inputs, initial_state, constants: null, - go_backwards: args.GoBackwards, + go_backwards: _args.GoBackwards, mask: mask, - unroll: args.Unroll, + unroll: _args.Unroll, input_length: ops.convert_to_tensor(timesteps), - time_major: args.TimeMajor, - zero_output_for_mask: args.ZeroOutputForMask, - return_all_outputs: args.ReturnSequences + time_major: _args.TimeMajor, + zero_output_for_mask: _args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences ); Tensor output; - if (args.ReturnSequences) + if (_args.ReturnSequences) { - output = keras.backend.maybe_convert_to_ragged(false, outputs, (int)timesteps, args.GoBackwards); + output = keras.backend.maybe_convert_to_ragged(false, outputs, (int)timesteps, _args.GoBackwards); } else { output = last_output; } - if (args.ReturnState) + if (_args.ReturnState) { return new Tensor[] { output }.Concat(states).ToArray().ToTensors(); } @@ -115,5 +116,11 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo return output; } } + + public override IKerasConfig get_config() + { + return _args; + } + } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs index 284a2b778..e4fc6dd22 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/LSTMCell.cs @@ -3,12 +3,12 @@ using System.Diagnostics; using Tensorflow.Common.Extensions; using Tensorflow.Common.Types; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Keras.Utils; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Cell class for the LSTM layer. 
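The `get_config` override added to `LSTM` above is what the `Bidirectional` wrapper relies on: `_recreate_layer_from_config` casts the returned config back to `LSTMArgs`, flips `go_backwards`, and builds an independent backward copy. A minimal sketch of that round-trip (the `forward` layer is illustrative):

    var config = forward.get_config() as LSTMArgs; // LSTM.get_config() now returns its LSTMArgs
    config.GoBackwards = !config.GoBackwards;      // reverse the time direction
    var backward = new LSTM(config);               // fresh layer, no shared state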
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs index f86de8a85..fec75559c 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.Reflection; using Tensorflow.Keras.ArgsDefinition; -using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Util; @@ -14,7 +13,7 @@ using System.Runtime.CompilerServices; // from tensorflow.python.distribute import distribution_strategy_context as ds_context; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { /// /// Base class for recurrent layers. @@ -26,13 +25,15 @@ public class RNN : RnnBase private RNNArgs _args; private object _input_spec = null; // or NoneValue?? private object _state_spec = null; - private Tensors _states = null; private object _constants_spec = null; + private Tensors _states = null; private int _num_constants; protected IVariableV1 _kernel; protected IVariableV1 _bias; private IRnnCell _cell; - protected IRnnCell Cell + + public RNNArgs Args { get => _args; } + public IRnnCell Cell { get { @@ -185,6 +186,7 @@ private Tensors compute_mask(Tensors inputs, Tensors mask) public override void build(KerasShapesWrapper input_shape) { + _buildInputShape = input_shape; input_shape = new KerasShapesWrapper(input_shape.Shapes[0]); InputSpec get_input_spec(Shape shape) @@ -242,10 +244,9 @@ object get_state_spec(Shape shape) /// /// /// - /// Binary tensor of shape [batch_size, timesteps] indicating whether a given timestep should be masked - /// /// List of initial state tensors to be passed to the first call of the cell - /// List of constant tensors to be passed to the cell at each timestep + /// + /// /// /// /// @@ -393,7 +394,7 @@ protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bo } } - public override Tensors Apply(Tensors inputs, Tensors initial_states = null, bool training = false, IOptionalArgs? optional_args = null) + public override Tensors Apply(Tensors inputs, Tensors initial_states = null, bool? training = false, IOptionalArgs? optional_args = null) { RnnOptionalArgs? 
rnn_optional_args = optional_args as RnnOptionalArgs; if (optional_args is not null && rnn_optional_args is null) @@ -468,7 +469,7 @@ public override Tensors Apply(Tensors inputs, Tensors initial_states = null, boo return (inputs, initial_state, constants); } - private void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) + protected void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) { if (!is_ragged_input) { @@ -527,54 +528,19 @@ public Tensors __call__(Tensors inputs, Tensor state = null, Tensor training = n throw new NotImplementedException(); } - // 好像不能cell不能传接口类型 - //public RNN New(IRnnArgCell cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - //public RNN New(List cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - protected Tensors get_initial_state(Tensors inputs) { var input = inputs[0]; var input_shape = array_ops.shape(inputs); var batch_size = _args.TimeMajor ? input_shape[1] : input_shape[0]; var dtype = input.dtype; - Tensors init_state = Cell.GetInitialState(null, batch_size, dtype); - return init_state; } + + public override IKerasConfig get_config() + { + return _args; + } } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs index 018b17780..1419da4b2 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs @@ -4,7 +4,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public abstract class RnnBase: Layer { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs index a22f31c7d..9c199eb43 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs @@ -1,11 +1,11 @@ using System.Data; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving; using Tensorflow.Operations.Activation; using static HDF.PInvoke.H5Z; using static Tensorflow.ApiDef.Types; -namespace Tensorflow.Keras.Layers.Rnn +namespace Tensorflow.Keras.Layers { public class SimpleRNN : RNN { diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs index c77f77790..e74b56925 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs @@ -1,7 +1,7 @@ using System; using System.Collections.Generic; using System.Text; -using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.Common.Types; @@ -9,7 +9,7 @@ using Tensorflow.Keras.Utils; 
using Tensorflow.Graphs;

-namespace Tensorflow.Keras.Layers.Rnn
+namespace Tensorflow.Keras.Layers
 {
     ///
     /// Cell class for SimpleRNN.
diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
index 8799bfb23..ece2bc5bf 100644
--- a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
+++ b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs
@@ -3,12 +3,12 @@
 using System.Linq;
 using Tensorflow.Common.Extensions;
 using Tensorflow.Common.Types;
-using Tensorflow.Keras.ArgsDefinition.Rnn;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Keras.Utils;

-namespace Tensorflow.Keras.Layers.Rnn
+namespace Tensorflow.Keras.Layers
 {
     public class StackedRNNCells : Layer, IRnnCell
     {
diff --git a/src/TensorFlowNET.Keras/Models/ModelsApi.cs b/src/TensorFlowNET.Keras/Models/ModelsApi.cs
index 44dca58d0..2605c41e3 100644
--- a/src/TensorFlowNET.Keras/Models/ModelsApi.cs
+++ b/src/TensorFlowNET.Keras/Models/ModelsApi.cs
@@ -1,22 +1,15 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Saving;
+using Tensorflow.Keras.Saving;
 using Tensorflow.Keras.Saving.SavedModel;
-using ThirdParty.Tensorflow.Python.Keras.Protobuf;

-namespace Tensorflow.Keras.Models
+namespace Tensorflow.Keras.Models;
+
+public class ModelsApi: IModelsApi
 {
-    public class ModelsApi: IModelsApi
-    {
-        public Functional from_config(FunctionalConfig config)
-            => Functional.from_config(config);
+    public Functional from_config(FunctionalConfig config)
+        => Functional.from_config(config);

-        public IModel load_model(string filepath, bool compile = true, LoadOptions? options = null)
-        {
-            return KerasLoadModelUtils.load_model(filepath, compile: compile, options: options) as Model;
-        }
+    public IModel load_model(string filepath, bool compile = true, LoadOptions? options = null)
+    {
+        return KerasLoadModelUtils.load_model(filepath, compile: compile, options: options) as Model;
     }
 }
diff --git a/src/TensorFlowNET.Keras/Optimizers/AdamW.cs b/src/TensorFlowNET.Keras/Optimizers/AdamW.cs
new file mode 100644
index 000000000..d111b5d3a
--- /dev/null
+++ b/src/TensorFlowNET.Keras/Optimizers/AdamW.cs
@@ -0,0 +1,64 @@
+namespace Tensorflow.Keras.Optimizers
+{
+    public class AdamW : Adam
+    {
+        string name;
+        float weight_decay;
+        DeviceDType deType;
+        List<string> no_decay_params = null;
+
+        public AdamW(float learning_rate = 0.001f,
+            float weight_decay = 0.004f,
+            float beta_1 = 0.9f,
+            float beta_2 = 0.999f,
+            float epsilon = 1e-7f,
+            bool amsgrad = false,
+            List<string> no_decay_params = null,
+            string name = "AdamW") : base(learning_rate, beta_1, beta_2, epsilon, amsgrad)
+        {
+            this.name = name;
+            this.weight_decay = weight_decay;
+            this.no_decay_params = no_decay_params;
+        }
+
+        protected Operation _decay_weights_op(IVariableV1 var, float learning_rate, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            bool do_decay = _do_use_weight_decay(var.Name);
+            if (do_decay) return var.assign_add(
+                -learning_rate * var.AsTensor() * apply_state[deType]["weight_decay"]);
+            return tf.no_op();
+        }
+
+        protected bool _do_use_weight_decay(string param_name)
+        {
+            // Whether to use L2 weight decay for `param_name`.
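+            // Decoupled weight decay (Loshchilov & Hutter, arXiv:1711.05101):
+            // _decay_weights_op shrinks the variable by
+            //     var -= learning_rate * weight_decay * var
+            // and _resource_apply_dense sequences that shrink before the regular
+            // Adam step, rather than folding an L2 term into the loss.
+            // Parameters whose names contain any entry of `no_decay_params`
+            // (e.g. biases or normalization scales) are exempted by the checks below.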
+            if (this.weight_decay == 0)
+                return false;
+
+            if (this.no_decay_params != null)
+            {
+                foreach (var name in no_decay_params)
+                {
+                    if (param_name.Contains(name)) return false;
+                }
+            }
+            return true;
+        }
+
+        protected override Operation _resource_apply_dense(IVariableV1 var, Tensor grad, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            var decay = _decay_weights_op(var, _hyper["learning_rate"], apply_state);
+            tf.control_dependencies(new[] { decay });
+            return base._resource_apply_dense(var, grad, apply_state);
+        }
+
+        protected override void _prepare_local(DeviceDType device_dtype, Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state)
+        {
+            this.deType = device_dtype;
+            base._prepare_local(device_dtype, apply_state);
+            apply_state[device_dtype]["weight_decay"] = tf.constant(
+                weight_decay, name: "adam_weight_decay_rate");
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
index 31eb88be7..a237499f9 100644
--- a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
+++ b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
@@ -29,6 +29,22 @@ public IOptimizer Adam(float learning_rate = 0.001f,
             amsgrad: amsgrad,
             name: name);

+        public IOptimizer AdamW(float learning_rate = 0.001f,
+            float weight_decay = 0.004f,
+            float beta_1 = 0.9f,
+            float beta_2 = 0.999f,
+            float epsilon = 1e-7f,
+            bool amsgrad = false,
+            List<string> no_decay_params = null,
+            string name = "AdamW") => new AdamW(learning_rate: learning_rate,
+                beta_1: beta_1,
+                beta_2: beta_2,
+                epsilon: epsilon,
+                amsgrad: amsgrad,
+                name: name,
+                weight_decay: weight_decay,
+                no_decay_params: no_decay_params);
+
         ///
         /// Construct a new RMSprop optimizer.
         ///
@@ -55,7 +71,7 @@ public IOptimizer RMSprop(float learning_rate = 0.001f,
                 Name = name
             });

-        public IOptimizer SGD(float learning_rate)
-            => new SGD(learning_rate);
+        public IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f)
+            => new SGD(learning_rate, momentum);
     }
 }
diff --git a/src/TensorFlowNET.Keras/Optimizers/SGD.cs b/src/TensorFlowNET.Keras/Optimizers/SGD.cs
index f97f4b15f..1d9ceb810 100644
--- a/src/TensorFlowNET.Keras/Optimizers/SGD.cs
+++ b/src/TensorFlowNET.Keras/Optimizers/SGD.cs
@@ -22,6 +22,8 @@ public SGD(float learning_rate,
             _set_hyper("decay", decay);

             _momentum = momentum > 0;
+            if (momentum < 0 || momentum > 1)
+                throw new ValueError($"momentum must be a number between 0 and 1, got {momentum}.");

             _set_hyper("momentum", momentum);

@@ -30,6 +32,13 @@ public SGD(float learning_rate,
 #pragma warning restore CS1717 // Assignment made to same variable
         }

+        protected override void _create_slots(IVariableV1[] var_list)
+        {
+            if (_momentum)
+                foreach (var var in var_list)
+                    add_slot(var, "momentum");
+        }
+
         protected override void _prepare_local(DeviceDType device_dtype,
             Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
         {
@@ -43,7 +52,15 @@ protected override Operation _resource_apply_dense(IVariableV1 var, Tensor grad,
         {
             if (_momentum)
             {
-                throw new NotImplementedException("_resource_apply_dense");
+                var momentum_var = get_slot(var, "momentum");
+                return gen_training_ops.resource_apply_keras_momentum(
+                    var.Handle,
+                    momentum_var.Handle,
+                    _get_hyper("learning_rate", var.dtype),
+                    grad,
+                    _get_hyper("momentum", var.dtype),
+                    use_locking: _use_locking,
+                    use_nesterov: nesterov);
             }

             var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device && x.DType == var.dtype.as_base_dtype());
diff --git a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
diff --git a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
index 2f3d8f527..18ca404ef 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.get_training_or_validation_split.cs
@@ -6,7 +6,7 @@ namespace Tensorflow.Keras.Preprocessings
     public partial class DatasetUtils
     {
         /// <summary>
-        /// Potentially restict samples & labels to a training or validation split.
+        /// Potentially restrict samples and labels to a training or validation split.
         /// </summary>
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
index 4acae4265..377ac4de7 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs
@@ -8,6 +8,37 @@ public partial class Preprocessing
     {
         public static string[] WHITELIST_FORMATS = new[] { ".bmp", ".gif", ".jpeg", ".jpg", ".png" };
 
+        /// <summary>
+        /// Calculates classification statistics for a given array of classified data:
+        /// returns a dictionary containing the count and percentage of each class in the input array.
+        /// Can be used to analyze the class distribution of a dataset or to evaluate the
+        /// performance of a classification model.
+        /// </summary>
+        /// <remarks>code from copilot</remarks>
+        /// <param name="label_ids"></param>
+        /// <param name="label_class_names"></param>
+        Dictionary<string, double> get_classification_statistics(int[] label_ids, string[] label_class_names)
+        {
+            var countDict = label_ids.GroupBy(x => x)
+                .ToDictionary(g => g.Key, g => g.Count());
+            var totalCount = label_ids.Length;
+            var ratioDict = label_class_names.ToDictionary(name => name,
+                name =>
+                    (double)(countDict.ContainsKey(Array.IndexOf(label_class_names, name))
+                        ? countDict[Array.IndexOf(label_class_names, name)] : 0)
+                    / totalCount);
+
+            print("Classification statistics:");
+            foreach (string labelName in label_class_names)
+            {
+                double ratio = ratioDict[labelName];
+                print($"{labelName}: {ratio * 100:F2}%");
+            }
+
+            return ratioDict;
+        }
+
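For illustration, with hypothetical inputs label_ids = {0, 0, 1} and label_class_names = {"cat", "dog"}, the helper returns {cat: 0.6667, dog: 0.3333} and prints:

    // Classification statistics:
    // cat: 66.67%
    // dog: 33.33%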
         /// <summary>
         /// Generates a `tf.data.Dataset` from image files in a directory.
         /// https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory
@@ -53,11 +84,13 @@ public IDatasetV2 image_dataset_from_directory(string directory,
                 follow_links: follow_links);
 
             (image_paths, label_list) = keras.preprocessing.dataset_utils.get_training_or_validation_split(image_paths, label_list, validation_split, subset);
+            get_classification_statistics(label_list, class_name_list);
 
             var dataset = paths_and_labels_to_dataset(image_paths, image_size, num_channels, label_list, label_mode, class_name_list.Length, interpolation);
             if (shuffle)
                 dataset = dataset.shuffle(batch_size * 8, seed: seed);
             dataset = dataset.batch(batch_size);
+            dataset.class_names = class_name_list;
             return dataset;
         }
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
index b4d583878..232f81eb5 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.paths_and_labels_to_dataset.cs
@@ -6,6 +6,32 @@ namespace Tensorflow.Keras
 {
     public partial class Preprocessing
     {
+
+        /// <summary>
+        /// Converts image paths into a dataset for processing;
+        /// typically used to read images for prediction.
+        /// </summary>
+        /// <param name="image_paths"></param>
+        /// <param name="image_size"></param>
+        /// <param name="num_channels"></param>
+        /// <param name="num_classes"></param>
+        /// <param name="interpolation">
+        /// The interpolation method used for resizing. Supports `bilinear`, `nearest`, `bicubic`,
+        /// `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`. Defaults to `'bilinear'`.
+        /// </param>
+        /// <returns></returns>
+        public IDatasetV2 paths_to_dataset(string[] image_paths,
+            Shape image_size,
+            int num_channels = 3,
+            int num_classes = 6,
+            string interpolation = "bilinear")
+        {
+            var path_ds = tf.data.Dataset.from_tensor_slices(image_paths);
+            var img_ds = path_ds.map(x => path_to_image(x, image_size, num_channels, interpolation));
+            var label_ds = dataset_utils.labels_to_dataset(new int[num_classes], "", num_classes);
+
+            return img_ds;
+        }
+
         public IDatasetV2 paths_and_labels_to_dataset(string[] image_paths,
             Shape image_size,
             int num_channels,
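A minimal usage sketch of the directory loader with the new statistics and class_names wiring (the directory path is a placeholder; argument names follow the hunk above):

    var dataset = keras.preprocessing.image_dataset_from_directory(
        "path/to/images",            // one sub-directory per class
        image_size: (256, 256),
        batch_size: 32);
    // Per-class percentages are printed during loading, and the discovered
    // class names now travel with the returned dataset.
    foreach (var name in dataset.class_names)
        Console.WriteLine(name);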
diff --git a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
index 396ad20eb..0bd816ccb 100644
--- a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
+++ b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs
@@ -13,7 +13,6 @@
 using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Layers;
-using Tensorflow.Keras.Layers.Rnn;
 using Tensorflow.Keras.Losses;
 using Tensorflow.Keras.Metrics;
 using Tensorflow.Keras.Saving.SavedModel;
@@ -174,7 +173,7 @@ public void del_tracking()
             }
             if (node is Functional functional)
             {
-                foreach (var name in functional.UnconditionalDependencyNames.Keys)
+                foreach (var name in functional.UnconditionalDependencyNames.Keys.ToArray())
                 {
                     if (Regex.Match(name, @"^layer(_with_weights)?-[\d+]").Success)
                     {
@@ -693,7 +692,6 @@ private bool _try_build_layer(Layer obj, int node_id, KerasShapesWrapper build_i
         /// <summary>
         /// Infers input shape of layer from SavedModel functions.
         /// </summary>
         /// <param name="layer_node_id"></param>
-        ///
         /// <returns></returns>
         private TensorSpec _infer_inputs(int layer_node_id)
         {
diff --git a/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs b/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
index aa763fc2e..091dbb810 100644
--- a/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
+++ b/src/TensorFlowNET.Keras/Saving/SavedModel/load.cs
@@ -1,97 +1,89 @@
-using Google.Protobuf;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Text;
-using Tensorflow.Keras.Engine;
+using System.IO;
 using Tensorflow.Train;
 using ThirdParty.Tensorflow.Python.Keras.Protobuf;
-using static Tensorflow.Binding;
-using static Tensorflow.KerasApi;
 
-namespace Tensorflow.Keras.Saving.SavedModel
+namespace Tensorflow.Keras.Saving.SavedModel;
+
+public class KerasLoadModelUtils
 {
-    public class KerasLoadModelUtils
+    /// <summary>
+    /// Corresponding to keras/saving/save.py/load_model
+    /// </summary>
+    /// <param name="filepath"></param>
+    /// <param name="custom_objects"></param>
+    /// <param name="compile"></param>
+    /// <param name="options"></param>
+    /// <returns></returns>
+    public static Trackable load_model(string filepath, IDictionary<string, object>? custom_objects = null,
+        bool compile = true, LoadOptions? options = null)
     {
-        /// <summary>
-        /// Corresponding to keras/saving/save.py/load_model
-        /// </summary>
-        /// <param name="filepath"></param>
-        /// <param name="custom_objects"></param>
-        /// <param name="compile"></param>
-        /// <param name="options"></param>
-        /// <returns></returns>
-        public static Trackable load_model(string filepath, IDictionary<string, object>? custom_objects = null,
-            bool compile = true, LoadOptions? options = null)
+        using var savingScope = SharedObjectSavingScope.Enter();
+
+        using var ctx = LoadContext.load_context(options);
+
+        if (!File.Exists(filepath) && !Directory.Exists(filepath))
         {
-            using (SharedObjectSavingScope.Enter())
-            {
-                using (LoadContext.load_context(options))
-                {
-                    if (!File.Exists(filepath) && !Directory.Exists(filepath))
-                    {
-                        throw new IOException($"No file or directory found at {filepath}.");
-                    }
-                    if (Directory.Exists(filepath))
-                    {
-                        return load(filepath, compile, options);
-                    }
-                    else
-                    {
-                        throw new NotImplementedException("Model load of h5 format has not been supported. Please submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues if it's needed.");
-                    }
-                }
-            }
+            throw new IOException($"No file or directory found at {filepath}.");
         }
 
-        private static Trackable load(string path, bool compile = true, LoadOptions? options = null)
+        if (Directory.Exists(filepath))
+        {
+            return load(filepath, compile, options);
+        }
+        else
         {
-            SavedMetadata metadata = new SavedMetadata();
-            var meta_graph_def = Loader.parse_saved_model(path).MetaGraphs[0];
-            var object_graph_def = meta_graph_def.ObjectGraphDef;
-            string path_to_metadata_pb = Path.Combine(path, Constants.SAVED_METADATA_PATH);
-            if (File.Exists(path_to_metadata_pb))
-            {
-                metadata.MergeFrom(new FileStream(path_to_metadata_pb, FileMode.Open, FileAccess.Read));
-            }
-            else
-            {
-                throw new NotImplementedException("SavedModel saved prior to TF 2.5 detected when loading Keras model, please" +
-                    " use higher version or submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues. to let us know you need it.");
-            }
+            throw new NotImplementedException("Model load of h5 format has not been supported. Please submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues if it's needed.");
+        }
+    }
 
-            if (metadata.Nodes is null || metadata.Nodes.Count == 0)
-            {
-                return Loader.load(path, options: options) as Model;
-            }
+    private static Trackable load(string path, bool compile = true, LoadOptions? options = null)
+    {
+        SavedMetadata metadata;
+        var meta_graph_def = Loader.parse_saved_model(path).MetaGraphs[0];
+        var object_graph_def = meta_graph_def.ObjectGraphDef;
+        string path_to_metadata_pb = Path.Combine(path, Constants.SAVED_METADATA_PATH);
+        if (File.Exists(path_to_metadata_pb))
+        {
+            using var stream = new FileStream(path_to_metadata_pb, FileMode.Open, FileAccess.Read);
+            metadata = SavedMetadata.Parser.ParseFrom(stream);
+        }
+        else
+        {
+            throw new NotImplementedException("SavedModel saved prior to TF 2.5 detected when loading a Keras model; please" +
+                " use a higher version or submit an issue to https://github.com/SciSharp/TensorFlow.NET/issues to let us know you need it.");
+        }
 
-            var keras_loader = new KerasObjectLoader(metadata, object_graph_def);
-            keras_loader.load_layers(compile: compile);
+        if (metadata.Nodes is null || metadata.Nodes.Count == 0)
+        {
+            return Loader.load(path, options: options) as Model;
+        }
 
-            Dictionary<string, (Trackable, Action<object, object, object>)> nodes_to_load = new();
-            nodes_to_load["root"] = (null, null);
-            foreach (var item in keras_loader.LoadedNodes)
-            {
-                nodes_to_load[keras_loader.get_path(item.Key)] = item.Value;
-            }
-            var loaded = Loader.load_partial(path, nodes_to_load, options);
+        var keras_loader = new KerasObjectLoader(metadata, object_graph_def);
+        keras_loader.load_layers(compile: compile);
 
-            keras_loader.finalize_objects();
-            keras_loader.del_tracking();
+        Dictionary<string, (Trackable, Action<object, object, object>)> nodes_to_load = new();
+        nodes_to_load["root"] = (null, null);
+        foreach (var item in keras_loader.LoadedNodes)
+        {
+            nodes_to_load[keras_loader.get_path(item.Key)] = item.Value;
+        }
+        var loaded = Loader.load_partial(path, nodes_to_load, options);
 
-            var model = loaded["root"];
+        keras_loader.finalize_objects();
+        keras_loader.del_tracking();
 
-            if (model is Model && compile)
-            {
-                // TODO(Rinne): implement it.
-            }
+        var model = loaded["root"];
 
-            if (!tf.Context.executing_eagerly())
-            {
-                // TODO(Rinne): implement it.
-            }
+        if (model is Model && compile)
+        {
+            // TODO(Rinne): implement it.
+        }
 
-            return model;
+        if (!tf.Context.executing_eagerly())
+        {
+            // TODO(Rinne): implement it.
+        }
+
+        return model;
    }
 }
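The restructured loader keeps the same public entry point; a minimal sketch (the path is a placeholder; .h5 files still throw NotImplementedException, as above):

    var model = keras.models.load_model(@"path/to/saved_model");
    model.summary();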
diff --git a/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs b/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
index 0ec5d1a8c..325d3327a 100644
--- a/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
+++ b/src/TensorFlowNET.Keras/Saving/SavedModel/serialized_attributes.cs
@@ -3,7 +3,7 @@
 using System.Linq;
 using System.Text;
 using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Metrics;
 using Tensorflow.Train;
 
diff --git a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
index b04391be9..bab0efecf 100644
--- a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
+++ b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs
@@ -7,6 +7,8 @@
 using static Tensorflow.Binding;
 using static Tensorflow.KerasApi;
 using System.Linq;
+using System.Text.RegularExpressions;
+
 namespace Tensorflow.Keras.Saving
 {
     public class hdf5_format
@@ -131,7 +133,7 @@ public static void load_weights_from_hdf5_group(long f, List<ILayer> layers)
             long g = H5G.open(f, name);
             var weight_names = load_attributes_from_hdf5_group(g, "weight_names");
             foreach (var i_ in weight_names)
-            {                
+            {
                 (success, Array result) = Hdf5.ReadDataset<float>(g, i_);
                 if (success)
                     weight_values.Add(np.array(result));
@@ -192,8 +194,13 @@ public static void save_weights_to_hdf5_group(long f, List<ILayer> layers)
             var tensor = val.AsTensor();
             if (name.IndexOf("/") > 1)
             {
-                var crDataGroup = Hdf5.CreateOrOpenGroup(g, Hdf5Utils.NormalizedName(name.Split('/')[0]));
-                WriteDataset(crDataGroup, name.Split('/')[1], tensor);
+                var crDataGroup = g;
+                string[] name_split = name.Split('/');
+                for (int i = 0; i < name_split.Length - 1; i++)
+                {
+                    crDataGroup = Hdf5.CreateOrOpenGroup(crDataGroup, Hdf5Utils.NormalizedName(name_split[i]));
+                }
+                WriteDataset(crDataGroup, name_split[name_split.Length - 1], tensor);
                 Hdf5.CloseGroup(crDataGroup);
             }
             else
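The nested-group fix above matters for weight names with more than one '/' segment; a sketch of the new mapping (the weight name is hypothetical):

    string name = "rnn/lstm_cell/kernel";
    string[] parts = name.Split('/');   // ["rnn", "lstm_cell", "kernel"]
    // Every segment but the last becomes a nested HDF5 group; the last names the
    // dataset. Previously only the first two segments were used, so deeper
    // segments of the name were silently dropped.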
diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
index 5dc46fe49..36d1bc1d4 100644
--- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
+++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj
@@ -7,26 +7,30 @@
     enable
     Tensorflow.Keras
     AnyCPU;x64
-    0.11.0
+    0.11.3
     Haiping Chen
     Keras for .NET
-    Apache 2.0, Haiping Chen 2023
+    Apache 2.0, Haiping Chen since 2018
     TensorFlow.Keras
     https://github.com/SciSharp/TensorFlow.NET
     https://avatars3.githubusercontent.com/u/44989469?s=200&v=4
     https://github.com/SciSharp/TensorFlow.NET
-    Keras for .NET is a C# version of Keras ported from the python version.
-
-* Support CIFAR-10 dataset in keras.datasets.
-* Support Conv2D functional API.
-* Support BatchNormalization layer.
-* Building keras model in subclass, functional and sequential api
-* Implemented backward_function.
-* Support model.load_weights.
-* Add Subtract layer
-* Text preprocessing
-* Preprocessing.timeseries_dataset_from_array
-* Fixed memory leak for YOLOv3 model.
+
+    Keras for .NET is a C# version of Keras ported from the python version.
+
+    * Support CIFAR-10 dataset in keras.datasets.
+    * Support Conv2D functional API.
+    * Support BatchNormalization layer.
+    * Building keras model in subclass, functional and sequential api
+    * Implemented backward_function.
+    * Support model.load_weights.
+    * Add Subtract layer
+    * Text preprocessing
+    * Preprocessing.timeseries_dataset_from_array
+    * Fixed memory leak for YOLOv3 model.
+    * Support RNN and LSTM models
+    * Support Transformer model
+
     Keras for .NET
     Keras is an API designed for human beings, not machines. Keras follows best practices for reducing cognitive load: it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear & actionable error messages.
@@ -36,10 +40,10 @@ Keras is an API designed for human beings, not machines. Keras follows best prac
     true
     packages
     Git
-    true
+    False
     Open.snk
-    0.11.0.0
-    0.11.0.0
+    0.11.3.0
+    0.11.3.0
     LICENSE
     Debug;Release;GPU
@@ -70,9 +74,77 @@ Keras is an API designed for human beings, not machines. Keras follows best prac
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    True
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    False
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
+
+    1
+    $(NoWarn),1573,1591,1712,8602,8603,8625,CS0612
+
 
-
-
+
+
diff --git a/src/TensorFlowNET.Keras/Utils/RnnUtils.cs b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
index e8700c1f2..1e9f6d845 100644
--- a/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
+++ b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs
@@ -3,7 +3,7 @@
 using System.Diagnostics;
 using System.Text;
 using Tensorflow.Common.Types;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Common.Extensions;
 
 namespace Tensorflow.Keras.Utils
diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs
index 5b84c601f..b0bc15540 100644
--- a/src/TensorFlowNET.Keras/Utils/data_utils.cs
+++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs
@@ -39,5 +39,54 @@ public static string get_file(string fname, string origin,
 
             return datadir;
         }
+
+        public static (int[,], long[]) _remove_long_seq(int maxlen, int[,] seq, long[] label)
+        {
+            /* Removes sequences that exceed the maximum length.
+
+               Args:
+                   maxlen: Int, maximum length of the output sequences.
+                   seq: 2-D array where each row is a sequence.
+                   label: Array where each element is a label.
+
+               Returns:
+                   new_seq, new_label: shortened arrays for `seq` and `label`.
+            */
+            var nRow = seq.GetLength(0);
+            var nCol = seq.GetLength(1);
+            List<int[]> new_seq = new List<int[]>();
+            List<long> new_label = new List<long>();
+
+            for (var i = 0; i < nRow; i++)
+            {
+                if (maxlen < nCol && seq[i, maxlen] != 0)
+                    continue;
+                int[] sentence = new int[maxlen];
+                for (var j = 0; j < maxlen && j < nCol; j++)
+                {
+                    sentence[j] = seq[i, j];
+                }
+                new_seq.Add(sentence);
+                new_label.Add(label[i]);
+            }
+
+            int[,] new_seq_array = new int[new_seq.Count, maxlen];
+            long[] new_label_array = new long[new_label.Count];
+
+            for (var i = 0; i < new_seq.Count; i++)
+            {
+                for (var j = 0; j < maxlen; j++)
+                {
+                    new_seq_array[i, j] = new_seq[i][j];
+                }
+            }
+
+            for (var i = 0; i < new_label.Count; i++)
+            {
+                new_label_array[i] = new_label[i];
+            }
+            return (new_seq_array, new_label_array);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Utils/generic_utils.cs b/src/TensorFlowNET.Keras/Utils/generic_utils.cs
index 6a59fb880..5402f4995 100644
--- a/src/TensorFlowNET.Keras/Utils/generic_utils.cs
+++ b/src/TensorFlowNET.Keras/Utils/generic_utils.cs
@@ -29,6 +29,7 @@ limitations under the License.
 using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Saving;
 using Tensorflow.Train;
+using System.Text.RegularExpressions;
 
 namespace Tensorflow.Keras.Utils
 {
@@ -126,12 +127,15 @@ public static FunctionalConfig deserialize_model_config(JToken json)
 
         public static string to_snake_case(string name)
         {
-            return string.Concat(name.Select((x, i) =>
+            string intermediate = Regex.Replace(name, "(.)([A-Z][a-z0-9]+)", "$1_$2");
+            string insecure = Regex.Replace(intermediate, "([a-z])([A-Z])", "$1_$2").ToLower();
+
+            if (insecure[0] != '_')
             {
-                return i > 0 && char.IsUpper(x) && !Char.IsDigit(name[i - 1]) ?
-                    "_" + x.ToString() :
-                    x.ToString();
-            })).ToLower();
+                return insecure;
+            }
+
+            return "private" + insecure;
         }
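The regex-based to_snake_case now mirrors Keras' Python layer naming; for example (values derived from the two regexes above, and asserted by InitLayerNameTest later in this patch):

    // to_snake_case("Conv2DTranspose") => "conv2d_transpose"
    // to_snake_case("SimpleRNN")       => "simple_rnn"
    // A result that would start with '_' is prefixed with "private".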
diff --git a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
index c637cf858..e93324f3e 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
@@ -1,5 +1,6 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System;
+using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
 
@@ -23,7 +24,7 @@ public void SimpleWhileLoop()
         private void _testWhileContextHelper(int maximum_iterations)
         {
             // TODO: implement missing code dependencies
-            var sess = this.cached_session();
+            using var sess = this.cached_session();
             var i = constant_op.constant(0, name: "i");
             var c = new Func<Tensor, Tensor>(x => gen_math_ops.less(x, ops.convert_to_tensor(10), name: "c"));
             var b = new Func<Tensor, Tensor>(x => math_ops.add(x, 1, name: "c"));
diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index f240817b4..fc2280051 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -5,6 +5,7 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using Tensorflow.Framework;
 
 namespace TensorFlowNET.UnitTest.Gradient
 {
@@ -388,81 +389,77 @@ public void testBoundaryStop()
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testBoundaryContinue()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testBoundaryContinue(self):
-            //    # Test that we differentiate both 'x' and 'y' correctly when x is a
-            //    # predecessor of y.
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y * 3.0
-            //        grads = gradients.gradients(z, [x, y])
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(6.0, grads[0].eval())
+            // Test that we differentiate both 'x' and 'y' correctly when x is a
+            // predecessor of y.
+            //TODO: @test_util.run_v1_only("b/120545219")
+
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y * 3.0;
+                var grads = tf.gradients(z, new[] { x, y });
+                self.assertTrue(all(grads.Select(x => x != null)));
+                self.assertEqual(6.0, grads[0].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodAccumulateN()
         {
+            //TODO: @test_util.run_v1_only("b/120545219")
 
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodAccumulateN(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y],
-            //            aggregation_method=gradients.AggregationMethod.
-            //            EXPERIMENTAL_ACCUMULATE_N)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
-
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.EXPERIMENTAL_ACCUMULATE_N);
+                self.assertTrue(all(grads.Select(x => x != null)));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodAddN()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodAddN(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y], aggregation_method=gradients.AggregationMethod.ADD_N)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
-
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.ADD_N);
+                self.assertTrue(grads.All(x => x != null));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testAggregationMethodTree()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testAggregationMethodTree(self):
-            //    with self.cached_session():
-            //        x = constant(1.0)
-            //        y = x * 2.0
-            //        z = y + y + y + y + y + y + y + y + y + y
-            //        grads = gradients.gradients(
-            //            z, [x, y],
-            //            aggregation_method=gradients.AggregationMethod.EXPERIMENTAL_TREE)
-            //        self.assertTrue(all(x is not None for x in grads))
-            //        self.assertEqual(20.0, grads[0].eval())
-            //        self.assertEqual(10.0, grads[1].eval())
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = x * 2.0;
+                var z = y + y + y + y + y + y + y + y + y + y;
+                var grads = tf.gradients(z, new[] { x, y },
+                    aggregation_method: AggregationMethod.EXPERIMENTAL_TREE);
+                self.assertTrue(grads.All(x => x != null));
+                self.assertEqual(20.0, grads[0].eval());
+                self.assertEqual(10.0, grads[1].eval());
+            }
         }
 
         [Ignore("TODO")]
@@ -491,24 +488,37 @@ public void testNoGradientForStringOutputs()
             //    self.assertTrue(isinstance(grads[0], ops.Tensor))
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: CompositeTensors are not supported yet.")]
         [TestMethod]
         public void testSingletonIndexedSlices()
         {
+            tf.Graph().as_default();
+
+            // TODO: uncomment when CompositeTensors are supported.
+            /*
+            var x = tf.placeholder(TF_DataType.TF_FLOAT);
+            var y = tf.identity(x);
+            var dy_indices = tf.placeholder(TF_DataType.TF_INT32);
+            var dy_values = tf.placeholder(TF_DataType.TF_FLOAT);
+            var dy = new IndexedSlices(dy_values, dy_indices);
+
+            var dx = tf.gradients(new[] { y }, new[] { x }, grad_ys: new[] { dy })[0];
+            // The IndexedSlices gradient of tf.identity is the identity map.
+            using (var sess = self.cached_session())
+            {
+                var feed_dict = new FeedItem[]
+                {
+                    (x, new Tensor(new float[] { 1.0f })),
+                    (dy_indices, new Tensor(new int[] { 0 })),
+                    (dy_values, new Tensor(new float[] { 2.0f }))
+                };
+                var result = sess.run(new[] { dx, dy }, feed_dict);
+                var vdx = result[0];
+                var vdy = result[1];
+                self.assertEqual(vdx, vdy);
+            }
+            */
-
-            //def testSingletonIndexedSlices(self):
-            //    with ops.Graph().as_default():
-            //        x = array_ops.placeholder(dtypes.float32)
-            //        y = array_ops.identity(x)
-            //        dy = ops.IndexedSlices(
-            //            array_ops.placeholder(dtypes.float32),
-            //            array_ops.placeholder(dtypes.int32))
-            //        dx, = gradients.gradients(y, x, grad_ys=dy)
-            //        # The IndexedSlices gradient of tf.identity is the identity map.
-            //        with self.cached_session() as sess:
-            //            vdx, vdy = sess.run(
-            //                [dx, dy], feed_dict={x: [1.0], dy.indices: [0], dy.values: [2.0]})
-            //            self.assertEqual(vdx, vdy)
         }
 
         [Ignore("TODO")]
@@ -576,26 +586,25 @@ public void testVariableRefGradient()
             //    self.assertIsNotNone(gradient)
         }
 
-        [Ignore("TODO")]
         [TestMethod]
         public void testDependentYs()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testDependentYs(self):
-            //    with self.cached_session():
-            //        x = constant_op.constant(3.0)
-            //        y = math_ops.square(x)
-            //        y1 = math_ops.square(y)
-            //        y2 = math_ops.square(y1)
-            //        g = gradients.gradients([y, y2], x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-            //        g = gradients.gradients(y + y2, x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-            //        z = array_ops.identity(y)
-            //        z2 = array_ops.identity(y2)
-            //        g = gradients.gradients([z, z2], x)
-            //        self.assertAllClose(17502.0, g[0].eval())
-
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = constant_op.constant(3.0);
+                var y = math_ops.square(x);
+                var y1 = math_ops.square(y);
+                var y2 = math_ops.square(y1);
+                var g = tf.gradients(new[] { y, y2 }, new[] { x });
+                self.assertAllClose(17502.0, g[0].eval());
+                g = tf.gradients(y + y2, x);
+                self.assertAllClose(17502.0, g[0].eval());
+                var z = array_ops.identity(y);
+                var z2 = array_ops.identity(y2);
+                g = tf.gradients(new[] { z, z2 }, new[] { x });
+                self.assertAllClose(17502.0, g[0].eval());
+            }
        }
 
         [Ignore("TODO")]
@@ -603,75 +612,152 @@ public void testDependentYs()
         [TestMethod]
         public void testPartialDerivatives()
         {
-            //@test_util.run_v1_only("b/120545219")
-            //def testPartialDerivatives(self):
-            //    with self.cached_session():
-            //        x = constant_op.constant(1.)
-            //        y = 2 * x
-            //        z = x + y
-            //        totalg = gradients.gradients(z, [x, y])
-            //        self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
-            //        partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
-            //        self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = 2 * x;
+                var z = x + y;
+                var totalg = tf.gradients(z, new[] { x, y });
+                self.assertEqual(new[] { 3.0, 1.0 }, totalg.Select(g => g.eval()));
+                var partialg = tf.gradients(z, new[] { x, y }, stop_gradients: new[] { x, y });
+                self.assertEqual(new[] { 1.0, 1.0 }, partialg.Select(g => g.eval()));
+            }
         }
 
-        [Ignore("TODO")]
+        // TODO: remove when np.testing.assert_allclose(a, b) is implemented
+        private class CollectionComparer : System.Collections.IComparer
+        {
+            private readonly double _epsilon = 1e-07;
+
+            public int Compare(object x, object y)
+            {
+                var a = (double)x;
+                var b = (double)y;
+
+                double delta = Math.Abs(a - b);
+                if (delta < _epsilon)
+                {
+                    return 0;
+                }
+                return a.CompareTo(b);
+            }
+        }
+
+        private struct Case
+        {
+            public Tensor[] grad1;
+            public Tensor[] grad2;
+            public string constants;
+            public string variables;
+        }
+
+        [Ignore("FIXME")]
         [TestMethod]
         public void testStopGradients()
         {
+            //TODO: @test_util.run_v1_only("b/120545219")
+            Dictionary<char, Tensor> makeGraph(RandomizedImpl rng, string stop_gradients)
+            {
+                Tensor functionOf(Tensor[] xs, int k)
+                {
+                    var shape = new Shape(k, k);
+                    // TODO: replace by DefaultIfEmpty() before Aggregate().
+                    if (!xs.Any())
+                    {
+                        return rng.random(shape).astype(np.float32);
+                    }
+                    return xs.Select(x => gen_math_ops.mat_mul(rng.random(shape).astype(np.float32), x))
+                        .Aggregate((t1, t2) => t1 + t2)
+                        + rng.random(shape).astype(np.float32);
+                }
+
+                var a = functionOf(Array.Empty<Tensor>(), 3);
+                if (stop_gradients.Contains('a')) a = array_ops.stop_gradient(a);
+                var b = functionOf(new Tensor[] { a }, 3);
+                if (stop_gradients.Contains('b')) b = array_ops.stop_gradient(b);
+                var c = functionOf(new Tensor[] { a, b }, 3);
+                if (stop_gradients.Contains('c')) c = array_ops.stop_gradient(c);
+                var d = functionOf(new Tensor[] { b, c }, 3);
+                if (stop_gradients.Contains('d')) d = array_ops.stop_gradient(d);
 
-            //@test_util.run_v1_only("b/120545219")
-            //def testStopGradients(self):
-            //    def _MakeGraph(rng, stop_gradients=()):
-            //        def _FunctionOf(xs, k=3):
-            //            return ops.convert_to_tensor(
-            //                sum(math_ops.matmul(rng.rand(k, k), x) for x in xs)
-            //                + rng.rand(k, k))
-            //        a = _FunctionOf([])
-            //        if "a" in stop_gradients: a = array_ops.stop_gradient(a)
-            //        b = _FunctionOf([a])
-            //        if "b" in stop_gradients: b = array_ops.stop_gradient(b)
-            //        c = _FunctionOf([a, b])
-            //        if "c" in stop_gradients: c = array_ops.stop_gradient(c)
-            //        d = _FunctionOf([b, c])
-            //        if "d" in stop_gradients: d = array_ops.stop_gradient(d)
-            //        return dict(a=a, b=b, c=c, d=d)
-            //    def _Gradients(ys, xs, **kwargs):
-            //        dydxs = gradients.gradients(ys, xs, **kwargs)
-            //        dydxs = [0. * x if dydx is None else dydx
-            //                 for x, dydx in zip(xs, dydxs)]
-            //        return dydxs
-            //    seed = np.random.randint(1000)
-            //    cases = []
-            //    subsets = [""] + "a b c d ab ac ad bc bd cd abc abd acd bcd abcd".split()
-            //    graph = _MakeGraph(np.random.RandomState(seed))
-            //    for constants in subsets:
-            //        graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants)
-            //        for variables_ in subsets:
-            //            # compute the gradient when stopped using tf.stop_gradients
-            //            grad1 = _Gradients([graph_with_stops["d"]],
-            //                               [graph_with_stops[v] for v in variables_])
-            //            # compute the gradient when stopped using the stop_gradients kwarg
-            //            grad2 = _Gradients([graph["d"]],
-            //                               [graph[v] for v in variables_],
-            //                               stop_gradients=[graph[v] for v in constants])
-            //            cases.append(dict(grad1=grad1, grad2=grad2,
-            //                              constants=constants, variables=variables_))
-            //    # evaluate all tensors in one call to session.run for speed
-            //    with self.cached_session() as sess:
-            //        results = sess.run([(case["grad1"], case["grad2"]) for case in cases])
-            //    for (npgrad1, npgrad2), case in zip(results, cases):
-            //        for a, b in zip(npgrad1, npgrad2):
-            //            np.testing.assert_allclose(a, b)
+                return new Dictionary<char, Tensor>
+                {
+                    { 'a', a },
+                    { 'b', b },
+                    { 'c', c },
+                    { 'd', d }
+                };
+            }
+
+            Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] stop_gradients = null)
+            {
+                var dydxs = tf.gradients(ys, xs, stop_gradients);
+                dydxs = dydxs.Select((dydx, i) => dydx == null ? xs[i] * 0 : dydx).ToArray();
+                return dydxs;
+            }
+
+            var seed = np.random.randint(1000);
+            // TODO: remove next line when np.random.RandomState implemented.
+            tf.set_random_seed(seed);
+            var cases = new List<Case>();
+            // TODO: add "" case.
+            var subsets = new List<string> { "" }.Concat("a b c d ab ac ad bc bd cd abc abd acd bcd abcd".Split());
+            // TODO: pass np.random.RandomState(seed) instead of np.random
+            var graph = makeGraph(np.random, string.Empty);
+            foreach (var constants in subsets)
+            {
+                var graphWithStops = makeGraph(np.random, constants);
+                foreach (var variables_ in subsets)
+                {
+                    // compute the gradient when stopped using tf.stop_gradients
+                    var grad1 = gradients(
+                        new[] { graphWithStops['d'] },
+                        variables_.ToCharArray().Select(v => graphWithStops[v]).ToArray()
+                    );
+                    // compute the gradient when stopped using the stop_gradients from args
+                    var grad2 = gradients(
+                        new[] { graph['d'] },
+                        variables_.ToCharArray().Select(v => graph[v]).ToArray(),
+                        constants.ToCharArray().Select(c => graph[c]).DefaultIfEmpty(null)?.ToArray()
+                    );
+                    cases.Add(new Case
+                    {
+                        grad1 = grad1,
+                        grad2 = grad2,
+                        variables = variables_,
+                        constants = constants,
+                    });
+                }
+            }
+
+            // evaluate all tensors in one call to session.run for speed
+            using (var sess = self.cached_session())
+            {
+                var results = sess.run(
+                    cases.Select(case_ => (
+                        case_.grad1,
+                        case_.grad2
+                    )).ToArray()
+                );
+
+                foreach (var (result, case_) in results.Zip(cases))
+                {
+                    var npgrad1 = result[0];
+                    var npgrad2 = result[1];
+                    foreach (var (a, b) in npgrad1.Zip(npgrad2))
+                    {
+                        // TODO: np.testing.assert_allclose(a, b);
+                        CollectionAssert.AreEqual(a.ToArray(), b.ToArray(), new CollectionComparer());
+                    }
+                }
+            }
        }
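For readers of the port above, the semantics under test: stop_gradients makes tf.gradients treat the listed tensors as constants during differentiation. A condensed sketch of what testPartialDerivatives asserts:

    var x = tf.constant(1.0);
    var y = 2 * x;
    var z = x + y;
    var total = tf.gradients(z, new[] { x, y });    // dz/dx = 3.0 (x also feeds y), dz/dy = 1.0
    var partial = tf.gradients(z, new[] { x, y },
        stop_gradients: new[] { x, y });            // both partial gradients are 1.0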
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsNoneUnconnectedGradients()
         {
@@ -686,7 +772,7 @@ public void testUnconnectedGradientsNoneUnconnectedGradients()
             //        self.assertIsNone(grad[0])
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsZerosUnconnectedGradients()
         {
@@ -700,15 +786,21 @@ public void testUnconnectedGradientsZerosUnconnectedGradients()
             //        [y], [x], unconnected_gradients="zero")
             //    with self.cached_session() as sess:
             //        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(grads)[0])
+
+            // tf.Graph().as_default();
+            // var x = tf.constant(1.0, shape: new long[] { 2, 2 });
+            // var y = tf.constant(3.0, shape: new long[] { 3, 1 });
+            // var grads = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero");
+            // using (self.cached_session())
+            // {
+            //     self.assertAllEqual(new[,] { { 0.0, 0.0 }, { 0.0, 0.0 } }, self.evaluate(grads)[0]);
+            // }
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsZeroConnectedGradients()
         {
-
-
             //def testUnconnectedGradientsZeroConnectedGradients(self):
             //    with ops.Graph().as_default():
             //        x = constant(1.0)
@@ -717,9 +809,19 @@ public void testUnconnectedGradientsZeroConnectedGradients()
             //    with self.cached_session() as sess:
             //        self.assertEquals(3.0, self.evaluate(grad)[0])
+
+            // tf.Graph().as_default();
+
+            // var x = tf.constant(1.0f);
+            // var y = x * 3.0f;
+            // var grad = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero");
+            // using (var sess = tf.Session())
+            // {
+            //     self.assertEquals(3.0, self.evaluate(grad)[0]);
+            // }
         }
 
-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnknownUnconnectedGradientsValueGiven()
         {
             //    with self.assertRaisesRegexp(
             //        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
             //        gradients.gradients([y], [x], unconnected_gradients="nonsense")
-        }
-
-
-
-        /*
-
-
-
-        */
     }
 }
diff --git a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
index c42445cf1..d671b6096 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
@@ -3,6 +3,7 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System;
 
 namespace TensorFlowNET.UnitTest
 {
@@ -22,13 +23,86 @@ public void Initialize()
             contents = tf.io.read_file(imgPath);
         }
 
+        [TestMethod]
+        public void adjust_contrast()
+        {
+            var input = np.array(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f);
+            var image = tf.reshape(input, new int[] { 3, 3, 1 });
+
+            var init = tf.global_variables_initializer();
+            var sess = tf.Session();
+            sess.run(init);
+            var adjust_contrast = tf.image.adjust_contrast(image, 2.0f);
+            var result = sess.run(adjust_contrast);
+            var res = np.array(-4f, -2f, 0f, 2f, 4f, 6f, 8f, 10f, 12f).reshape((3, 3, 1));
+            Assert.AreEqual(result.numpy(), res);
+        }
+
+        [Ignore]
+        [TestMethod]
+        public void adjust_hue()
+        {
+            var image = tf.constant(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 });
+            image = tf.reshape(image, new int[] { 3, 2, 3 });
+            var adjusted_image = tf.image.adjust_hue(image, 0.2f);
+            var res = tf.constant(new int[] { 2, 1, 3, 4, 5, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 18 });
+            res = tf.reshape(res, (3, 2, 3));
+            Assert.AreEqual(adjusted_image, res);
+        }
+
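For context on the expected values in the adjust_contrast test above, the op rescales pixels around the per-channel mean:

    // output = (x - mean(x)) * contrast_factor + mean(x)
    // mean(0..8) = 4, factor = 2  =>  2 * (0 - 4) + 4 = -4, ..., 2 * (8 - 4) + 4 = 12

which matches the asserted -4..12 ramp.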
+        [TestMethod]
+        public void combined_non_max_suppression()
+        {
+            var boxesX = tf.constant(new float[,] { { 200, 100, 150, 100 }, { 220, 120, 150, 100 }, { 190, 110, 150, 100 }, { 210, 112, 150, 100 } });
+            var boxes1 = tf.reshape(boxesX, (1, 4, 1, 4));
+            var scoresX = tf.constant(new float[,] { { 0.2f, 0.7f, 0.1f }, { 0.1f, 0.8f, 0.1f }, { 0.3f, 0.6f, 0.1f }, { 0.05f, 0.9f, 0.05f } });
+            var scores1 = tf.reshape(scoresX, (1, 4, 3));
+
+            var init = tf.global_variables_initializer();
+            var sess = tf.Session();
+            sess.run(init);
+
+            var (boxes, scores, classes, valid_detections) = tf.image.combined_non_max_suppression(boxes1, scores1, 10, 10, 0.5f, 0.2f, clip_boxes: false);
+            var result = sess.run((boxes, scores, classes, valid_detections));
+
+            var boxes_gt = tf.constant(new float[,] { { 210f, 112f, 150f, 100f }, { 200f, 100f, 150f, 100f }, { 190f, 110f, 150f, 100f },
+                { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f }, { 0f, 0f, 0f, 0f } });
+            boxes_gt = tf.reshape(boxes_gt, (1, 10, 4));
+            Assert.AreEqual(result.Item1.numpy(), boxes_gt.numpy());
+            var scores_gt = tf.constant(new float[,] { { 0.9f, 0.7f, 0.3f, 0f, 0f, 0f, 0f, 0f, 0f, 0f } });
+            scores_gt = tf.reshape(scores_gt, (1, 10));
+            Assert.AreEqual(result.Item2.numpy(), scores_gt.numpy());
+            var classes_gt = tf.constant(new float[,] { { 1f, 1f, 0f, 0f, 0f, 0f, 0f, 0f, 0f, 0f } });
+            classes_gt = tf.reshape(classes_gt, (1, 10));
+            Assert.AreEqual(result.Item3.numpy(), classes_gt.numpy());
+            var valid_detections_gt = tf.constant(new int[,] { { 3 } });
+            valid_detections_gt = tf.reshape(valid_detections_gt, (1));
+            Assert.AreEqual(result.Item4.numpy(), valid_detections_gt.numpy());
+        }
+
+        [TestMethod]
+        public void crop_and_resize()
+        {
+            int BATCH_SIZE = 1;
+            int NUM_BOXES = 5;
+            int IMAGE_HEIGHT = 256;
+            int IMAGE_WIDTH = 256;
+            int CHANNELS = 3;
+            var crop_size = tf.constant(new int[] { 24, 24 });
+            var image = tf.random.uniform((BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS));
+            var boxes = tf.random.uniform((NUM_BOXES, 4));
+            var box_ind = tf.random.uniform((NUM_BOXES), minval: 0, maxval: BATCH_SIZE, dtype: TF_DataType.TF_INT32);
+            var output = tf.image.crop_and_resize(image, boxes, box_ind, crop_size);
+            Assert.AreEqual((5, 24, 24, 3), output.shape);
+        }
+
         [TestMethod]
         public void decode_image()
         {
             var img = tf.image.decode_image(contents);
             Assert.AreEqual(img.name, "decode_image/DecodeImage:0");
         }
-        
+
         [TestMethod]
         public void resize_image()
         {
diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
index 513791933..ccf59f5ae 100644
--- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs
@@ -6,6 +6,8 @@
 using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using OneOf.Types;
+using System.Collections.Generic;
 
 namespace TensorFlowNET.UnitTest
 {
@@ -139,6 +141,21 @@ public void assertProtoEquals(object toProto, object o)
 
         #region tensor evaluation and test session
 
+        private Session _cached_session = null;
+        private Graph _cached_graph = null;
+        private object _cached_config = null;
+        private bool _cached_force_gpu = false;
+
+        private void _ClearCachedSession()
+        {
+            if (self._cached_session != null)
+            {
+                self._cached_session.Dispose();
+                self._cached_session = null;
+            }
+        }
+
         //protected object _eval_helper(Tensor[] tensors)
         //{
         //    if (tensors == null)
@@ -203,10 +220,56 @@ public T evaluate<T>(Tensor tensor)
             }
         }
 
-
-        public Session cached_session()
+        /// <summary>Returns a TensorFlow Session for use in executing tests.</summary>
+        public Session cached_session(
+            Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false)
         {
-            throw new NotImplementedException();
+            // This method behaves differently than self.session(): for performance reasons
+            // `cached_session` will by default reuse the same session within the same
+            // test. The session returned by this function will only be closed at the end
+            // of the test (in the TearDown function).
+
+            // Use the `use_gpu` and `force_gpu` options to control where ops are run. If
+            // `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if
+            // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as
+            // possible. If both `force_gpu` and `use_gpu` are False, all ops are pinned to
+            // the CPU.
+
+            // Example:
+            // python
+            // class MyOperatorTest(test_util.TensorFlowTestCase):
+            //     def testMyOperator(self):
+            //         with self.cached_session() as sess:
+            //             valid_input = [1.0, 2.0, 3.0, 4.0, 5.0]
+            //             result = MyOperator(valid_input).eval()
+            //             self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0])
+            //             invalid_input = [-1.0, 2.0, 7.0]
+            //             with self.assertRaisesOpError("negative input not supported"):
+            //                 MyOperator(invalid_input).eval()
+
+            // Args:
+            //     graph: Optional graph to use during the returned session.
+            //     config: An optional config_pb2.ConfigProto to use to configure the
+            //         session.
+            //     use_gpu: If True, attempt to run as many ops as possible on GPU.
+            //     force_gpu: If True, pin all ops to `/device:GPU:0`.
+
+            // Yields:
+            //     A Session object that should be used as a context manager to surround
+            //     the graph building and execution code in a test case.
+
+            // TODO:
+            // if context.executing_eagerly():
+            //     return self._eval_helper(tensors)
+            // else:
+            {
+                var sess = self._get_cached_session(
+                    graph, config, force_gpu, crash_if_inconsistent_args: true);
+                return self._constrain_devices_and_set_default(sess, use_gpu, force_gpu);
+            }
         }
 
         //Returns a TensorFlow Session for use in executing tests.
@@ -254,6 +317,39 @@ public Session session(Graph graph = null, object config = null, bool use_gpu =
             return s.as_default();
         }
 
+        private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu)
+        {
+            // Set the session and its graph to global default and constrain devices.
+            if (tf.executing_eagerly())
+                return null;
+            else
+            {
+                sess.graph.as_default();
+                sess.as_default();
+                if (force_gpu)
+                {
+                    // TODO:
+                    // Use the name of an actual device if one is detected, or
+                    // '/device:GPU:0' otherwise
+                    /* var gpu_name = gpu_device_name();
+                       if (!gpu_name)
+                           gpu_name = "/device:GPU:0";
+                       using (sess.graph.device(gpu_name))
+                       {
+                           yield return sess;
+                       } */
+                    return sess;
+                }
+                else if (use_gpu)
+                    return sess;
+                else
+                    using (sess.graph.device("/device:CPU:0"))
+                        return sess;
+            }
+        }
+
         // See session() for details.
         private Session _create_session(Graph graph, object cfg, bool forceGpu)
         {
@@ -298,6 +394,50 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu)
             return new Session(graph);//, config = prepare_config(config))
         }
 
+        private Session _get_cached_session(
+            Graph graph = null,
+            object config = null,
+            bool force_gpu = false,
+            bool crash_if_inconsistent_args = true)
+        {
+            // See cached_session() for documentation.
+            if (self._cached_session == null)
+            {
+                var sess = self._create_session(graph, config, force_gpu);
+                self._cached_session = sess;
+                self._cached_graph = graph;
+                self._cached_config = config;
+                self._cached_force_gpu = force_gpu;
+                return sess;
+            }
+            else
+            {
+                if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph))
+                    throw new ValueError(@"The graph used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                if (crash_if_inconsistent_args && !self._cached_config.Equals(config))
+                {
+                    throw new ValueError(@"The config used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu))
+                {
+                    throw new ValueError(@"The force_gpu value used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                return _cached_session;
+            }
+        }
+
+        [TestCleanup]
+        public void Cleanup()
+        {
+            _ClearCachedSession();
+        }
+
         #endregion
 
         public void AssetSequenceEqual<T>(T[] a, T[] b)
diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb
new file mode 100644
index 000000000..c37cc37bd
--- /dev/null
+++ b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/fingerprint.pb
@@ -0,0 +1 @@
+̟땐͉ Σ(ռ2
\ No newline at end of file
diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb
new file mode 100644
index 000000000..5fe8f1a65
--- /dev/null
+++ b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/keras_metadata.pb
@@ -0,0 +1,7 @@
+
+&root"_tf_keras_sequential*&{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config":
{"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 9, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 5, 3]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 5, 3]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 5, 3]}, "float32", "input_1"]}, "keras_version": "2.12.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 5, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "shared_object_id": 0}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8}]}}, "training_config": {"loss": "binary_crossentropy", "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 11}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": false, "is_legacy_optimizer": false, "learning_rate": 0.0010000000474974513, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 + root.layer_with_weights-0"_tf_keras_rnn_layer* {"name": "lstm", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, 
"preserve_input_structure_in_config": false, "autocast": true, "class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 3]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 12}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 5, 3]}}2 +root.layer_with_weights-1"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 32}}, "shared_object_id": 13}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 32]}}2 +root.layer_with_weights-0.cell"_tf_keras_layer*{"name": "lstm_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "LSTMCell", "config": {"name": "lstm_cell", "trainable": true, "dtype": "float32", "units": 32, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 4, "build_input_shape": {"class_name": "__tuple__", "items": [null, 3]}}2 +Rroot.keras_api.metrics.0"_tf_keras_metric*{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": 
"float32"}, "shared_object_id": 14}2 +Sroot.keras_api.metrics.1"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "accuracy", "dtype": "float32", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 11}2 \ No newline at end of file diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb new file mode 100644 index 000000000..618c800eb Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/saved_model.pb differ diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 new file mode 100644 index 000000000..ea67db4f4 Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.data-00000-of-00001 differ diff --git a/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index new file mode 100644 index 000000000..11f13d165 Binary files /dev/null and b/test/TensorFlowNET.Keras.UnitTest/Assets/lstm_from_sequential/variables/variables.index differ diff --git a/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs b/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs new file mode 100644 index 000000000..256eb69c1 --- /dev/null +++ b/test/TensorFlowNET.Keras.UnitTest/InitLayerNameTest.cs @@ -0,0 +1,33 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow.Keras.Layers; +using static Tensorflow.Binding; +using static Tensorflow.KerasApi; + +namespace Tensorflow.Keras.UnitTest +{ + [TestClass] + public class InitLayerNameTest + { + [TestMethod] + public void RNNLayerNameTest() + { + var simpleRnnCell = keras.layers.SimpleRNNCell(1); + Assert.AreEqual("simple_rnn_cell", simpleRnnCell.Name); + var simpleRnn = keras.layers.SimpleRNN(2); + Assert.AreEqual("simple_rnn", simpleRnn.Name); + var lstmCell = keras.layers.LSTMCell(2); + Assert.AreEqual("lstm_cell", lstmCell.Name); + var lstm = keras.layers.LSTM(3); + Assert.AreEqual("lstm", lstm.Name); + } + + [TestMethod] + public void ConvLayerNameTest() + { + var conv2d = keras.layers.Conv2D(8, activation: "linear"); + Assert.AreEqual("conv2d", conv2d.Name); + var conv2dTranspose = keras.layers.Conv2DTranspose(8); + Assert.AreEqual("conv2d_transpose", conv2dTranspose.Name); + } + } +} diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs index 98d909668..7ebb53db3 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs @@ -110,6 +110,17 @@ public void Embedding() var output_array = model.predict(input_array); Assert.AreEqual((32, 10, 64), output_array.shape); } + [TestMethod] + public void EmbeddingGrad() + { + var inputs = keras.layers.Input(shape: new[] { 32, 10 }); + var outputs = keras.layers.Embedding(1000, 64, input_length: 10).Apply(inputs); + var model = keras.Model(inputs: inputs, outputs: outputs); + var input_array = np.random.randint(1000, size: (1, 32, 10)); + var output_array = np.random.random(size: (1, 32, 10, 64)); + model.compile("rmsprop", "mse", new[] { "accuracy" }); + model.fit(input_array, output_array); + } /// /// 
+            var inputs = keras.layers.Input(shape: new[] { 32, 10 });
+            var outputs = keras.layers.Embedding(1000, 64, input_length: 10).Apply(inputs);
+            var model = keras.Model(inputs: inputs, outputs: outputs);
+            var input_array = np.random.randint(1000, size: (1, 32, 10));
+            var output_array = np.random.random(size: (1, 32, 10, 64));
+            model.compile("rmsprop", "mse", new[] { "accuracy" });
+            model.fit(input_array, output_array);
+        }
 
         /// <summary>
         /// https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense
diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
index 8eeee7a88..dbf5cae1e 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs
@@ -5,8 +5,9 @@
 using System.Text;
 using System.Threading.Tasks;
 using Tensorflow.Common.Types;
+using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;
-using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.Keras.Layers;
 using Tensorflow.Keras.Saving;
 using Tensorflow.NumPy;
 using Tensorflow.Train;
@@ -38,8 +39,6 @@ public void StackedRNNCell()
             var cells = new IRnnCell[] { tf.keras.layers.SimpleRNNCell(4), tf.keras.layers.SimpleRNNCell(5) };
             var stackedRNNCell = tf.keras.layers.StackedRNNCells(cells);
             var (output, state) = stackedRNNCell.Apply(inputs, states);
-            Console.WriteLine(output);
-            Console.WriteLine(state.shape);
             Assert.AreEqual((32, 5), output.shape);
             Assert.AreEqual((32, 4), state[0].shape);
         }
@@ -108,6 +107,7 @@ public void RNNForSimpleRNNCell()
             var inputs = tf.random.normal((32, 10, 8));
             var cell = tf.keras.layers.SimpleRNNCell(10, dropout: 0.5f, recurrent_dropout: 0.5f);
             var rnn = tf.keras.layers.RNN(cell: cell);
+            var cfg = rnn.get_config();
             var output = rnn.Apply(inputs);
 
             Assert.AreEqual((32, 10), output.shape);
@@ -132,5 +132,36 @@ public void RNNForLSTMCell()
             Console.WriteLine($"output: {output}");
             Assert.AreEqual((5, 4), output.shape);
         }
+
+        [TestMethod]
+        public void GRUCell()
+        {
+            var inputs = tf.random.normal((32, 10, 8));
+            var rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4));
+            var output = rnn.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+            rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4, reset_after: false, use_bias: false));
+            output = rnn.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+        }
+
+        [TestMethod]
+        public void GRU()
+        {
+            var inputs = tf.ones((32, 10, 8));
+            var gru = tf.keras.layers.GRU(4);
+            var output = gru.Apply(inputs);
+            Assert.AreEqual((32, 4), output.shape);
+        }
+
+        [TestMethod]
+        public void Bidirectional()
+        {
+            var bi = tf.keras.layers.Bidirectional(keras.layers.LSTM(10, return_sequences: true));
+            var inputs = tf.random.normal((32, 10, 8));
+            var outputs = bi.Apply(inputs);
+            Assert.AreEqual((32, 10, 20), outputs.shape);
+        }
     }
}
diff --git a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
index 10db2bd11..cb570fc0c 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs
@@ -1,5 +1,7 @@
-using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Microsoft.VisualStudio.TestPlatform.Utilities;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System.Linq;
+using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Optimizers;
 using Tensorflow.Keras.UnitTest.Helpers;
 using Tensorflow.NumPy;
@@ -79,6 +81,18 @@ public void ModelWithSelfDefinedModule()
             model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size, num_epochs);
         }
 
+        [Ignore]
+        [TestMethod]
+        public void LSTMLoad()
+        {
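+            // Round-trip check for SavedModel support: reload the LSTM model
+            // shipped under Assets/lstm_from_sequential, then re-compile and
+            // fit it on random data to confirm the restored graph is trainable.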
+            var model = tf.keras.models.load_model(@"Assets/lstm_from_sequential");
+            model.summary();
+            model.compile(tf.keras.optimizers.Adam(), tf.keras.losses.MeanSquaredError(), new string[] { "accuracy" });
+            var inputs = tf.random.normal(shape: (10, 5, 3));
+            var outputs = tf.random.normal(shape: (10, 1));
+            model.fit(inputs.numpy(), outputs.numpy(), batch_size: 10, epochs: 5, workers: 16, use_multiprocessing: true);
+        }
+
         [Ignore]
         [TestMethod]
         public void VGG19()
diff --git a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
index 58c176e82..3910eba1c 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
+++ b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
@@ -65,6 +65,22 @@
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Include="Assets\lstm_from_sequential\fingerprint.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\keras_metadata.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\saved_model.pb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\variables\variables.data-00000-of-00001">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="Assets\lstm_from_sequential\variables\variables.index">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
index 8317346ea..183544ab6 100644
--- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
+++ b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs
@@ -1,7 +1,10 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using System;
+using System.Collections.Generic;
 using System.Linq;
+using Tensorflow.NumPy;
 using static Tensorflow.Binding;
+using static Tensorflow.KerasApi;
 
 namespace TensorFlowNET.UnitTest.Dataset
 {
@@ -195,5 +198,40 @@ public void Shuffle()
 
             Assert.IsFalse(allEqual);
         }
+        [Ignore]
+        [TestMethod]
+        public void GetData()
+        {
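+            // Smoke test for the IMDB loader: load the top-20k-word vocabulary,
+            // strip the zero padding from each review (RemoveZeros below), and
+            // re-pad every sequence to a fixed length of `maxlen` tokens.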
+            var vocab_size = 20000; // Only consider the top 20k words
+            var maxlen = 200; // Only consider the first 200 words of each movie review
+            var dataset = keras.datasets.imdb.load_data(num_words: vocab_size, maxlen: maxlen);
+            var x_train = dataset.Train.Item1;
+            var y_train = dataset.Train.Item2;
+            var x_val = dataset.Test.Item1;
+            var y_val = dataset.Test.Item2;
+
+            x_train = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_train), maxlen: maxlen);
+            x_val = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_val), maxlen: maxlen);
+            print(len(x_train) + " Training sequences");
+            print(len(x_val) + " Validation sequences");
+        }
+        // Truncates each row at its first zero, i.e. drops the trailing
+        // zero padding from each ragged review sequence.
+        IEnumerable<int[]> RemoveZeros(NDArray data)
+        {
+            var data_array = (int[,])data.ToMultiDimArray<int>();
+            List<int[]> new_data = new List<int[]>();
+            for (var i = 0; i < data_array.GetLength(0); i++)
+            {
+                List<int> new_array = new List<int>();
+                for (var j = 0; j < data_array.GetLength(1); j++)
+                {
+                    if (data_array[i, j] == 0)
+                        break;
+                    else
+                        new_array.Add(data_array[i, j]);
+                }
+                new_data.Add(new_array.ToArray());
+            }
+            return new_data;
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
index 72f598e46..675689bb1 100644
--- a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
@@ -2,6 +2,7 @@
 using Tensorflow.NumPy;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System.Linq;
 
 namespace TensorFlowNET.UnitTest.ManagedAPI
 {
@@ -92,5 +93,17 @@ public void TensorArray()
             Assert.AreEqual(ta.read(1).numpy(), 20f);
             Assert.AreEqual(ta.read(2).numpy(), 30f);
         }
+
+        /// <summary>
+        /// https://www.tensorflow.org/api_docs/python/tf/reverse
+        /// </summary>
+        [TestMethod]
+        public void ReverseArray()
+        {
+            var a = tf.random.normal((2, 3));
+            var b = tf.reverse(a, -1);
+            Assert.IsTrue(Equal(a[0].ToArray<float>().Reverse().ToArray(), b[0].ToArray<float>()));
+            Assert.IsTrue(Equal(a[1].ToArray<float>().Reverse().ToArray(), b[1].ToArray<float>()));
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
index 42ac641b1..411deb18f 100644
--- a/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/MathApiTest.cs
@@ -1,6 +1,8 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
 using System.Linq;
 using Tensorflow;
+using Tensorflow.NumPy;
 using static Tensorflow.Binding;
 
 namespace TensorFlowNET.UnitTest.ManagedAPI
@@ -57,5 +59,26 @@ public void Erf()
             var actual = erf.ToArray<float>();
             Assert.IsTrue(Equal(expected, actual));
         }
+
+        [TestMethod]
+        public void ReduceEuclideanNorm()
+        {
+            var x = tf.constant(new[,] { { 1, 2, 3 }, { 1, 1, 1 } });
+            Assert.AreEqual(tf.math.reduce_euclidean_norm(x).numpy(), 4);
+
+            var y = tf.constant(new[,] { { 1, 2, 3 }, { 1, 1, 1 } }, dtype: tf.float32);
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y).numpy(), 4.1231055f));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 0).ToArray<float>(),
+                new float[] { np.sqrt(2f), np.sqrt(5f), np.sqrt(10f) }));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 1).ToArray<float>(),
+                new float[] { np.sqrt(14f), np.sqrt(3f) }));
+
+            Assert.IsTrue(Equal(tf.math.reduce_euclidean_norm(y, 1, keepdims: true).ToArray<float>(),
+                new float[] { np.sqrt(14f), np.sqrt(3f) }));
+
+            Assert.AreEqual(tf.math.reduce_euclidean_norm(y, (0, 1)).numpy(), np.sqrt(17f));
+        }
     }
}
diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
index 240960c91..7a6a7f92c 100644
--- a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
+++ b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj
@@ -41,8 +41,8 @@
-
-
+
+