functional model creation and composition · DiffSharp/DiffSharp@091447c

Commit 091447c (parent 30b9357)
5 files changed: +96 −40 lines

src/DiffSharp.Core/DiffSharp.fs

Lines changed: 2 additions & 0 deletions
@@ -157,6 +157,8 @@ type DiffSharp =
     static member conv1d(b:Tensor, ?stride:int, ?padding:int, ?dilation:int) = fun (a:Tensor) -> a.conv1d(b, ?stride=stride, ?padding=padding, ?dilation=dilation)
     static member conv2d(a:Tensor, b:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) = a.conv2d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
     static member conv2d(b:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) = fun (a:Tensor) -> a.conv2d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
+    static member conv3d(a:Tensor, b:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) = a.conv3d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
+    static member conv3d(b:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) = fun (a:Tensor) -> a.conv3d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)

     // Methods mirroring F# array modules
     // TODO: update to support non-float types once we have backing DTypes implemented
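
The second conv3d overload mirrors the curried conv1d/conv2d forms above: it takes only the filter and returns a Tensor -> Tensor function, so it composes with |> and >>. A minimal sketch of both call styles (the tensor shapes are invented for illustration):

    // Illustrative only: input is [batch; channels; depth; height; width].
    let x = dsharp.randn([1; 1; 8; 8; 8])     // one single-channel 8x8x8 volume
    let w = dsharp.randn([4; 1; 3; 3; 3])     // four 3x3x3 filters
    let y1 = dsharp.conv3d(x, w, stride=1)    // method-style call
    let y2 = x |> dsharp.conv3d(w, stride=1)  // curried, pipeline-friendly
    // y1 and y2 compute the same convolution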

src/DiffSharp.Core/Model.fs

Lines changed: 40 additions & 8 deletions
@@ -47,20 +47,28 @@ type ParameterDict() =
         let dd = d.copy()
         dd.unflatten(tensors)
         dd
+    override d.ToString() =
+        let sb = System.Text.StringBuilder()
+        for KeyValue(n, p) in d.values do sb.AppendLine(sprintf "%A, %A" n p) |> ignore
+        sb.ToString()
+

 [<AbstractClass>]
 type Model() =
     member val Parameters:ParameterDict = ParameterDict()
     member val SubModels:Dictionary<string, Model> = Dictionary()
-    member inline m.add(parameters:list<string * 'a>) =
-        for n, p in parameters do
+    member m.add(parameters:seq<obj>, ?names:seq<string>) =
+        let parameters = parameters |> Seq.toArray
+        let names = defaultArg names (Seq.init (parameters.Length) (fun i -> sprintf "p__%d" i)) |> Seq.toArray
+        if parameters.Length <> names.Length then failwithf "Expecting parameters.Length (%A) and names.Length (%A) to be the same" parameters.Length names.Length
+        for p, n in Array.zip parameters names do
             match (box p) with
             | :? Parameter as p ->
                 m.Parameters.add(n, p)
             | :? Model as mm ->
                 m.SubModels.Add(n, mm)
                 m.Parameters.add(mm.Parameters.map(fun (nn, pp:Parameter) -> (n + "__" + nn, pp)))
-            | _ -> failwithf "Unsupported type. Expecting a list<string * 'a> where 'a is Parameter or Model"
+            | _ -> failwithf "Unsupported type. Expecting a Parameter or Model"
     member m.forwardDiff(derivatives:ParameterDict) = m.Parameters.forwarddiff(derivatives)
     member m.reverseDiff() = m.Parameters.reverseDiff()
     member m.noDiff() = m.Parameters.noDiff()

@@ -76,6 +84,12 @@ type Model() =
     member m.forwardLoss (f:Tensor->Tensor->Tensor) (input:Tensor) (target:Tensor) (parameters:Tensor) =
         m.forwardCompose (f target) input parameters
     abstract member forward: Tensor -> Tensor
+    static member create ps f =
+        let model = { new Model() with override __.forward(x) = f x }
+        model.add(ps)
+        model
+    static member compose (model1:Model) (model2:Model) =
+        Model.create [model1; model2] (model1.forward >> model2.forward)


 type Weight() =

@@ -95,7 +109,7 @@ type Linear(inFeatures, outFeatures, ?bias:bool) =
     let w = Parameter(Weight.kaiming(inFeatures, outFeatures))
     let k = 1./sqrt (float outFeatures)
     let b = Parameter(if bias then Weight.standard([|outFeatures|], k) else dsharp.zero())
-    do base.add(["weight", w; "bias", b])
+    do base.add([w;b], ["Linear__weight"; "Linear__bias"])
     override l.forward(value) =
         let f = dsharp.matmul(value, w.value)
         if bias then f + b.value else f

@@ -107,7 +121,7 @@ type Conv1d(inChannels:int, outChannels:int, kernelSize:int, ?stride:int, ?paddi
     let k = 1./ sqrt (float (inChannels*kernelSize))
     let w = Parameter <| Weight.standard([|outChannels; inChannels; kernelSize|], k)
     let b = Parameter <| if bias then Weight.standard([|outChannels|], k) else dsharp.zero()
-    do base.add(["weight", w; "bias", b])
+    do base.add([w;b], ["Conv1d__weight"; "Conv1d__bias"])
     override c.forward(value) =
         let f = dsharp.conv1d(value, w.value, ?stride=stride, ?padding=padding, ?dilation=dilation)
         if bias then f + b.value.expand([value.shape.[0]; outChannels]).view([value.shape.[0]; outChannels; 1]) else f

@@ -119,13 +133,31 @@ type Conv2d(inChannels:int, outChannels:int, ?kernelSize:int, ?stride:int, ?padd
         match kernelSize, kernelSizes with
         | Some _ , Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes"
         | Some k, None -> [|k; k|]
-        | None, Some k -> k |> Array.ofSeq
+        | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 2 then failwithf "Expecting kernelSizes to have length two" else k
         | _ -> [|1; 1|]
     let bias = defaultArg bias true
     let k = 1./ sqrt (float (inChannels*kernelSizes.[0]*kernelSizes.[1]))
     let w = Parameter <| Weight.standard([|outChannels; inChannels; kernelSizes.[0]; kernelSizes.[1]|], k)
     let b = Parameter <| if bias then Weight.standard([|outChannels|], k) else dsharp.zero()
-    do base.add(["weight", w; "bias", b])
+    do base.add([w;b], ["Conv2d__weight"; "Conv2d__bias"])
     override c.forward(value) =
         let f = dsharp.conv2d(value, w.value, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
-        if bias then f + b.value.expand([value.shape.[0]; outChannels]).view([value.shape.[0]; outChannels; 1; 1]) else f
+        if bias then f + b.value.expand([value.shape.[0]; outChannels]).view([value.shape.[0]; outChannels; 1; 1]) else f
+
+
+type Conv3d(inChannels:int, outChannels:int, ?kernelSize:int, ?stride:int, ?padding:int, ?dilation:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>, ?dilations:seq<int>, ?bias:bool) =
+    inherit Model()
+    let kernelSizes =
+        match kernelSize, kernelSizes with
+        | Some _ , Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes"
+        | Some k, None -> [|k; k; k|]
+        | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 3 then failwithf "Expecting kernelSizes to have length three" else k
+        | _ -> [|1; 1; 1|]
+    let bias = defaultArg bias true
+    let k = 1./ sqrt (float (inChannels*kernelSizes.[0]*kernelSizes.[1]*kernelSizes.[2]))
+    let w = Parameter <| Weight.standard([|outChannels; inChannels; kernelSizes.[0]; kernelSizes.[1]; kernelSizes.[2]|], k)
+    let b = Parameter <| if bias then Weight.standard([|outChannels|], k) else dsharp.zero()
+    do base.add([w;b], ["Conv3d__weight"; "Conv3d__bias"])
+    override c.forward(value) =
+        let f = dsharp.conv3d(value, w.value, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
+        if bias then f + b.value.expand([value.shape.[0]; outChannels]).view([value.shape.[0]; outChannels; 1; 1; 1]) else f
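
Model.create above turns a list of parameters/sub-models plus a plain Tensor -> Tensor function into a Model, and Model.compose chains two models while registering both parameter sets. A small sketch of how this functional API might be used (the layer sizes are illustrative, not from the commit):

    // Hedged sketch, assuming the Linear layer and dsharp.relu shown elsewhere in this diff.
    let encoder =
        let fc = Linear(784, 32)
        Model.create [fc] (fc.forward >> dsharp.relu)
    let classifier =
        let fc = Linear(32, 10)
        Model.create [fc] fc.forward
    // compose registers both sub-models' parameters under auto-generated names
    let net = Model.compose encoder classifier
    let logits = net.forward (dsharp.randn([16; 784]))

Because add now auto-generates names (p__0, p__1, ...) when none are given, anonymous functional models need no manual naming, while the built-in layers pass explicit names such as "Linear__weight" to keep their parameter dictionaries readable.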

src/DiffSharp.Core/Tensor.fs

Lines changed: 6 additions & 6 deletions
@@ -1130,19 +1130,19 @@ type Tensor =
         match stride, strides with
         | Some _ , Some _ -> failwithf "Expecting only one of stride, strides"
         | Some s, None -> [|s; s|]
-        | None, Some s -> s |> Array.ofSeq
+        | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 2 then failwithf "Expecting strides to have length two" else s
         | _ -> [|1; 1|]
     let paddings =
         match padding, paddings with
         | Some _ , Some _ -> failwithf "Expecting only one of padding, paddings"
         | Some p, None -> [|p; p|]
-        | None, Some p -> p |> Array.ofSeq
+        | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 2 then failwithf "Expecting paddings to have length two" else p
         | _ -> [|0; 0|]
     let dilations =
         match dilation, dilations with
         | Some _ , Some _ -> failwithf "Expecting only one of dilation, dilations"
         | Some d, None -> [|d; d|]
-        | None, Some d -> d |> Array.ofSeq
+        | None, Some d -> let d = d |> Array.ofSeq in if d.Length <> 2 then failwithf "Expecting dilations to have length two" else d
         | _ -> [|1; 1|]
     checkCanConv2d a.shape b.shape strides paddings dilations
     let mutable b = b

@@ -1217,19 +1217,19 @@ type Tensor =
         match stride, strides with
         | Some _ , Some _ -> failwithf "Expecting only one of stride, strides"
         | Some s, None -> [|s; s; s|]
-        | None, Some s -> s |> Array.ofSeq
+        | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 3 then failwithf "Expecting strides to have length three" else s
         | _ -> [|1; 1; 1|]
     let paddings =
         match padding, paddings with
         | Some _ , Some _ -> failwithf "Expecting only one of padding, paddings"
         | Some p, None -> [|p; p; p|]
-        | None, Some p -> p |> Array.ofSeq
+        | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 3 then failwithf "Expecting paddings to have length three" else p
         | _ -> [|0; 0; 0|]
     let dilations =
         match dilation, dilations with
         | Some _ , Some _ -> failwithf "Expecting only one of dilation, dilations"
         | Some d, None -> [|d; d; d|]
-        | None, Some d -> d |> Array.ofSeq
+        | None, Some d -> let d = d |> Array.ofSeq in if d.Length <> 3 then failwithf "Expecting dilations to have length three" else d
         | _ -> [|1; 1; 1|]
     checkCanConv3d a.shape b.shape strides paddings dilations
     let mutable b = b
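
These six changes make conv2d and conv3d validate the lengths of the strides, paddings, and dilations sequences up front, failing fast instead of mis-indexing later. A hypothetical illustration for conv2d (the shapes are invented for the example):

    let x = dsharp.randn([1; 1; 8; 8])      // [batch; channels; height; width]
    let w = dsharp.randn([4; 1; 3; 3])      // four 3x3 filters
    let ok = x.conv2d(w, strides=[2; 2])    // valid: one stride per spatial dimension
    // x.conv2d(w, strides=[2; 2; 2])       // now fails fast with
    //   "Expecting strides to have length two"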

src/DiffSharp.Tests/TestModel.fs

Lines changed: 5 additions & 0 deletions
@@ -34,3 +34,8 @@ type TestModel () =
         let d3flat = d3.flatten()
         Assert.AreEqual(d1flatCorrect, d3flat)

+    // [<Test>]
+    // member this.TestLinear () =
+    //     let n, dIn, h, dOut = 64, 1000, 100, 10
+    //     let x = dsharp.randn(n, dIn)
+    //     let y = dsharp.randn(n, dOut)
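
The commit leaves this test as a commented-out stub. One plausible continuation using the new Model.create API is sketched below; this is purely an assumption about where the test is headed, not part of the commit:

    // Hypothetical continuation of the TestLinear stub above.
    // let fc1 = Linear(dIn, h)
    // let fc2 = Linear(h, dOut)
    // let net = Model.create [fc1; fc2] (fc1.forward >> dsharp.relu >> fc2.forward)
    // Assert.AreEqual(dIn*h + h + h*dOut + dOut, net.nparameters())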

src/Test/Program.fs

Lines changed: 43 additions & 26 deletions
@@ -29,14 +29,12 @@ open DiffSharp.Backend.None

 type Net() =
     inherit Model()
-    let conv1 = Conv2d(1, 32, 3)
-    let conv2 = Conv2d(32, 64, 3)
+    let conv1 = Conv2d(1, 2, 3)
+    let conv2 = Conv2d(2, 4, 3)
     let k = dsharp.randn([1;1;28;28]) |> conv1.forward |> conv2.forward |> dsharp.nelement
     let fc1 = Linear(k, 128)
     let fc2 = Linear(128, 10)
-    do
-        base.add(["conv1", conv1; "conv2", conv2])
-        base.add(["fc1", fc1; "fc2", fc2])
+    do base.add([conv1; conv2; fc1; fc2])
     override __.forward(x) =
         x
         // |> dsharp.view [-1; 28*28]

@@ -50,7 +48,6 @@ type Net() =
         |> fc2.forward


-
 [<EntryPoint>]
 let main _argv =
     printfn "Hello World from F#!"

@@ -60,30 +57,50 @@ let main _argv =
     let dataset = MNIST("./data", train=true)
     let dataloader = dataset.loader(8, shuffle=true, numBatches=50)

-    let net = Net()
+    // let net = Net()
+
+    let cnn () =
+        let conv1 = Conv2d(1, 2, 3)
+        let conv2 = Conv2d(2, 4, 3)
+        let k = dsharp.randn([1;1;28;28]) |> conv1.forward |> conv2.forward |> dsharp.nelement
+        let fc1 = Linear(k, 128)
+        let fc2 = Linear(128, 10)
+        Model.create [conv1; conv2; fc1; fc2]
+            (conv1.forward
+             >> dsharp.relu
+             >> conv2.forward
+             >> dsharp.relu
+             >> dsharp.flatten 1
+             >> fc1.forward
+             >> dsharp.relu
+             >> fc2.forward)
+    let net = cnn()
+
     printfn "params: %A" (net.nparameters())
+    // printfn "params: %A" (net.Parameters)

-    let optimizer = SGD(net, learningRate=dsharp.tensor(0.01), momentum=dsharp.tensor(0.9), nesterov=true)
-    let mutable epoch = -1
-    let mutable stop = false
-    while not stop do
-        epoch <- epoch + 1
-        for i, data, targets in dataloader.epoch() do
-            net.reverseDiff()
-            let o = net.forward(data)
-            let loss = dsharp.crossEntropyLoss(o, targets)
-            loss.reverse()
-            optimizer.step()
+    // let optimizer = SGD(net, learningRate=dsharp.tensor(0.01), momentum=dsharp.tensor(0.9), nesterov=true)
+    // let mutable epoch = -1
+    // let mutable stop = false
+    // while not stop do
+    //     epoch <- epoch + 1
+    //     for i, data, targets in dataloader.epoch() do
+    //         net.reverseDiff()
+    //         let o = net.forward(data)
+    //         let loss = dsharp.crossEntropyLoss(o, targets)
+    //         loss.reverse()
+    //         optimizer.step()

-            let loss = loss.toScalar() :?> float32
-            printfn "epoch %A, minibatch %A, loss %A\r" epoch i loss
+    //         let loss = loss.toScalar() :?> float32
+    //         printfn "epoch %A, minibatch %A, loss %A\r" epoch i loss

     // let loss data target p = net.forwardCompose (dsharp.crossEntropyLoss(target=target)) data p
-    // let loss = net.forwardLoss dsharp.crossEntropyLoss
-    // let mutable p = net.getParameters()
-    // for i, data, target in dataloader.epoch() do
-    //     let loss, g = dsharp.pgrad (loss data target) p
-    //     p <- p - 0.1 * g
-    //     printfn "%A %A" i loss
+    let loss = net.forwardLoss dsharp.crossEntropyLoss
+    let mutable p = net.getParameters()
+    for i, data, target in dataloader.epoch() do
+        let loss, g = dsharp.pgrad (loss data target) p
+        p <- p - 0.1 * g
+        printfn "%A %A" i loss


     0 // return an integer exit code
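
The live loop above treats training as pure function evaluation: net.forwardLoss makes the loss a function of the flattened parameter vector, and dsharp.pgrad returns the loss value and gradient together, so the update is plain tensor arithmetic. A hedged helper capturing that step (a sketch, assuming the (Tensor -> Tensor) -> Tensor -> Tensor * Tensor shape of pgrad seen in the loop; sgdStep is a hypothetical name, not part of the commit):

    // Hypothetical wrapper around the manual SGD step used in the loop above.
    let sgdStep (f:Tensor->Tensor) (lr:float) (p:Tensor) =
        let l, g = dsharp.pgrad f p   // loss value and gradient at p
        p - lr * g, l                 // updated parameters, plus the loss for logging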

0 commit comments