From 6ee17c8c9f6ebc14b99db2c29b7d71c0c9716cc5 Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 13:11:18 -0700 Subject: [PATCH 1/9] Adding synthetic data generation example. --- .gitignore | 2 + tutorials/CSharp/synthetic_data.ipynb | 389 ++++++++++++++++++++++++++ 2 files changed, 391 insertions(+) create mode 100644 tutorials/CSharp/synthetic_data.ipynb diff --git a/.gitignore b/.gitignore index 01060ef..2ceeb43 100644 --- a/.gitignore +++ b/.gitignore @@ -352,3 +352,5 @@ MigrationBackup/ # Ionide (cross platform F# VS Code tools) working folder .ionide/ +*.dat.x +*.dat.y diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb new file mode 100644 index 0000000..e83a338 --- /dev/null +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -0,0 +1,389 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using TorchSharp to Generate Synthetic Data for a Regression Problem\n", + "\n", + "This tutorial is based on a [PyTorch example](https://jamesmccaffrey.wordpress.com/2023/06/09/using-pytorch-to-generate-synthetic-data-for-a-regression-problem/) posted by James D. McCaffrey on his blog, ported to TorchSharp.\n", + "\n", + "Note that we're taking some shortcuts in this example -- rather than writing the data set as a text file that can be loaded from any modeling framework, we're saving the data as serialized TorchSharp tensors. It should be straightforward to modify the tutorial to write the data sets as text, instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "#r \"nuget: TorchSharp-cpu\"\n", + "\n", + "using TorchSharp;\n", + "using static TorchSharp.TensorExtensionMethods;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Neural networks can be used to generate data as well as to be trained on it. The synthetic data can then be used to evaluate different models to see how well they can copy the behavior of the network used to produce the data.\n", + "\n", + "First, we will create the model that will be used to generate the synthetic data. Later, we'll construct a second model that will be trained on the data the first model generates.\n",
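+ "\n", + "As an aside -- the introduction above notes that writing the data out as text should be straightforward. Here is a hedged sketch of what that could look like; the SaveAsCsv helper below is our own illustration, not part of the original tutorial:\n", + "\n", + "```csharp\n", + "using System.IO;\n", + "using System.Linq;\n", + "\n", + "// Illustrative only: write each row of X, followed by its label from y,\n", + "// as one comma-separated line of text.\n", + "void SaveAsCsv(torch.Tensor X, torch.Tensor y, string fileName)\n", + "{\n", + "    var lines = Enumerable.Range(0, (int)X.shape[0]).Select(i =>\n", + "        string.Join(',', Enumerable.Range(0, (int)X.shape[1])\n", + "            .Select(j => X[i, j].item<float>())) + \",\" + y[i, 0].item<float>());\n", + "    File.WriteAllLines(fileName, lines);\n", + "}\n", + "```"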
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "class Net : torch.nn.Module\n", + "{\n", + " private torch.nn.Module hid1;\n", + " private torch.nn.Module oupt;\n", + "\n", + " public Net(int n_in) : base(nameof(Net))\n", + " {\n", + " var h = torch.nn.Linear(n_in, 10);\n", + " var o = torch.nn.Linear(10,1);\n", + "\n", + " var lim = 0.80;\n", + " torch.nn.init.uniform_(h.weight, -lim, lim);\n", + " torch.nn.init.uniform_(h.bias, -lim, lim);\n", + " torch.nn.init.uniform_(o.weight, -lim, lim);\n", + " torch.nn.init.uniform_(o.bias, -lim, lim);\n", + "\n", + " hid1 = h;\n", + " oupt = o;\n", + "\n", + " RegisterComponents();\n", + " }\n", + " public override torch.Tensor forward(torch.Tensor input)\n", + " {\n", + " using var _ = torch.NewDisposeScope();\n", + " var z = torch.tanh(hid1.call(input));\n", + " z = torch.sigmoid(oupt.call(z));\n", + " return z.MoveToOuterDisposeScope();\n", + " }\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have our generative network, we can define the method to create the data set. If you compare this with the PyTorch code, you will notice that we're relying on TorchSharp to generate a whole batch of data at once, rather than looping. We're also using TorchSharp instead of Numpy for the noise-generation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "void CreateDataFile(Net net, int n_in, string fileName, int n_items)\n", + "{\n", + "\n", + " var x_lo = -1.0;\n", + " var x_hi = 1.0;\n", + "\n", + " var X = (x_hi - x_lo) * torch.rand(new long[] {n_items, n_in}) + x_lo;\n", + "\n", + " torch.Tensor y;\n", + "\n", + " using (torch.no_grad()) {\n", + " y = net.call(X);\n", + " }\n", + "\n", + " // Add some noise in order to not make it too easy to train...\n", + " y += torch.randn(y.shape) * 0.01;\n", + "\n", + " // Make sure that the output isn't negative.\n", + " y = torch.where(y < 0.0, 0.01 * torch.randn(y.shape) + 0.01, y);\n", + "\n", + " // Save the data in two separate, binary files.\n", + " X.save(fileName + \".x\");\n", + " y.save(fileName + \".y\");\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "var net = new Net(6);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the data files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "CreateDataFile(net, 6, \"train.dat\", 200);\n", + "CreateDataFile(net, 6, \"test.dat\", 40);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load them again. This is just to demonstrate how to get the data from disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "var X_train = torch.Tensor.load(\"train.dat.x\");\n", + "var y_train = torch.Tensor.load(\"train.dat.y\");\n", + "var X_test = torch.Tensor.load(\"test.dat.x\");\n", + "var y_test = torch.Tensor.load(\"test.dat.y\");" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create another class, with slightly different logic, and train it on the generated data set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "class Net2 : torch.nn.Module\n", + "{\n", + " private torch.nn.Module hid1;\n", + " private torch.nn.Module oupt;\n", + "\n", + " public Net2(int n_in) : base(nameof(Net2))\n", + " {\n", + " hid1 = torch.nn.Linear(n_in, 5);\n", + " oupt = torch.nn.Linear(5,1);\n", + "\n", + " RegisterComponents();\n", + " }\n", + " public override torch.Tensor forward(torch.Tensor input)\n", + " {\n", + " using var _ = torch.NewDisposeScope();\n", + " var z = torch.nn.functional.relu(hid1.call(input));\n", + " z = torch.sigmoid(oupt.call(z));\n", + " return z.MoveToOuterDisposeScope();\n", + " }\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create an instance of the second network, choose a loss to use, and then we're ready to train it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "var model = new Net2(6);\n", + "\n", + "var loss = torch.nn.MSELoss();" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A standard training loop. It ends with evaluating the trained model on the training set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "var learning_rate = 0.01f;\n", + "\n", + "Console.WriteLine(\" initial loss = \" + loss.forward(model.forward(X_train), y_train).item().ToString());\n", + "\n", + "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n", + "\n", + "for (int i = 0; i < 10000; i++) {\n", + " // Compute the loss\n", + " using var output = loss.forward(model.forward(X_train), y_train);\n", + "\n", + " // Clear the gradients before doing the back-propagation\n", + " model.zero_grad();\n", + "\n", + " // Do back-propagation, which computes all the gradients.\n", + " output.backward();\n", + "\n", + " optimizer.step();\n", + "}\n", + "\n", + "Console.WriteLine(\" final loss = \" + loss.forward(model.forward(X_train), y_train).item());" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The thing we're really curious about is how the second model does on the test set, which it didn't see during training. 
If the loss is significantly greater than the one from the training set, we need to train more, i.e. start another epoch. If the test set loss doesn't get closer to the training set loss with more epochs, we may need more data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "loss.forward(model.forward(X_test), y_test).item()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 0b757b376bdbc06cd86874931bfbf684f8f2b6d1 Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 13:22:50 -0700 Subject: [PATCH 2/9] Cleaned up data file loading in synthetic data example. --- tutorials/CSharp/synthetic_data.ipynb | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb index e83a338..7a15230 100644 --- a/tutorials/CSharp/synthetic_data.ipynb +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -132,6 +132,11 @@ " // Save the data in two separate, binary files.\n", " X.save(fileName + \".x\");\n", " y.save(fileName + \".y\");\n", + "}\n", + "\n", + "(torch.Tensor X, torch.Tensor y) LoadDataFile(string fileName)\n", + "{\n", + " return (torch.Tensor.load(fileName + \".x\"), torch.Tensor.load(fileName + \".y\"));\n", "}" ] }, @@ -206,10 +211,8 @@ }, "outputs": [], "source": [ - "var X_train = torch.Tensor.load(\"train.dat.x\");\n", - "var y_train = torch.Tensor.load(\"train.dat.y\");\n", - "var X_test = torch.Tensor.load(\"test.dat.x\");\n", - "var y_test = torch.Tensor.load(\"test.dat.y\");" + "var (X_train, y_train) = LoadDataFile(\"train.dat\");\n", + "var (X_test, y_test) = LoadDataFile(\"test.dat\");" ] }, { From 4c7dbb02e0184072bedea5c87f83d6d7405853c2 Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 13:28:31 -0700 Subject: [PATCH 3/9] Use in-place tensor operators where possible. --- tutorials/CSharp/synthetic_data.ipynb | 126 +++++++++++++++++++++----- 1 file changed, 105 insertions(+), 21 deletions(-) diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb index 7a15230..164d8b1 100644 --- a/tutorials/CSharp/synthetic_data.ipynb +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -26,7 +26,17 @@ "languageId": "polyglot-notebook" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
Installed Packages
  • TorchSharp-cpu, 0.100.3
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#r \"nuget: TorchSharp-cpu\"\n", "\n", @@ -46,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": { "vscode": { "languageId": "polyglot-notebook" @@ -78,8 +88,8 @@ " public override torch.Tensor forward(torch.Tensor input)\n", " {\n", " using var _ = torch.NewDisposeScope();\n", - " var z = torch.tanh(hid1.call(input));\n", - " z = torch.sigmoid(oupt.call(z));\n", + " var z = hid1.call(input).tanh_();\n", + " z = oupt.call(z).sigmoid_();\n", " return z.MoveToOuterDisposeScope();\n", " }\n", "}" @@ -95,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 43, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -142,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -169,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -197,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -225,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -254,8 +264,8 @@ " public override torch.Tensor forward(torch.Tensor input)\n", " {\n", " using var _ = torch.NewDisposeScope();\n", - " var z = torch.nn.functional.relu(hid1.call(input));\n", - " z = torch.sigmoid(oupt.call(z));\n", + " var z = hid1.call(input).relu_();\n", + " z = oupt.call(z).sigmoid_();\n", " return z.MoveToOuterDisposeScope();\n", " }\n", "}" @@ -271,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -295,12 +305,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A standard training loop. It ends with evaluating the trained model on the training set." + "We need an optimizer." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -315,11 +325,44 @@ "outputs": [], "source": [ "var learning_rate = 0.01f;\n", - "\n", + "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A standard training loop. It ends with evaluating the trained model on the training set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " initial loss = 0.023704259\n", + " final loss = 0.023490703\n" + ] + } + ], + "source": [ "Console.WriteLine(\" initial loss = \" + loss.forward(model.forward(X_train), y_train).item().ToString());\n", "\n", - "var optimizer = torch.optim.SGD(model.parameters(), learning_rate);\n", - "\n", "for (int i = 0; i < 10000; i++) {\n", " // Compute the loss\n", " using var output = loss.forward(model.forward(X_train), y_train);\n", "\n", " // Clear the gradients before doing the back-propagation\n", " model.zero_grad();\n", "\n", " // Do back-propagation, which computes all the gradients.\n", " output.backward();\n", "\n", " optimizer.step();\n", "}\n", "\n", "Console.WriteLine(\" final loss = \" + loss.forward(model.forward(X_train), y_train).item());" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0.021710658
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "loss.forward(model.forward(X_test), y_test).item()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": { "dotnet_interactive": { "language": "csharp" From 0026cfd1433fa4803c4ce3b558d02c063ad21219 Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 13:37:52 -0700 Subject: [PATCH 4/9] Correcting a mistake in the synthetic example --- tutorials/CSharp/synthetic_data.ipynb | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb index 164d8b1..e1045bc 100644 --- a/tutorials/CSharp/synthetic_data.ipynb +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 61, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 62, "metadata": { "vscode": { "languageId": "polyglot-notebook" @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 63, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -137,7 +137,7 @@ " y += torch.randn(y.shape) * 0.01;\n", "\n", " // Make sure that the output isn't negative.\n", - " y = torch.where(y < 0.0, 0.01 * torch.randn(y.shape) + 0.01, y);\n", + " y += torch.where(y < 0.0, 0.01 * torch.randn(y.shape) + 0.01, torch.zeros(y.shape));\n", "\n", " // Save the data in two separate, binary files.\n", " X.save(fileName + \".x\");\n", @@ -152,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 64, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -179,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 65, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -207,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 66, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -235,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 67, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -281,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 68, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -310,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 69, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -338,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 78, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -355,8 +355,8 @@ "name": "stdout", "output_type": "stream", "text": [ - " initial loss = 0.023704259\n", - " final loss = 0.023490703\n" + " initial loss = 0.0063750837\n", + " final loss = 0.007656585\n" ] } ], @@ -389,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 79, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -405,7 +405,7 @@ { "data": { "text/html": [ - "
0.021710658
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "loss.forward(model.forward(X_test), y_test).item()" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" From ab7edaef53d60e8cede1b50f3c254dad0838b01d Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 16:58:35 -0700 Subject: [PATCH 8/9] Added batching and data loaders to the sample --- tutorials/CSharp/synthetic_data.ipynb | 247 +++++++++++++++++++++++--- 1 file changed, 223 insertions(+), 24 deletions(-) diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb index 64d1768..a5446b6 100644 --- a/tutorials/CSharp/synthetic_data.ipynb +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -9,6 +9,8 @@ "\n", "This tutorial is based on a [PyTorch example](https://jamesmccaffrey.wordpress.com/2023/06/09/using-pytorch-to-generate-synthetic-data-for-a-regression-problem/) posted by James D. McCaffrey on his blog, ported to TorchSharp.\n", "\n", + "Synthetic data sets can be very useful when evaluating and choosing a model.\n", + "\n", "Note that we're taking some shortcuts in this example -- rather than writing the data set as a text file that can be loaded from any modeling framework, we're saving the data as serialized TorchSharp tensors. Is should be straight-forward to modify the tutorial to write the data sets as text, instead." ] }, @@ -129,7 +131,7 @@ " y = net.call(X);\n", " }\n", "\n", - " // Add some noise in order to not make it too easy to train...\n", + " // Add some noise in order not to make it too easy to train...\n", " y += torch.randn(y.shape) * 0.01;\n", "\n", " // Make sure that the output isn't negative.\n", @@ -189,8 +191,8 @@ }, "outputs": [], "source": [ - "CreateDataFile(net, 6, \"train.dat\", 200);\n", - "CreateDataFile(net, 6, \"test.dat\", 40);" + "CreateDataFile(net, 6, \"train.dat\", 2000);\n", + "CreateDataFile(net, 6, \"test.dat\", 400);" ] }, { @@ -272,7 +274,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We create an instance of the second network, choose a loss to use, and then we're ready to train it." + "We create an instance of the second network, choose a loss to use, and then we're ready to train it. We also need an optimizer and maybe even an LR scheduler." ] }, { @@ -293,7 +295,11 @@ "source": [ "var model = new Net2(6);\n", "\n", - "var loss = torch.nn.MSELoss();" + "var loss = torch.nn.MSELoss();\n", + "\n", + "var learning_rate = 0.01f;\n", + "var optimizer = torch.optim.Rprop(model.parameters(), learning_rate);\n", + "var scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer);" ] }, { @@ -301,7 +307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We need an optimizer and maybe even an LR scheduler." + "A pretty standard training loop. The input is just in one batch. It ends with evaluating the trained model on the training set." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "Console.WriteLine(\" initial loss = \" + loss.forward(model.forward(X_train), y_train).item().ToString());\n", "\n", "for (int i = 0; i < 10000; i++) {\n", "\n", " // Compute the loss\n", " using var output = loss.forward(model.forward(X_train), y_train);\n", "\n", " // Clear the gradients before doing the back-propagation\n", " model.zero_grad();\n", "\n", " // Do back-propagation, which computes all the gradients.\n", " output.backward();\n", "\n", " optimizer.step();\n", " \n", " if (i % 100 == 99) {\n", " scheduler.step();\n", " }\n", "}\n", "\n", "Console.WriteLine(\" final loss = \" + loss.forward(model.forward(X_train), y_train).item());" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "The thing we're really curious about is how the second model does on the test set, which it didn't see during training. If the loss is significantly greater than the one from the training set, we need to train more, i.e. start another epoch. If the test set loss doesn't get closer to the training set loss with more epochs, we may need more data." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "loss.forward(model.forward(X_test), y_test).item()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "If we want to be a little bit more advanced, we can split the training set into batches. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "var N = X_train.shape[0]/10;\n", "var X_batch = X_train.split(N);\n", "var y_batch = y_train.split(N);" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "That means modifying the training loop, too. Running multiple batches can take longer, but the model may converge quicker, so the total time before you have the desired model may still be shorter.\n",
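"\n", "To make the batch shapes concrete, here is a quick sanity check -- our own illustration, not part of the original sample. split(N) chunks a tensor along its first dimension, so the 2000 training rows with N = 200 yield 10 batches of shape [200, 6]:\n", "\n", "```csharp\n", "var parts = torch.rand(new long[] {2000, 6}).split(200);\n", "Console.WriteLine(parts.Length);      // 10\n", "Console.WriteLine(parts[0].shape[0]); // 200\n", "```"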
] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "Console.WriteLine(\" initial loss = \" + loss.forward(model.forward(X_train), y_train).item().ToString());\n", "\n", - "for (int i = 0; i < 10000; i++) {\n", + "for (int i = 0; i < 5000; i++) {\n", "\n", - " // Compute the loss\n", - " using var output = loss.forward(model.forward(X_train), y_train);\n", + " for (var j = 0; j < X_batch.Length; j++) {\n", + " // Compute the loss\n", + " using var output = loss.forward(model.forward(X_batch[j]), y_batch[j]);\n", "\n", - " // Clear the gradients before doing the back-propagation\n", - " model.zero_grad();\n", + " // Clear the gradients before doing the back-propagation\n", + " model.zero_grad();\n", "\n", - " // Do back-propagation, which computes all the gradients.\n", - " output.backward();\n", + " // Do back-propagation, which computes all the gradients.\n", + " output.backward();\n", "\n", - " optimizer.step();\n", - " \n", - " if (i % 100 == 99) {\n", - " scheduler.step();\n", + " optimizer.step();\n", " }\n", + " \n", + " scheduler.step();\n", "}\n", "\n", "Console.WriteLine(\" final loss = \" + loss.forward(model.forward(X_train), y_train).item());" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "loss.forward(model.forward(X_test), y_test).item()" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "The thing we're really curious about is how the second model does on the test set, which it didn't see during training. If the loss is significantly greater than the one from the training set, we need to train more, i.e. start another epoch. If the test set loss doesn't get closer to the training set loss with more epochs, we may need more data." + "If we wanted to be really advanced, we would use TorchSharp data sets and data loaders, which would allow us to randomize the training data set between epochs (at the end of the outer training loop). Here's how we'd do that." ] }, { @@ -396,7 +495,37 @@ }, "outputs": [], "source": [ - "loss.forward(model.forward(X_test), y_test).item()" + "class SyntheticDataset : torch.utils.data.Dataset {\n", + "\n", + " public SyntheticDataset(string fileName) \n", + " {\n", + " _data = torch.Tensor.load(fileName + \".x\");\n", + " _labels = torch.Tensor.load(fileName + \".y\");\n", + " if (_data.shape[0] != _labels.shape[0])\n", + " throw new InvalidOperationException(\"Data and labels are not of the same lengths.\");\n", + " }\n", + "\n", + " public override Dictionary GetTensor(long index)\n", + " {\n", + " var rdic = new Dictionary();\n", + " rdic.Add(\"data\", _data[(int)index]);\n", + " rdic.Add(\"label\", _labels[(int)index]);\n", + " return rdic;\n", + " }\n", + "\n", + " public override long Count => _data.shape[0];\n", + "\n", + " private torch.Tensor _data;\n", + " private torch.Tensor _labels;\n", + "}" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The training loop gets slight more complex with the data set." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "var training_data = new SyntheticDataset(\"train.dat\");\n", "var train = new torch.utils.data.DataLoader(training_data, 200, shuffle: true);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "Console.WriteLine(\" initial loss = \" + loss.forward(model.forward(X_train), y_train).item().ToString());\n", "\n", "for (int i = 0; i < 1000; i++) {\n", "\n", " foreach (var data in train)\n", " {\n", " // Compute the loss\n", " using var output = loss.forward(model.forward(data[\"data\"]), data[\"label\"]);\n", "\n", " // Clear the gradients before doing the back-propagation\n", " model.zero_grad();\n", "\n", " // Do back-propagation, which computes all the gradients.\n", " output.backward();\n", "\n", " optimizer.step();\n", " }\n", " \n", " scheduler.step();\n", "}\n", "\n", "Console.WriteLine(\" final loss = \" + loss.forward(model.forward(X_train), y_train).item());" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "It's slower, and the convergence isn't that much better, but that will depend on the model used. You just have to try different things." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" }, "polyglot_notebook": { "kernelName": "csharp" }, "vscode": { "languageId": "polyglot-notebook" } }, "outputs": [], "source": [ "loss.forward(model.forward(X_test), y_test).item()" ] } ], "metadata": { "language_info": { "name": "python" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 } From 419f03a68b071aa766a79a7eeeb32cbfeb0a3a79 Mon Sep 17 00:00:00 2001 From: Niklas Gustafsson Date: Mon, 26 Jun 2023 19:05:48 -0700 Subject: [PATCH 9/9] Added commenting headers and an F# version --- tutorials/CSharp/synthetic_data.ipynb | 13 +- tutorials/FSharp/synthetic_data.ipynb | 594 ++++++++++++++++++++++++++ 2 files changed, 604 insertions(+), 3 deletions(-) create mode 100644 tutorials/FSharp/synthetic_data.ipynb diff --git a/tutorials/CSharp/synthetic_data.ipynb b/tutorials/CSharp/synthetic_data.ipynb index a5446b6..73f5fed 100644 --- a/tutorials/CSharp/synthetic_data.ipynb +++ b/tutorials/CSharp/synthetic_data.ipynb @@ -41,6 +41,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "#### Generative Network\n", "Neural networks can be used to generate data as well as to be trained on it. The synthetic data can then be used to evaluate different models to see how well they can copy the behavior of the network used to produce the data.\n", "\n", "First, we will create the model that will be used to generate the synthetic data. Later, we'll construct a second model that will be trained on the data the first model generates." ] }, @@ -200,7 +201,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Load them again. This is just to demonstrate how to get the data from disk." + "#### Using the Data\n", + "\n", + "Load the data from files again. This is just to demonstrate how to get the data from disk." ] }, @@ -274,7 +277,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We create an instance of the second network, choose a loss to use, and then we're ready to train it. We also need an optimizer and maybe even an LR scheduler." 
+ "Create an instance of the second network, choose a loss to use, and then you're ready to train it. You also need an optimizer and maybe even an LR scheduler." ] }, { @@ -381,6 +384,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "#### Splitting the Data into Batches\n", + "\n", "If we want to be a little bit more advanced, we can split the training set into batches. " ] }, @@ -476,6 +481,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "#### Dataset and DataLoader\n", + "\n", "If we wanted to be really advanced, we would use TorchSharp data sets and data loaders, which would allow us to randomize the test data set between epocs (at the end of the outer training loop). Here's how we'd do that." ] }, @@ -525,7 +532,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The training loop gets slight more complex with the data set." + "The training loop gets slightly more complex with the data set." ] }, { diff --git a/tutorials/FSharp/synthetic_data.ipynb b/tutorials/FSharp/synthetic_data.ipynb new file mode 100644 index 0000000..11be2ea --- /dev/null +++ b/tutorials/FSharp/synthetic_data.ipynb @@ -0,0 +1,594 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using TorchSharp to Generate Synthetic Data for a Regression Problem\n", + "\n", + "This tutorial is based on a [PyTorch example](https://jamesmccaffrey.wordpress.com/2023/06/09/using-pytorch-to-generate-synthetic-data-for-a-regression-problem/) posted by James D. McCaffrey on his blog, ported to TorchSharp.\n", + "\n", + "Synthetic data sets can be very useful when evaluating and choosing a model.\n", + "\n", + "Note that we're taking some shortcuts in this example -- rather than writing the data set as a text file that can be loaded from any modeling framework, we're saving the data as serialized TorchSharp tensors. Is should be straight-forward to modify the tutorial to write the data sets as text, instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "#r \"nuget: TorchSharp-cpu\"\n", + "\n", + "open TorchSharp\n", + "open type TorchSharp.TensorExtensionMethods" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Generative Network\n", + "\n", + "Neural networks can be used to generate data as well as train. The synthetic data can then be used to evaluate different models to see how well they can copy the behavior of the network used to produce the data.\n", + "\n", + "First, we will create the model that will be used to generate the synthetic data. Later, we'll construct a second model that will be trained on the data the first model generates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "type Net(n_in : int) as this = \n", + " inherit torch.nn.Module(\"Net\")\n", + "\n", + " let hid1 = torch.nn.Linear(n_in, 10)\n", + " let oupt = torch.nn.Linear(10, 1)\n", + "\n", + " do\n", + " let lim = 0.80;\n", + " torch.nn.init.uniform_(hid1.weight, -lim, lim) |> ignore\n", + " torch.nn.init.uniform_(hid1.bias, -lim, lim) |> ignore\n", + " torch.nn.init.uniform_(oupt.weight, -lim, lim) |> ignore\n", + " torch.nn.init.uniform_(oupt.bias, -lim, lim) |> ignore\n", + " \n", + " this.RegisterComponents()\n", + "\n", + " override _.forward(input) = \n", + " use _ = torch.NewDisposeScope()\n", + " let z = hid1.call(input).tanh_()\n", + " let x = oupt.call(z).sigmoid_()\n", + " x.MoveToOuterDisposeScope()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have our generative network, we can define the method to create the data set. If you compare this with the PyTorch code, you will notice that we're relying on TorchSharp to generate a whole batch of data at once, rather than looping. We're also using TorchSharp instead of Numpy for the noise-generation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let create_data_file(net: Net, n_in: int64, fileName: string, n_items: int64) =\n", + " let x_lo = -1.0\n", + " let x_hi = 1.0\n", + "\n", + " let one_hundredth = 0.01.ToScalar()\n", + "\n", + " let X = (x_hi - x_lo).ToScalar() * torch.rand([|n_items; n_in|]) + x_lo.ToScalar()\n", + "\n", + " use d = torch.no_grad()\n", + "\n", + " let mutable y = net.call(X)\n", + "\n", + " y <- y + torch.rand(y.shape) * one_hundredth\n", + "\n", + " y <- torch.where(y.le(torch.tensor(0.0)), y + one_hundredth * torch.randn(y.shape) + one_hundredth, y)\n", + "\n", + " X.save(fileName + \".x\")\n", + " y.save(fileName + \".y\")\n", + "\n", + "let load_data_file(fileName: string) = (torch.Tensor.load(fileName + \".x\"), torch.Tensor.load(fileName + \".y\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let net = new Net(6)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the data files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "create_data_file(net, 6, \"train.dat\", 2000);\n", + "create_data_file(net, 6, \"test.dat\", 400);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Using the Data\n", + "\n", + "Load the data from files again. This is just to demonstrate how to get the data from disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let X_train,y_train = load_data_file(\"train.dat\")\n", + "let X_test, y_test = load_data_file(\"test.dat\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create another model class, with slightly different logic, and train it on the generated data set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "type Net2(n_in : int) as this = \n", + " inherit torch.nn.Module(\"Net2\")\n", + "\n", + " let hid1 = torch.nn.Linear(n_in, 5)\n", + " let oupt = torch.nn.Linear(5, 1)\n", + "\n", + " do\n", + " this.RegisterComponents()\n", + "\n", + " override _.forward(input) = \n", + " use _ = torch.NewDisposeScope()\n", + " let z = hid1.call(input).relu_()\n", + " let x = oupt.call(z).sigmoid_()\n", + " x.MoveToOuterDisposeScope()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create an instance of the second network, choose a loss to use, and then you're ready to train it. You also need an optimizer and maybe even an LR scheduler." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let model = new Net2(6)\n", + "\n", + "let loss = torch.nn.MSELoss()\n", + "\n", + "let learning_rate = 0.01\n", + "let optimizer = torch.optim.Rprop(model.parameters(), learning_rate)\n", + "let scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A pretty standard training loop. The input is just in one batch. It ends with evaluating the trained model on the training set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "printf \" initial loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())\n", + "\n", + "for epoch = 1 to 1000 do\n", + "\n", + " let output = loss.forward(model.forward(X_train), y_train)\n", + " \n", + " // Clear the gradients before doing the back-propagation\n", + " model.zero_grad()\n", + "\n", + " // Do back-propagation, which computes all the gradients.\n", + " output.backward()\n", + "\n", + " optimizer.step() |> ignore\n", + "\n", + " if epoch % 100 = 99 then\n", + " scheduler.step()\n", + "\n", + "printf \" final loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The thing we're really curious about is how the second model does on the test set, which it didn't see during training. If the loss is significantly greater than the one from the training set, we need to train more, i.e. 
start another epoch. If the test set loss doesn't get closer to the training set loss with more epochs, we may need more data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "loss.forward(model.forward(X_test), y_test).item()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Splitting the Data into Batches\n", + "\n", + "If we want to be a little bit more advanced, we can split the training set into batches. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let N = X_train.shape[0]/10L\n", + "let X_batch = X_train.split(N)\n", + "let y_batch = y_train.split(N)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That means modifying the training loop, too. Running multiple batches can take longer, but the model may converge quicker, so the total time before you have the desired model may still be shorter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "printf \" initial loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())\n", + "\n", + "for epoch = 1 to 1000 do\n", + "\n", + " for j = 0 to X_batch.Length-1 do\n", + "\n", + " let output = loss.forward(model.forward(X_batch[j]), y_batch[j])\n", + " \n", + " // Clear the gradients before doing the back-propagation\n", + " model.zero_grad()\n", + "\n", + " // Do back-propagation, which computes all the gradients.\n", + " output.backward()\n", + "\n", + " optimizer.step() |> ignore\n", + "\n", + " scheduler.step()\n", + "\n", + "printf \" final loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "loss.forward(model.forward(X_test), y_test).item()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Dataset and DataLoader\n", + "\n", + "If we wanted to be really advanced, we would use TorchSharp data sets and data loaders, which would allow us to randomize the training data set between epochs (at the end of the outer training loop). Here's how we'd do that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "type SyntheticDataset(fileName: string) as this = \n", + " inherit torch.utils.data.Dataset()\n", + "\n", + " let mutable _data:torch.Tensor = torch.Tensor.load(fileName + \".x\")\n", + " let mutable _labels:torch.Tensor = torch.Tensor.load(fileName + \".y\")\n", + "\n", + " \n", + " override _.GetTensor(index: int64) =\n", + " let rdic = new System.Collections.Generic.Dictionary()\n", + " rdic.Add(\"data\", _data[index])\n", + " rdic.Add(\"label\", _labels[index])\n", + " rdic\n", + "\n", + " override _.Count = _data.shape[0]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The training loop gets slightly more complex with the data set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "let training_data = new SyntheticDataset(\"train.dat\")\n", + "let train = new torch.utils.data.DataLoader(training_data, 200, shuffle=true);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "printf \" initial loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())\n", + "\n", + "for epoch = 1 to 1000 do\n", + "\n", + " for data in train do\n", + "\n", + " let output = loss.forward(model.forward(data[\"data\"]), data[\"label\"])\n", + " \n", + " // Clear the gradients before doing the back-propagation\n", + " model.zero_grad()\n", + "\n", + " // Do back-propagation, which computes all the gradients.\n", + " output.backward()\n", + "\n", + " optimizer.step() |> ignore\n", + "\n", + " scheduler.step()\n", + "\n", + "printf \" final loss = %s\\n\" (loss.forward(model.forward(X_train), y_train).item().ToString())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's slower, and the convergence isn't that much better, but that will depend on the model used. You just have to try different things." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [], + "source": [ + "loss.forward(model.forward(X_test), y_test).item()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}