diff --git a/example/image_models/cnn_pytorch.ipynb b/example/image_models/cnn_pytorch.ipynb deleted file mode 100644 index 08959ab6c..000000000 --- a/example/image_models/cnn_pytorch.ipynb +++ /dev/null @@ -1,239 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torchvision\n", - "from torchvision.transforms import v2\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/omilod/Desktop/projects/GigaTorch/env/lib/python3.11/site-packages/torchvision/transforms/v2/_deprecated.py:41: UserWarning: The transform `ToTensor()` is deprecated and will be removed in a future release. Instead, please use `v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])`.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n", - "Files already downloaded and verified\n" - ] - } - ], - "source": [ - "# Device configuration\n", - "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", - "\n", - "num_epochs = 10\n", - "batch_size = 4\n", - "learning_rate = 0.001\n", - "\n", - "transform = v2.Compose(\n", - " [v2.ToTensor(),\n", - " v2.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", - "\n", - "train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n", - "test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n", - "\n", - "tarin_loder = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", - "test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)\n" - ] - }, - { - "cell_type": 
"code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoche [1/10], Step [2000/12500], Loss: 2.2467\n", - "Epoche [1/10], Step [4000/12500], Loss: 2.2539\n", - "Epoche [1/10], Step [6000/12500], Loss: 2.2477\n", - "Epoche [1/10], Step [8000/12500], Loss: 2.4960\n", - "Epoche [1/10], Step [10000/12500], Loss: 2.4319\n", - "Epoche [1/10], Step [12000/12500], Loss: 2.3993\n", - "Epoche [2/10], Step [2000/12500], Loss: 1.7446\n", - "Epoche [2/10], Step [4000/12500], Loss: 1.7958\n", - "Epoche [2/10], Step [6000/12500], Loss: 1.9598\n", - "Epoche [2/10], Step [8000/12500], Loss: 2.0397\n", - "Epoche [2/10], Step [10000/12500], Loss: 1.8597\n", - "Epoche [2/10], Step [12000/12500], Loss: 1.1347\n", - "Epoche [3/10], Step [2000/12500], Loss: 1.2750\n", - "Epoche [3/10], Step [4000/12500], Loss: 1.4700\n", - "Epoche [3/10], Step [6000/12500], Loss: 0.8299\n", - "Epoche [3/10], Step [8000/12500], Loss: 2.4020\n", - "Epoche [3/10], Step [10000/12500], Loss: 0.8445\n", - "Epoche [3/10], Step [12000/12500], Loss: 1.2414\n", - "Epoche [4/10], Step [2000/12500], Loss: 1.1043\n", - "Epoche [4/10], Step [4000/12500], Loss: 1.6044\n", - "Epoche [4/10], Step [6000/12500], Loss: 1.8810\n", - "Epoche [4/10], Step [8000/12500], Loss: 1.4032\n", - "Epoche [4/10], Step [10000/12500], Loss: 1.9440\n", - "Epoche [4/10], Step [12000/12500], Loss: 1.4680\n", - "Epoche [5/10], Step [2000/12500], Loss: 0.9280\n", - "Epoche [5/10], Step [4000/12500], Loss: 1.0829\n", - "Epoche [5/10], Step [6000/12500], Loss: 0.8902\n", - "Epoche [5/10], Step [8000/12500], Loss: 1.4200\n", - "Epoche [5/10], Step [10000/12500], Loss: 2.7336\n", - "Epoche [5/10], Step [12000/12500], Loss: 1.7483\n", - "Epoche [6/10], Step [2000/12500], Loss: 1.4049\n", - "Epoche [6/10], Step [4000/12500], Loss: 0.8819\n", - "Epoche [6/10], Step [6000/12500], Loss: 2.0509\n", - "Epoche [6/10], Step [8000/12500], Loss: 1.5775\n", - "Epoche [6/10], Step 
[10000/12500], Loss: 1.4660\n", - "Epoche [6/10], Step [12000/12500], Loss: 1.2865\n", - "Epoche [7/10], Step [2000/12500], Loss: 1.3137\n", - "Epoche [7/10], Step [4000/12500], Loss: 1.3415\n", - "Epoche [7/10], Step [6000/12500], Loss: 0.9533\n", - "Epoche [7/10], Step [8000/12500], Loss: 1.1399\n", - "Epoche [7/10], Step [10000/12500], Loss: 1.3637\n", - "Epoche [7/10], Step [12000/12500], Loss: 0.6954\n", - "Epoche [8/10], Step [2000/12500], Loss: 0.9204\n", - "Epoche [8/10], Step [4000/12500], Loss: 1.1020\n", - "Epoche [8/10], Step [6000/12500], Loss: 1.0856\n", - "Epoche [8/10], Step [8000/12500], Loss: 1.2919\n", - "Epoche [8/10], Step [10000/12500], Loss: 0.5048\n", - "Epoche [8/10], Step [12000/12500], Loss: 1.6188\n", - "Epoche [9/10], Step [2000/12500], Loss: 0.7271\n", - "Epoche [9/10], Step [4000/12500], Loss: 0.8624\n", - "Epoche [9/10], Step [6000/12500], Loss: 1.6987\n", - "Epoche [9/10], Step [8000/12500], Loss: 0.8891\n", - "Epoche [9/10], Step [10000/12500], Loss: 1.0421\n", - "Epoche [9/10], Step [12000/12500], Loss: 0.9009\n", - "Epoche [10/10], Step [2000/12500], Loss: 1.8559\n", - "Epoche [10/10], Step [4000/12500], Loss: 1.9273\n", - "Epoche [10/10], Step [6000/12500], Loss: 1.1227\n", - "Epoche [10/10], Step [8000/12500], Loss: 1.3718\n", - "Epoche [10/10], Step [10000/12500], Loss: 1.6195\n", - "Epoche [10/10], Step [12000/12500], Loss: 0.9517\n", - "Finished training\n", - "Accuracy of the network: 58.32 %\n", - "Accuracy: 73.4 %\n", - "Accuracy: 60.8 %\n", - "Accuracy: 39.0 %\n", - "Accuracy: 38.7 %\n", - "Accuracy: 50.4 %\n", - "Accuracy: 38.7 %\n", - "Accuracy: 73.6 %\n", - "Accuracy: 66.8 %\n", - "Accuracy: 75.0 %\n", - "Accuracy: 66.8 %\n" - ] - } - ], - "source": [ - "class ConvNet(nn.Module):\n", - " def __init__(self) -> None:\n", - " super().__init__()\n", - " self.conv1 = nn.Conv2d(3, 6, 5)\n", - " self.pool = nn.MaxPool2d(2, 2)\n", - " self.conv2 = nn.Conv2d(6, 16, 5)\n", - " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n", - " 
self.fc2 = nn.Linear(120, 84)\n", - " self.fc3 = nn.Linear(84, 10)\n", - "\n", - " def forward(self, x):\n", - " # -> n, 3, 32, 32\n", - " x = self.pool(F.relu(self.conv1(x))) # -> n, 6, 14, 14\n", - " x = self.pool(F.relu(self.conv2(x))) # -> n, 16, 5, 5\n", - " x = x.view(-1, 16 * 5 * 5) # -> n, 400\n", - " x = F.relu(self.fc1(x)) # -> n, 120\n", - " x = F.relu(self.fc2(x)) # -> n, 84\n", - " x = self.fc3(x) # -> n, 10\n", - " return x\n", - "\n", - "model = ConvNet()\n", - "model.to(device)\n", - "critertion = nn.CrossEntropyLoss()\n", - "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n", - "\n", - "n_total_steps = len(tarin_loder)\n", - "for epoch in range(num_epochs):\n", - " for i, (images, labels) in enumerate(tarin_loder):\n", - " images = images.to(device)\n", - " labels = labels.to(device)\n", - "\n", - " # Forward pass\n", - " outputs = model(images)\n", - " loss = critertion(outputs, labels)\n", - "\n", - " # Backward and optimize\n", - " optimizer.zero_grad()\n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " if (i+1) % 2000 == 0:\n", - " print(f'Epoche [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')\n", - "print(\"Finished training\")\n", - "\n", - "\n", - "with torch.no_grad():\n", - " n_correct = 0\n", - " n_samples = 0\n", - " n_class_correct = [0 for i in range(10)]\n", - " n_class_samples = [0 for i in range(10)]\n", - " for images, labels in test_loader:\n", - " images = images.to(device)\n", - " labels = labels.to(device)\n", - " outputs = model(images)\n", - " # max returns (value ,index)\n", - " _, predicted = torch.max(outputs, 1)\n", - " n_samples += labels.size(0)\n", - " n_correct += (predicted == labels).sum().item()\n", - " \n", - " for i in range(batch_size):\n", - " label = labels[i]\n", - " pred = predicted[i]\n", - " if (label == pred):\n", - " n_class_correct[label] += 1\n", - " n_class_samples[label] += 1\n", - "\n", - " acc = 100.0 * n_correct / n_samples\n", - " 
print(f'Accuracy of the network: {acc} %')\n", - "\n", - " for i in range(10):\n", - " acc = 100.0 * n_class_correct[i] / n_class_samples[i]\n", - " print(f'Accuracy: {acc} %')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/example/image_models/pytorch/cnn.ipynb b/example/image_models/pytorch/cnn.ipynb new file mode 100644 index 000000000..b11949ea9 --- /dev/null +++ b/example/image_models/pytorch/cnn.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "from torchvision.transforms import v2\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/orifmilod/Desktop/projects/gigatorch/env/lib/python3.10/site-packages/torchvision/transforms/v2/_deprecated.py:41: UserWarning: The transform `ToTensor()` is deprecated and will be removed in a future release. 
# Device configuration: use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters read by the training / evaluation cell.
num_epochs = 16
batch_size = 8
learning_rate = 0.001

# `v2.ToTensor()` is deprecated (it emits a UserWarning on construction); the
# warning itself names `ToImage()` + `ToDtype(torch.float32, scale=True)` as
# the replacement. Normalize then maps each channel with mean 0.5 / std 0.5.
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits, downloaded into ./data when missing.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)

# Training data is reshuffled every epoch; test data keeps its order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Backward-compatible alias: the training cell refers to the loader by this
# (misspelled) name.
tarin_loder = train_loader
[4/16], Step [2000/6250], Loss: 1.6886\n", + "Epoche [4/16], Step [4000/6250], Loss: 2.6763\n", + "Epoche [4/16], Step [6000/6250], Loss: 1.7113\n", + "Epoche [5/16], Step [2000/6250], Loss: 1.2147\n", + "Epoche [5/16], Step [4000/6250], Loss: 1.8217\n", + "Epoche [5/16], Step [6000/6250], Loss: 1.5523\n", + "Epoche [6/16], Step [2000/6250], Loss: 1.2940\n", + "Epoche [6/16], Step [4000/6250], Loss: 1.6208\n", + "Epoche [6/16], Step [6000/6250], Loss: 1.4940\n", + "Epoche [7/16], Step [2000/6250], Loss: 1.7596\n", + "Epoche [7/16], Step [4000/6250], Loss: 1.1859\n", + "Epoche [7/16], Step [6000/6250], Loss: 1.7896\n", + "Epoche [8/16], Step [2000/6250], Loss: 1.6281\n", + "Epoche [8/16], Step [4000/6250], Loss: 1.8791\n", + "Epoche [8/16], Step [6000/6250], Loss: 1.3839\n", + "Epoche [9/16], Step [2000/6250], Loss: 1.7757\n", + "Epoche [9/16], Step [4000/6250], Loss: 1.4942\n", + "Epoche [9/16], Step [6000/6250], Loss: 1.2496\n", + "Epoche [10/16], Step [2000/6250], Loss: 1.0990\n", + "Epoche [10/16], Step [4000/6250], Loss: 0.9228\n", + "Epoche [10/16], Step [6000/6250], Loss: 1.6966\n", + "Epoche [11/16], Step [2000/6250], Loss: 1.4388\n", + "Epoche [11/16], Step [4000/6250], Loss: 1.3887\n", + "Epoche [11/16], Step [6000/6250], Loss: 1.8761\n", + "Epoche [12/16], Step [2000/6250], Loss: 1.4832\n", + "Epoche [12/16], Step [4000/6250], Loss: 1.6177\n", + "Epoche [12/16], Step [6000/6250], Loss: 1.6273\n", + "Epoche [13/16], Step [2000/6250], Loss: 1.7118\n", + "Epoche [13/16], Step [4000/6250], Loss: 1.1288\n", + "Epoche [13/16], Step [6000/6250], Loss: 0.6880\n", + "Epoche [14/16], Step [2000/6250], Loss: 1.3681\n", + "Epoche [14/16], Step [4000/6250], Loss: 1.9006\n", + "Epoche [14/16], Step [6000/6250], Loss: 1.3762\n", + "Epoche [15/16], Step [2000/6250], Loss: 1.1387\n", + "Epoche [15/16], Step [4000/6250], Loss: 1.4813\n", + "Epoche [15/16], Step [6000/6250], Loss: 1.3308\n", + "Epoche [16/16], Step [2000/6250], Loss: 1.0389\n", + "Epoche [16/16], Step 
class ConvNet(nn.Module):
    """Small two-conv / three-linear CNN producing 10 class scores.

    The shape comments in ``forward`` assume inputs of shape (n, 3, 32, 32).
    """

    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw (un-normalised) class scores of shape (n, 10)."""
        # -> n, 3, 32, 32
        x = self.pool(F.relu(self.conv1(x)))  # -> n, 6, 14, 14
        x = self.pool(F.relu(self.conv2(x)))  # -> n, 16, 5, 5
        x = x.view(-1, 16 * 5 * 5)            # -> n, 400
        x = F.relu(self.fc1(x))               # -> n, 120
        x = F.relu(self.fc2(x))               # -> n, 84
        x = self.fc3(x)                       # -> n, 10
        return x


model = ConvNet()
model.to(device)
# CrossEntropyLoss takes the raw scores returned by ConvNet.forward.
critertion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# ---- Training ----
n_total_steps = len(tarin_loder)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(tarin_loder):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = critertion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 2000 == 0:
            print(f'Epoche [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
print("Finished training")


# ---- Evaluation ----
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * 10
    n_class_samples = [0] * 10
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value, index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # Walk the samples actually present in this batch. The final batch is
        # shorter than batch_size whenever the dataset size is not a multiple
        # of it, so indexing with range(batch_size) can run out of bounds.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if label == pred:
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    # Report the per-class counters collected above, indexed by class id
    # (not by position in the last batch).
    for class_index in range(10):
        if n_class_samples[class_index] == 0:
            print(f'Accuracy of class {class_index}: n/a (no samples)')
            continue
        class_acc = 100.0 * n_class_correct[class_index] / n_class_samples[class_index]
        print(f'Accuracy of class {class_index}: {class_acc} %')
from gigatorch.embedding import Embedding, prepare_data, make_context_vector


def main():
    """Feed every context window of a sample text through an Embedding and print each result."""
    raw_text = """We are about to study the idea of a computational process.
    Computational processes are abstract beings that inhabit computers.
    As they evolve, processes manipulate other abstract things called data.
    The evolution of a process is directed by a pattern of rules called a program.
    People create programs to direct processes. In effect,
    we conjure the spirits of the computer with our spells."""
    samples, word_to_index, _index_to_word = prepare_data(raw_text)

    # NOTE(review): the sizes (100, 10) are hard-coded; presumably
    # (vocabulary size, embedding width) — confirm against Embedding.
    embedding = Embedding(100, 10)

    # NOTE(review): assumes prepare_data yields (target, context) pairs in
    # that order — confirm against its implementation.
    for _target, context_words in samples:
        print(embedding(make_context_vector(context_words, word_to_index)))


if __name__ == "__main__":
    main()
- " \n", - " #print(w)\n", - " context = [0] * block_size\n", - " for ch in w + '.':\n", - " ix = stoi[ch]\n", - " X.append(context)\n", - " Y.append(ix)\n", - " #print(''.join(itos[i] for i in context), '--->', itos[ix])\n", - " context = context[1:] + [ix] # crop and append\n", - " \n", - "X = torch.tensor(X)\n", - "Y = torch.tensor(Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(torch.Size([228146, 3]), torch.int64, torch.Size([228146]), torch.int64)" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X.shape, X.dtype, Y.shape, Y.dtype" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([182625, 3]) torch.Size([182625])\n", - "torch.Size([22655, 3]) torch.Size([22655])\n", - "torch.Size([22866, 3]) torch.Size([22866])\n" - ] - } - ], - "source": [ - "# build the dataset\n", - "block_size = 3 # context length: how many characters do we take to predict the next one?\n", - "\n", - "def build_dataset(words): \n", - " X, Y = [], []\n", - " for w in words:\n", - "\n", - " #print(w)\n", - " context = [0] * block_size\n", - " for ch in w + '.':\n", - " ix = stoi[ch]\n", - " X.append(context)\n", - " Y.append(ix)\n", - " #print(''.join(itos[i] for i in context), '--->', itos[ix])\n", - " context = context[1:] + [ix] # crop and append\n", - "\n", - " X = torch.tensor(X)\n", - " Y = torch.tensor(Y)\n", - " print(X.shape, Y.shape)\n", - " return X, Y\n", - "\n", - "import random\n", - "random.seed(42)\n", - "random.shuffle(words)\n", - "n1 = int(0.8*len(words))\n", - "n2 = int(0.9*len(words))\n", - "\n", - "Xtr, Ytr = build_dataset(words[:n1])\n", - "Xdev, Ydev = build_dataset(words[n1:n2])\n", - "Xte, Yte = build_dataset(words[n2:])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": 
{}, - "outputs": [], - "source": [ - "C = torch.randn((27, 2))" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([228146, 3, 2])" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "emb = C[X]\n", - "emb.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "g = torch.Generator().manual_seed(2147483647) # for reproducibility\n", - "embdedding_size = 10\n", - "C = torch.randn((27, embdedding_size), generator=g)\n", - "W1 = torch.randn((block_size * embdedding_size, 200), generator=g)\n", - "b1 = torch.randn(200, generator=g)\n", - "W2 = torch.randn((200, 27), generator=g)\n", - "b2 = torch.randn(27, generator=g)\n", - "parameters = [C, W1, b1, W2, b2]" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "11897" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sum(p.nelement() for p in parameters) # number of parameters in total" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "for p in parameters:\n", - " p.requires_grad = True" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.971953272819519\n" - ] - } - ], - "source": [ - "epochs = 200000\n", - "# learning_rates = 10**(torch.linspace(-3, 0, epochs))\n", - "used_lrs = []\n", - "lossi = []\n", - "# stepi = []\n", - "for i in range(epochs):\n", - " \n", - " # minibatch construct\n", - " ix = torch.randint(0, Xtr.shape[0], (32,))\n", - " \n", - " # forward pass\n", - " emb = C[Xtr[ix]] # (32, 3, 10)\n", - " h = torch.tanh(emb.view(-1, embdedding_size * block_size) @ W1 + b1) # (32, 200)\n", - 
" logits = h @ W2 + b2 # (32, 27)\n", - " loss = F.cross_entropy(logits, Ytr[ix])\n", - " # print(loss.item())\n", - " \n", - " # backward pass\n", - " for p in parameters:\n", - " p.grad = None\n", - " loss.backward()\n", - " lr = 0.01 if i < epochs / 2 else 0.001\n", - " for p in parameters:\n", - " p.data -= lr * p.grad\n", - "\n", - " # track stats\n", - " used_lrs.append(i)\n", - " # stepi.append(i)\n", - " lossi.append(loss.item())\n", - "\n", - "print(loss.item())" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(used_lrs, lossi)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(2.3539, grad_fn=)" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "emb = C[Xtr] # (32, 3, 2)\n", - "h = torch.tanh(emb.view(-1, 30) @ W1 + b1) # (32, 100)\n", - "logits = h @ W2 + b2 # (32, 27)\n", - "loss = F.cross_entropy(logits, Ytr)\n", - "loss" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(2.3602, grad_fn=)" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "emb = C[Xdev] # (32, 3, 2)\n", - "h = torch.tanh(emb.view(-1, 30) @ W1 + b1) # (32, 100)\n", - "logits = h @ W2 + b2 # (32, 27)\n", - "loss = F.cross_entropy(logits, Ydev)\n", - "loss" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# visualize dimensions 0 and 1 of the embedding matrix C for all characters\n", - "plt.figure(figsize=(8,8))\n", - "plt.scatter(C[:,0].data, C[:,1].data, s=200)\n", - "for i in range(C.shape[0]):\n", - " plt.text(C[i,0].item(), C[i,1].item(), itos[i], ha=\"center\", va=\"center\", color='white')\n", - "plt.grid('minor')" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "# training split, dev/validation split, test split\n", - "# 80%, 10%, 10%" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([1, 3, 10])" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context = [0] * block_size\n", - "C[torch.tensor([context])].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "carlah.\n", - "ambril.\n", - "khkimyli.\n", - "thiy.\n", - "salaysge.\n", - "mahnen.\n", - "deliy.\n", - "chigeni.\n", - "nelania.\n", - "chaiir.\n", - "kaleig.\n", - "dham.\n", - "jore.\n", - "quinn.\n", - "srojlir.\n", - "jamii.\n", - "wazelog.\n", - "jaryxi.\n", - "jaxeeni.\n", - "sayley.\n" - ] - } - ], - "source": [ - "\n", - "\n", - "# sample from the model\n", - "g = torch.Generator().manual_seed(2147483647 + 10)\n", - "\n", - "for _ in range(20):\n", - " \n", - " out = []\n", - " context = [0] * block_size # initialize with all ...\n", - " while True:\n", - " emb = C[torch.tensor([context])] # (1,block_size,d)\n", - " h = torch.tanh(emb.view(1, -1) @ W1 + b1)\n", - " logits = h @ W2 + b2\n", - " probs = F.softmax(logits, dim=1)\n", - " ix = torch.multinomial(probs, num_samples=1, generator=g).item()\n", - " context = context[1:] + [ix]\n", - " out.append(ix)\n", - " if ix == 0:\n", - " 
break\n", - " \n", - " print(''.join(itos[i] for i in out))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/example/language_model/pytorch/RNN.ipynb b/example/language_model/pytorch/RNN.ipynb new file mode 100644 index 000000000..a568a6480 --- /dev/null +++ b/example/language_model/pytorch/RNN.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Design criteria for modeling sequential data: \n", + "1. Handle variable-length sequences.\n", + "2. Track long-term dependencies.\n", + "3. Maintain information about order.\n", + "4. Share parameters across the sequence.\n", + "\n", + "\n", + "RNNs can fulfill these criteria and be used for sequence modeling.\n", + "\n", + "For instance, we can use them for:\n", + " - Many to One: we take a sequence of tokens and produce a single result, such as sentiment classification, where we take a list of words\n", + " and predict the sentiment of the sentence.\n", + " - One to Many: Image captioning: we input a single image and the RNN produces a sequence of words describing what the picture contains.\n", + " - Many to Many: Translation or forecasting.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The CBOW model architecture tries to predict the current target word (the center word) based on the source context words. 
"""Continuous bag-of-words (CBOW) demo: predict a word from its neighbours."""
import torch
import torch.nn as nn

CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 100
EMDEDDING_DIM = EMBEDDING_DIM  # original (misspelled) name, kept as an alias

raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules called a program.
People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

# Iterating a set of strings yields a different order on every interpreter
# run, so sort first to keep the word <-> index mapping reproducible.
word_to_index = {word: ix for ix, word in enumerate(sorted(vocab))}
index_to_word = {ix: word for word, ix in word_to_index.items()}

# (context, target) pairs. The context is the CONTEXT_SIZE words on each side
# of the target and must NOT contain the target itself, otherwise the label
# leaks into the model input.
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    target = raw_text[i]
    context = raw_text[i - CONTEXT_SIZE:i] + raw_text[i + 1:i + CONTEXT_SIZE + 1]
    data.append((context, target))


def make_context_vector(context, word_to_index):
    """Return the vocabulary indices of the words in *context* as a 1-D long tensor.

    Raises:
        KeyError: if a word of *context* is missing from *word_to_index*.
    """
    idxs = [word_to_index[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)


class CBOW(nn.Module):
    """Sum the context-word embeddings and classify the target word with an MLP."""

    def __init__(self, vocab_size, embedding_dim):
        super().__init__()

        # out: 1 x embedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()

        # out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Map a 1-D tensor of context word indices to (1, vocab_size) log-probabilities."""
        # Reduce over the context dimension inside torch instead of the Python
        # builtin sum(), which adds the rows one by one.
        embeds = self.embeddings(inputs).sum(dim=0).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_embedding(self, word):
        """Return the (1, embedding_dim) embedding of *word*.

        The index is looked up in the module-level ``word_to_index``.
        """
        word = torch.tensor([word_to_index[word]])
        return self.embeddings(word).view(1, -1)

    # Original (misspelled) method name, kept so existing calls still work.
    get_word_emdedding = get_word_embedding


# Seed the global RNG so that weight initialisation, and therefore the printed
# prediction, is reproducible between runs.
torch.manual_seed(0)

model = CBOW(vocab_size, EMBEDDING_DIM)

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# The training tensors do not change between epochs: build them once.
training_pairs = [
    (make_context_vector(context, word_to_index),
     torch.tensor([word_to_index[target]]))
    for context, target in data
]

# TRAINING
for epoch in range(50):
    total_loss = 0

    for context_vector, target_index in training_pairs:
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, target_index)

    # optimize at the end of each epoch
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

# TESTING
context = ['People', 'create', 'to', 'direct']
context_vector = make_context_vector(context, word_to_index)
with torch.no_grad():  # inference only, no autograd graph needed
    a = model(context_vector)

# Print result
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {index_to_word[torch.argmax(a[0]).item()]}')
class Embedding:
    """Embedding layer: maps integer token ids to rows of a weight matrix.

    The lookup is expressed as a one-hot comparison against ``vocab_counter``
    followed by a matrix product with ``weight``.
    """

    def __init__(self, vocab_size: int, embed_size: int):
        self.vocab_size, self.embed_size = vocab_size, embed_size
        # NOTE(review): fan_in/fan_out are hard-coded to 1 and 2; the original
        # author left this as an open question. Presumably they should be
        # derived from vocab_size / embed_size -- confirm against
        # WightInitializer.xavier_normal before changing.
        self.weight = WightInitializer.xavier_normal(
            1, 2, vocab_size, embed_size)
        # __call__ compares the incoming ids against every vocabulary id, so
        # it needs the ids 0..vocab_size-1; without this attribute the first
        # call raised AttributeError.
        # NOTE(review): the nested list is meant to give shape
        # (1, 1, vocab_size) for broadcasting against idx.unsqueeze(2);
        # assumes Tensor accepts nested lists the way it accepts the flat
        # list in make_context_vector -- confirm.
        self.vocab_counter = Tensor([[list(range(vocab_size))]])

    def __call__(self, idx: "Tensor") -> "Tensor":
        """Return the embeddings selected by the ids in *idx*.

        NOTE(review): ``idx.unsqueeze(2)`` suggests *idx* is expected to be
        2-D (e.g. batch x sequence) -- confirm against callers.
        """
        one_hot = (self.vocab_counter == idx.unsqueeze(2)).expand(
            *idx.shape, self.vocab_size)
        return one_hot @ self.weight


def prepare_data(raw_text, context_size=2):
    """Build CBOW training pairs and vocabulary mappings from *raw_text*.

    Args:
        raw_text: whitespace-separated text.
        context_size: number of words taken on each side of the target.

    Returns:
        ``(data, word_to_index, index_to_word)`` where *data* is a list of
        ``(target, context)`` tuples; *context* holds the ``context_size``
        words before and after the target, excluding the target itself.
        *data* is empty when the text has fewer than ``2 * context_size + 1``
        words.
    """
    words = raw_text.split()

    # Sort the vocabulary: iterating a set of strings yields a different
    # order on every interpreter run, which would make indices unstable.
    word_to_index = {word: ix for ix, word in enumerate(sorted(set(words)))}
    index_to_word = {ix: word for word, ix in word_to_index.items()}

    data = []
    for i in range(context_size, len(words) - context_size):
        # Leave out words[i]: the target must not appear in its own context.
        context = words[i - context_size:i] + words[i + 1:i + context_size + 1]
        data.append((words[i], context))

    return data, word_to_index, index_to_word


def make_context_vector(context, word_to_index):
    """Return the indices of the words in *context* as an integer Tensor.

    Raises:
        KeyError: if a word of *context* is missing from *word_to_index*.
    """
    indexes = [word_to_index[w] for w in context]
    # np.long was removed from NumPy in 1.24; use the explicit 64-bit type.
    return Tensor(indexes, dtype=np.int64)
def test_word_embedding():
    """Check that the word <-> index vocabulary mappings are consistent."""
    raw_text = """We are about to study the idea of a computational process.
    Computational processes are abstract beings that inhabit computers.
    As they evolve, processes manipulate other abstract things called data.
    The evolution of a process is directed by a pattern of rules called a program.
    People create programs to direct processes. In effect,
    we conjure the spirits of the computer with our spells.""".split()

    # By deriving a set from `raw_text`, we deduplicate the array
    vocab = set(raw_text)
    vocab_size = len(vocab)

    # Sorted so the mapping is identical on every run (set order is not).
    word_to_index = {word: ix for ix, word in enumerate(sorted(vocab))}
    index_to_word = {ix: word for word, ix in word_to_index.items()}

    # Every word gets exactly one index in 0..vocab_size-1 ...
    assert len(word_to_index) == len(index_to_word) == vocab_size
    assert sorted(word_to_index.values()) == list(range(vocab_size))
    # ... and the two mappings are inverses of each other.
    assert all(index_to_word[word_to_index[w]] == w for w in raw_text)