Cs674 #124

Open
wants to merge 4 commits into master
769 changes: 769 additions & 0 deletions .gitignore

Large diffs are not rendered by default.

Binary file added Loss.png
1,316 changes: 1,316 additions & 0 deletions changes.diff

Large diffs are not rendered by default.

50 changes: 29 additions & 21 deletions demo.ipynb
@@ -174,30 +174,38 @@
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\weste\\anaconda3\\envs\\minigpt\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter_dt 0.00ms; iter 0: train loss 1.06407\n",
"iter_dt 18.17ms; iter 100: train loss 0.14712\n",
"iter_dt 18.70ms; iter 200: train loss 0.05315\n",
"iter_dt 19.65ms; iter 300: train loss 0.04404\n",
"iter_dt 31.64ms; iter 400: train loss 0.04724\n",
"iter_dt 18.43ms; iter 500: train loss 0.02521\n",
"iter_dt 19.83ms; iter 600: train loss 0.03352\n",
"iter_dt 19.58ms; iter 700: train loss 0.00539\n",
"iter_dt 18.72ms; iter 800: train loss 0.02057\n",
"iter_dt 18.26ms; iter 900: train loss 0.00360\n",
"iter_dt 18.50ms; iter 1000: train loss 0.00788\n",
"iter_dt 20.64ms; iter 1100: train loss 0.01162\n",
"iter_dt 18.63ms; iter 1200: train loss 0.00963\n",
"iter_dt 18.32ms; iter 1300: train loss 0.02066\n",
"iter_dt 18.40ms; iter 1400: train loss 0.01739\n",
"iter_dt 18.37ms; iter 1500: train loss 0.00376\n",
"iter_dt 18.67ms; iter 1600: train loss 0.00133\n",
"iter_dt 18.38ms; iter 1700: train loss 0.00179\n",
"iter_dt 18.66ms; iter 1800: train loss 0.00079\n",
"iter_dt 18.48ms; iter 1900: train loss 0.00042\n"
"iter_dt 0.00ms; iter 0: train loss 1.06126\n",
"iter_dt 13.03ms; iter 100: train loss 0.13979\n",
"iter_dt 14.00ms; iter 200: train loss 0.06111\n",
"iter_dt 22.00ms; iter 300: train loss 0.03509\n",
"iter_dt 17.00ms; iter 400: train loss 0.01621\n",
"iter_dt 15.00ms; iter 500: train loss 0.00580\n",
"iter_dt 13.00ms; iter 600: train loss 0.01753\n",
"iter_dt 14.00ms; iter 700: train loss 0.00683\n",
"iter_dt 14.00ms; iter 800: train loss 0.01776\n",
"iter_dt 12.83ms; iter 900: train loss 0.02665\n",
"iter_dt 14.00ms; iter 1000: train loss 0.00721\n",
"iter_dt 13.00ms; iter 1100: train loss 0.00226\n",
"iter_dt 14.00ms; iter 1200: train loss 0.02251\n",
"iter_dt 14.00ms; iter 1300: train loss 0.00298\n",
"iter_dt 14.00ms; iter 1400: train loss 0.01413\n",
"iter_dt 14.00ms; iter 1500: train loss 0.01876\n",
"iter_dt 14.03ms; iter 1600: train loss 0.00173\n",
"iter_dt 14.00ms; iter 1700: train loss 0.02282\n",
"iter_dt 14.00ms; iter 1800: train loss 0.00102\n",
"iter_dt 12.97ms; iter 1900: train loss 0.01723\n"
]
}
],
@@ -317,7 +325,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.5"
},
"orig_nbformat": 4,
"vscode": {
Empty file added final.diff
Empty file.
41 changes: 37 additions & 4 deletions generate.ipynb
@@ -11,7 +11,16 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\weste\\anaconda3\\envs\\minigpt\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import torch\n",
"from transformers import GPT2Tokenizer, GPT2LMHeadModel\n",
@@ -43,6 +52,30 @@
"text": [
"number of parameters: 1557.61M\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 689/689 [00:00<00:00, 686kB/s]\n",
"c:\\Users\\weste\\anaconda3\\envs\\minigpt\\Lib\\site-packages\\huggingface_hub\\file_download.py:137: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\weste\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
" warnings.warn(message)\n",
"Downloading model.safetensors: 100%|██████████| 6.43G/6.43G [01:43<00:00, 62.3MB/s]\n",
"Downloading (…)neration_config.json: 100%|██████████| 124/124 [00:00<00:00, 124kB/s]\n"
]
},
{
"ename": "AssertionError",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32md:\\DeepLearningProjects\\minGPT\\generate.ipynb Cell 4\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/DeepLearningProjects/minGPT/generate.ipynb#W3sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mif\u001b[39;00m use_mingpt:\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/DeepLearningProjects/minGPT/generate.ipynb#W3sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m model \u001b[39m=\u001b[39m GPT\u001b[39m.\u001b[39;49mfrom_pretrained(model_type)\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/DeepLearningProjects/minGPT/generate.ipynb#W3sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/DeepLearningProjects/minGPT/generate.ipynb#W3sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m model \u001b[39m=\u001b[39m GPT2LMHeadModel\u001b[39m.\u001b[39mfrom_pretrained(model_type)\n",
"File \u001b[1;32md:\\DeepLearningProjects\\minGPT\\mingpt\\model.py:201\u001b[0m, in \u001b[0;36mGPT.from_pretrained\u001b[1;34m(cls, model_type)\u001b[0m\n\u001b[0;32m 198\u001b[0m transposed \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mattn.c_attn.weight\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mattn.c_proj.weight\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mmlp.c_fc.weight\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mmlp.c_proj.weight\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[0;32m 199\u001b[0m \u001b[39m# basically the openai checkpoints use a \"Conv1D\" module, but we only want to use a vanilla nn.Linear.\u001b[39;00m\n\u001b[0;32m 200\u001b[0m \u001b[39m# this means that we have to transpose these weights when we import them\u001b[39;00m\n\u001b[1;32m--> 201\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mlen\u001b[39m(keys) \u001b[39m==\u001b[39m \u001b[39mlen\u001b[39m(sd)\n\u001b[0;32m 202\u001b[0m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m keys:\n\u001b[0;32m 203\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39many\u001b[39m(k\u001b[39m.\u001b[39mendswith(w) \u001b[39mfor\u001b[39;00m w \u001b[39min\u001b[39;00m transposed):\n\u001b[0;32m 204\u001b[0m \u001b[39m# special treatment for the Conv1D weights we need to transpose\u001b[39;00m\n",
"\u001b[1;31mAssertionError\u001b[0m: "
]
}
],
"source": [
@@ -59,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -100,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -152,7 +185,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.5"
},
"orig_nbformat": 4,
"vscode": {
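Note on the AssertionError in the generate.ipynb output above: it is raised by assert len(keys) == len(sd) in GPT.from_pretrained (mingpt/model.py, line 201), which expects the Hugging Face GPT-2 state dict to match minGPT's own parameter list entry for entry. A plausible, but unverified, cause is an installed transformers release whose GPT-2 checkpoints expose a different set of buffer entries than minGPT expects. The sketch below only compares the two key sets to surface the mismatch; it assumes the small 'gpt2' checkpoint as a stand-in for the 'gpt2-xl' model the notebook loads.

# Diagnostic sketch (not part of this PR): list state-dict keys that differ between
# minGPT's GPT-2 skeleton and the checkpoint served by the installed transformers version.
from transformers import GPT2LMHeadModel
from mingpt.model import GPT

config = GPT.get_default_config()
config.model_type = 'gpt2'   # stand-in; the notebook itself uses 'gpt2-xl'
config.vocab_size = 50257    # GPT-2 BPE vocabulary size
config.block_size = 1024     # GPT-2 context length
sd = GPT(config).state_dict()
sd_hf = GPT2LMHeadModel.from_pretrained('gpt2').state_dict()

print(len(sd), len(sd_hf))
print(sorted(set(sd) - set(sd_hf)))   # expected by minGPT, absent from the HF checkpoint
print(sorted(set(sd_hf) - set(sd)))   # present in the HF checkpoint, not expected by minGPT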
12 changes: 12 additions & 0 deletions gpt_train.sh
@@ -0,0 +1,12 @@
#!/bin/bash
#SBATCH --time=05:10:00
#SBATCH --mem=80G
#SBATCH --gpus=1
#SBATCH --nodes=1
#SBATCH --qos=cs
#SBATCH --partition=cs
source min-gpt-env/bin/activate
cd ~/minGPT/mingpt
nvidia-smi --list-gpus
nvidia-smi --query-gpu=memory.total --format=csv
python jsonl_dataset.py
82 changes: 82 additions & 0 deletions input.txt
@@ -0,0 +1,82 @@

ChatGPT
FADE IN:

NARRATOR (V.O.)
(ominous, echoing voice)
In a time, where universes collide and the most unlikely of tales come to life...

CUT TO:

Epic scenes of the One Ring falling into a swirling vortex.

NARRATOR (V.O.)
...an unexpected hero emerges from the shadow.

The vortex swirls violently and in a flash of light, Gollum lands on a street in modern day Chicago.

CUT TO:

Scenes of Gollum scared and overwhelmed by the tall skyscrapers, traffic, and people.

NARRATOR (V.O.)
In a world he doesn't understand...

CUT TO:

Gollum in an alleyway, attempting to catch a rat, startling a group of young kids.

NARRATOR (V.O.)
...he will find allies in the unlikeliest places.

The kids, scared at first, gradually approach Gollum. They offer him a slice of pizza.

NARRATOR (V.O.)
...and they will teach him more than he ever expected.

MONTAGE:

Scenes of the kids showing Gollum around - teaching him how to cross the street, playing basketball, eating at a hot dog stand. Gollum trying and failing at modern conveniences like smartphones, vending machines, and escalators, resulting in comedic moments.

CUT TO:

A heartwarming scene of the kids defending Gollum from a local bully.

NARRATOR (V.O.)
Friendship. Courage. Loyalty.

Gollum, seeing the kids in danger, steps up and roars at the bullies.

NARRATOR (V.O.)
In a quest to find his place in this new world...

Gollum, on the top of a skyscraper, looks out at the Chicago skyline.

NARRATOR (V.O.)
...he will discover a power greater than any ring.

Quick shots of exciting chase scenes, a mysterious dark force pursuing Gollum and the kids.

FADE TO BLACK:

Title fades in:

"GOLLUM IN THE WINDY CITY"

NARRATOR (V.O.)
This summer... get ready for the adventure of a lifetime.

CUT TO:

Gollum with a Chicago Bulls cap, holding a basketball.

GOLLUM
(voiceover, excited)
"Prescious... we likes this game!"

The trailer ends with a comic scene of Gollum trying to dunk, but instead hanging off the hoop.

NARRATOR (V.O.)
Coming soon.

FADE OUT.
155 changes: 155 additions & 0 deletions mingpt/jsonl_dataset.py
@@ -0,0 +1,155 @@
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from transformers import GPT2Tokenizer

import torch
from torch.utils.data import Dataset

from mingpt.model import GPT
from mingpt.trainer import Trainer
from mingpt.utils import set_seed, setup_logging, CfgNode as CN


def get_config():

    C = CN()

    # system
    C.system = CN()
    C.system.seed = 3407
    C.system.work_dir = './out/chargpt'

    # data
    C.data = JSONL_Dataset.get_default_config()

    # model
    C.model = GPT.get_default_config()
    C.model.model_type = 'gpt-mini'

    # trainer
    C.trainer = Trainer.get_default_config()
    C.trainer.learning_rate = 5e-4
    C.trainer.max_iters = 1
    C.trainer.batch_size = 4

    return C


class JSONL_Dataset(Dataset):

    @staticmethod
    def get_default_config():
        C = CN()
        C.block_size = 64
        return C

    def __init__(self, file_path, block_size):
        super().__init__()
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.file_path = os.path.normpath(file_path)
        self.block_size = block_size
        self.jsonlines = []
        # stream the .jsonl file one record at a time and tokenize each record's 'text' field
        with pd.read_json(self.file_path, lines=True, chunksize=1) as reader:
            for chunk in reader:
                chunklet = chunk['text'].tolist()[0]
                if isinstance(chunklet, str):
                    self.jsonlines.append(self.tokenizer(chunklet, padding='max_length', truncation=True, max_length=self.block_size)['input_ids'])

    def __len__(self):
        return len(self.jsonlines)

    def __getitem__(self, idx):
        # next-token prediction pair: y is x shifted left by one token
        token = self.jsonlines[idx]
        x = torch.tensor(token[:-1], dtype=torch.long)
        y = torch.tensor(token[1:], dtype=torch.long)
        return x, y

    def get_vocab_size(self):
        return self.tokenizer.vocab_size

    def get_block_size(self):
        return self.block_size

    def text_to_token(self, text):
        return [self.tokenizer(text, padding='max_length', truncation=True, max_length=self.block_size)['input_ids']]

    def token_to_text(self, tokens):
        return self.tokenizer.decode(tokens)


if __name__ == "__main__":
    #file_path = os.path.abspath("/lustre/scratch/usr/dw87/pile_data_10.jsonl")
    file_path = os.path.join(Path().cwd(), "test_pile_file.jsonl")

    # get default config and overrides from the command line, if any
    config = get_config()
    config.merge_from_args(sys.argv[1:])
    print(config)
    setup_logging(config)
    set_seed(config.system.seed)

    # construct the training dataset
    train_dataset = JSONL_Dataset(file_path, 100)

    print(train_dataset[0])

"""
iter = []
loss = []

# construct the model
config.model.vocab_size = train_dataset.get_vocab_size()
config.model.block_size = train_dataset.get_block_size()
model = GPT(config.model)

# construct the trainer object
trainer = Trainer(config.trainer, model, train_dataset)

# iteration callback
def batch_end_callback(trainer):

if trainer.iter_num % 10 == 0:
print(f"iter_dt {trainer.iter_dt * 1000:.2f}ms; iter {trainer.iter_num}: train loss {trainer.loss.item():.5f}")
loss.append(trainer.loss.item())
iter.append(trainer.iter_num)

if trainer.iter_num % 500 == 0:
# evaluate both the train and test score
model.eval()
with torch.no_grad():
pass
#sample from the model...
#context = "o god o god"
#x = torch.tensor(train_dataset.text_to_token(context), dtype=torch.long).to(trainer.device)
#y = model.generate(x, 500, temperature=1.0, do_sample=True, top_k=10)[0]
#completion = ''.join(train_dataset.token_to_text(y))
#print(completion)
# save the latest model
print("saving model")
#ckpt_path = os.path.join(config.system.work_dir, "model.pt")
#torch.save(model.state_dict(), ckpt_path)
model.save('my_model.pth')
# revert model to training mode
model.train()

trainer.set_callback('on_batch_end', batch_end_callback)

# run the optimization
trainer.run()


plt.plot(iter, loss)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.title('Training Loss')
plt.savefig("Loss.png")
"""
