From 75a68440520c3b60d7ca1c8bbc0cae2c823d7abb Mon Sep 17 00:00:00 2001 From: SEddula Date: Mon, 22 Dec 2025 23:56:26 -0600 Subject: [PATCH 01/34] updated Liquid model name in lab 3 --- lab3/LLM_Finetuning.ipynb | 2 +- lab3/solutions/LLM_Finetuning_Solution.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lab3/LLM_Finetuning.ipynb b/lab3/LLM_Finetuning.ipynb index 022caadc..137c4955 100644 --- a/lab3/LLM_Finetuning.ipynb +++ b/lab3/LLM_Finetuning.ipynb @@ -930,7 +930,7 @@ "OPENROUTER_API_KEY = \"\" # TODO: add your OpenRouter API key here\n", "assert OPENROUTER_API_KEY != \"\", \"You must set your OpenRouter API key before running this cell!\"\n", "\n", - "model_name = \"liquid/lfm-40b\"\n", + "model_name = \"liquid/lfm2-8b-a1b\"\n", "# model_name = \"google/gemma-2-9b-it\"\n", "llm = mdl.lab3.LLMClient(model=model_name, api_key=OPENROUTER_API_KEY)" ] diff --git a/lab3/solutions/LLM_Finetuning_Solution.ipynb b/lab3/solutions/LLM_Finetuning_Solution.ipynb index 04548c20..428e8b30 100644 --- a/lab3/solutions/LLM_Finetuning_Solution.ipynb +++ b/lab3/solutions/LLM_Finetuning_Solution.ipynb @@ -947,7 +947,7 @@ "OPENROUTER_API_KEY = \"\" # TODO: add your OpenRouter API key here\n", "assert OPENROUTER_API_KEY != \"\", \"You must set your OpenRouter API key before running this cell!\"\n", "\n", - "model_name = \"liquid/lfm-40b\"\n", + "model_name = \"liquid/lfm2-8b-a1b\"\n", "# model_name = \"google/gemma-2-9b-it\"\n", "llm = mdl.lab3.LLMClient(model=model_name, api_key=OPENROUTER_API_KEY)" ] From cc8abbd86c3d55b2deacec2fdc320a52ea835227 Mon Sep 17 00:00:00 2001 From: SEddula Date: Mon, 22 Dec 2025 23:59:20 -0600 Subject: [PATCH 02/34] should be state instead of hidden_state as per solutions --- lab1/PT_Part2_Music_Generation.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lab1/PT_Part2_Music_Generation.ipynb b/lab1/PT_Part2_Music_Generation.ipynb index a99bca7d..afbcbfbb 100644 --- 
a/lab1/PT_Part2_Music_Generation.ipynb +++ b/lab1/PT_Part2_Music_Generation.ipynb @@ -652,6 +652,11 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GuGUJB0ZT_Uo" + }, + "outputs": [], "source": [ "### compute the loss on the predictions from the untrained model from earlier. ###\n", "y.shape # (batch_size, sequence_length)\n", @@ -663,12 +668,7 @@ "\n", "print(f\"Prediction shape: {pred.shape} # (batch_size, sequence_length, vocab_size)\")\n", "print(f\"scalar_loss: {example_batch_loss.mean().item()}\")" - ], - "metadata": { - "id": "GuGUJB0ZT_Uo" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -906,7 +906,7 @@ "\n", " for i in tqdm(range(generation_length)):\n", " '''TODO: evaluate the inputs and generate the next character predictions'''\n", - " predictions, hidden_state = model('''TODO''', '''TODO''', return_state=True) # TODO\n", + " predictions, state = model('''TODO''', '''TODO''', return_state=True) # TODO\n", "\n", " # Remove the batch dimension\n", " predictions = predictions.squeeze(0)\n", From 84e329f69f6cfc63d326675e3a6961b4465b194b Mon Sep 17 00:00:00 2001 From: SEddula Date: Tue, 23 Dec 2025 15:24:19 -0600 Subject: [PATCH 03/34] ptp was removed from the ndarray class in NumPy 2.0. Use np.ptp(arr, ...) instead. 
--- lab2/TF_Part2_Debiasing.ipynb | 170 +++++++++--------- .../TF_Part2_Debiasing_Solution.ipynb | 170 +++++++++--------- 2 files changed, 170 insertions(+), 170 deletions(-) diff --git a/lab2/TF_Part2_Debiasing.ipynb b/lab2/TF_Part2_Debiasing.ipynb index 494b74c7..32ea5c3c 100644 --- a/lab2/TF_Part2_Debiasing.ipynb +++ b/lab2/TF_Part2_Debiasing.ipynb @@ -1,21 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "TF_Part2_Debiasing.ipynb", - "provenance": [], - "collapsed_sections": [ - "Ag_e7xtTzT1W", - "NDj7KBaW8Asz" - ] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", @@ -38,9 +21,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rNbf1pRlSDby" }, + "outputs": [], "source": [ "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", "#\n", @@ -51,9 +36,7 @@ "# © MIT 6.S191: Introduction to Deep Learning\n", "# http://introtodeeplearning.com\n", "#" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -77,15 +60,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "XQh5HZfbupFF" }, + "outputs": [], "source": [ "import IPython\n", "IPython.display.YouTubeVideo('59bMh59JQDo')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -104,9 +87,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "E46sWVKK6LP9" }, + "outputs": [], "source": [ "!pip install comet_ml --quiet\n", "import comet_ml\n", @@ -130,9 +115,7 @@ "# using Runtime > Change Runtime Type > GPU\n", "assert len(tf.config.list_physical_devices('GPU')) > 0\n", "assert COMET_API_KEY != \"\", \"Please insert your Comet API Key\"" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -153,17 +136,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": 
"RWXaaIWy6jVw" }, + "outputs": [], "source": [ "# Get the training data: both images from CelebA and ImageNet\n", "path_to_training_data = tf.keras.utils.get_file('train_face.h5', 'https://www.dropbox.com/s/hlz8atheyozp1yx/train_face.h5?dl=1')\n", "# Instantiate a TrainingDatasetLoader using the downloaded dataset\n", "loader = mdl.lab2.TrainingDatasetLoader(path_to_training_data)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -176,15 +159,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "DjPSjZZ_bGqe" }, + "outputs": [], "source": [ "number_of_training_examples = loader.get_train_size()\n", "(images, labels) = loader.get_batch(100)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -197,9 +180,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Jg17jzwtbxDA" }, + "outputs": [], "source": [ "### Examining the CelebA training dataset ###\n", "\n", @@ -219,9 +204,7 @@ "plt.subplot(1, 2, 2)\n", "plt.imshow(not_face_images[idx_not_face])\n", "plt.title(\"Not Face\"); plt.grid(False)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -253,9 +236,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "82EVTAAW7B_X" }, + "outputs": [], "source": [ "### Define the CNN model ###\n", "\n", @@ -288,9 +273,7 @@ " return model\n", "\n", "standard_classifier = make_standard_classifier()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -303,6 +286,11 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mi-04SAfK6lm" + }, + "outputs": [], "source": [ "### Create a Comet experiment to track our training run ###\n", "def create_experiment(project_name, params):\n", @@ -320,18 +308,15 @@ " experiment.flush()\n", "\n", " return experiment\n" - ], - "metadata": { - "id": "mi-04SAfK6lm" - }, - "execution_count": null, - "outputs": [] + 
] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eJlDGh1o31G1" }, + "outputs": [], "source": [ "### Train the standard CNN ###\n", "\n", @@ -376,9 +361,7 @@ "\n", " experiment.log_metric(\"loss\", loss.numpy().mean(), step=step)\n", " step += 1" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -393,9 +376,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "35-PDgjdWk6_" }, + "outputs": [], "source": [ "### Evaluation of standard CNN ###\n", "\n", @@ -406,9 +391,7 @@ "acc_standard = tf.reduce_mean(tf.cast(tf.equal(batch_y, y_pred_standard), tf.float32))\n", "\n", "print(\"Standard CNN accuracy on (potentially biased) training set: {:.4f}\".format(acc_standard.numpy()))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -423,9 +406,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vfDD8ztGWk6x" }, + "outputs": [], "source": [ "### Load test dataset and plot examples ###\n", "\n", @@ -435,9 +420,7 @@ " plt.figure(figsize=(5,5))\n", " plt.imshow(np.hstack(group))\n", " plt.title(key, fontsize=15)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -450,9 +433,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "GI4O0Y1GAot9" }, + "outputs": [], "source": [ "### Evaluate the standard CNN on the test data ###\n", "\n", @@ -464,11 +449,9 @@ "yy = standard_classifier_probs.numpy().mean(1)\n", "plt.bar(xx, yy)\n", "plt.xticks(xx, keys)\n", - "plt.ylim(max(0,yy.min()-yy.ptp()/2.), yy.max()+yy.ptp()/2.)\n", + "plt.ylim(max(0,yy.min()-np.ptp(yy)/2.), yy.max()+np.ptp(yy)/2.)\n", "plt.title(\"Standard classifier predictions\");" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -552,9 +535,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "S00ASo1ImSuh" }, + "outputs": [], "source": [ "### Defining the 
VAE loss function ###\n", "\n", @@ -583,9 +568,7 @@ " vae_loss = # TODO\n", "\n", " return vae_loss" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -615,9 +598,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "cT6PGdNajl3K" }, + "outputs": [], "source": [ "### VAE Reparameterization ###\n", "\n", @@ -637,9 +622,7 @@ " z = # TODO\n", "\n", " return z" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -698,9 +681,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "VjieDs8Ovcqs" }, + "outputs": [], "source": [ "### Loss function for DB-VAE ###\n", "\n", @@ -734,9 +719,7 @@ " total_loss = # TODO\n", "\n", " return total_loss, classification_loss" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -753,9 +736,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "JfWPHGrmyE7R" }, + "outputs": [], "source": [ "### Define the decoder portion of the DB-VAE ###\n", "\n", @@ -784,9 +769,7 @@ " ])\n", "\n", " return decoder" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -799,9 +782,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "dSFDcFBL13c3" }, + "outputs": [], "source": [ "### Defining and creating the DB-VAE ###\n", "\n", @@ -863,9 +848,7 @@ " return y_logit\n", "\n", "dbvae = DB_VAE(latent_dim)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -903,9 +886,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ewWbf7TE7wVc" }, + "outputs": [], "source": [ "# Function to return the means for an input image batch\n", "def get_latent_mu(images, dbvae, batch_size=1024):\n", @@ -917,9 +902,7 @@ " _, batch_mu, _ = dbvae.encode(batch)\n", " mu[start_ind:end_ind] = batch_mu\n", " return mu" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": 
"markdown", @@ -932,9 +915,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "HiX9pmmC7_wn" }, + "outputs": [], "source": [ "### Resampling algorithm for DB-VAE ###\n", "\n", @@ -983,9 +968,7 @@ " training_sample_p /= np.sum(training_sample_p)\n", "\n", " return training_sample_p" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -998,9 +981,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "xwQs-Gu5bKEK" }, + "outputs": [], "source": [ "### Training the DB-VAE ###\n", "\n", @@ -1070,9 +1055,7 @@ " step += 1\n", "\n", "experiment.end()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1096,9 +1079,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "bgK77aB9oDtX" }, + "outputs": [], "source": [ "dbvae_logits = [dbvae.predict(np.array(x, dtype=np.float32)) for x in test_faces]\n", "dbvae_probs = tf.squeeze(tf.sigmoid(dbvae_logits))\n", @@ -1109,9 +1094,7 @@ "plt.xticks(xx, keys);\n", "plt.title(\"Network predictions on test dataset\")\n", "plt.ylabel(\"Probability\"); plt.legend(bbox_to_anchor=(1.04,1), loc=\"upper left\");\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1143,5 +1126,22 @@ "" ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "Ag_e7xtTzT1W", + "NDj7KBaW8Asz" + ], + "name": "TF_Part2_Debiasing.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb b/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb index d6ccba60..bbc8b78a 100644 --- a/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb +++ b/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb @@ -1,21 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": 
"TF_Part2_Debiasing_Solution.ipynb", - "provenance": [], - "collapsed_sections": [ - "Ag_e7xtTzT1W", - "NDj7KBaW8Asz" - ] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", @@ -38,9 +21,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rNbf1pRlSDby" }, + "outputs": [], "source": [ "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", "#\n", @@ -51,9 +36,7 @@ "# © MIT 6.S191: Introduction to Deep Learning\n", "# http://introtodeeplearning.com\n", "#" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -77,15 +60,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "XQh5HZfbupFF" }, + "outputs": [], "source": [ "import IPython\n", "IPython.display.YouTubeVideo('59bMh59JQDo')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -104,9 +87,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "E46sWVKK6LP9" }, + "outputs": [], "source": [ "!pip install comet_ml --quiet\n", "import comet_ml\n", @@ -130,9 +115,7 @@ "# using Runtime > Change Runtime Type > GPU\n", "assert len(tf.config.list_physical_devices('GPU')) > 0\n", "assert COMET_API_KEY != \"\", \"Please insert your Comet API Key\"" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -153,17 +136,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "RWXaaIWy6jVw" }, + "outputs": [], "source": [ "# Get the training data: both images from CelebA and ImageNet\n", "path_to_training_data = tf.keras.utils.get_file('train_face.h5', 'https://www.dropbox.com/s/hlz8atheyozp1yx/train_face.h5?dl=1')\n", "# Instantiate a TrainingDatasetLoader using the downloaded dataset\n", "loader = mdl.lab2.TrainingDatasetLoader(path_to_training_data)" - ], - "execution_count": null, - "outputs": [] + ] }, { 
"cell_type": "markdown", @@ -176,15 +159,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "DjPSjZZ_bGqe" }, + "outputs": [], "source": [ "number_of_training_examples = loader.get_train_size()\n", "(images, labels) = loader.get_batch(100)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -197,9 +180,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Jg17jzwtbxDA" }, + "outputs": [], "source": [ "### Examining the CelebA training dataset ###\n", "\n", @@ -219,9 +204,7 @@ "plt.subplot(1, 2, 2)\n", "plt.imshow(not_face_images[idx_not_face])\n", "plt.title(\"Not Face\"); plt.grid(False)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -253,9 +236,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "82EVTAAW7B_X" }, + "outputs": [], "source": [ "### Define the CNN model ###\n", "\n", @@ -288,9 +273,7 @@ " return model\n", "\n", "standard_classifier = make_standard_classifier()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -303,6 +286,11 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mi-04SAfK6lm" + }, + "outputs": [], "source": [ "### Create a Comet experiment to track our training run ###\n", "def create_experiment(project_name, params):\n", @@ -320,18 +308,15 @@ " experiment.flush()\n", "\n", " return experiment\n" - ], - "metadata": { - "id": "mi-04SAfK6lm" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eJlDGh1o31G1" }, + "outputs": [], "source": [ "### Train the standard CNN ###\n", "\n", @@ -376,9 +361,7 @@ "\n", " experiment.log_metric(\"loss\", loss.numpy().mean(), step=step)\n", " step += 1" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -393,9 +376,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": 
{ "id": "35-PDgjdWk6_" }, + "outputs": [], "source": [ "### Evaluation of standard CNN ###\n", "\n", @@ -406,9 +391,7 @@ "acc_standard = tf.reduce_mean(tf.cast(tf.equal(batch_y, y_pred_standard), tf.float32))\n", "\n", "print(\"Standard CNN accuracy on (potentially biased) training set: {:.4f}\".format(acc_standard.numpy()))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -423,9 +406,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vfDD8ztGWk6x" }, + "outputs": [], "source": [ "### Load test dataset and plot examples ###\n", "\n", @@ -435,9 +420,7 @@ " plt.figure(figsize=(5,5))\n", " plt.imshow(np.hstack(group))\n", " plt.title(key, fontsize=15)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -450,9 +433,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "GI4O0Y1GAot9" }, + "outputs": [], "source": [ "### Evaluate the standard CNN on the test data ###\n", "\n", @@ -464,11 +449,9 @@ "yy = standard_classifier_probs.numpy().mean(1)\n", "plt.bar(xx, yy)\n", "plt.xticks(xx, keys)\n", - "plt.ylim(max(0,yy.min()-yy.ptp()/2.), yy.max()+yy.ptp()/2.)\n", + "plt.ylim(max(0,yy.min()-np.ptp(yy)/2.), yy.max()+np.ptp(yy)/2.)\n", "plt.title(\"Standard classifier predictions\");" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -552,9 +535,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "S00ASo1ImSuh" }, + "outputs": [], "source": [ "### Defining the VAE loss function ###\n", "\n", @@ -586,9 +571,7 @@ " # vae_loss = # TODO\n", "\n", " return vae_loss" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -618,9 +601,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "cT6PGdNajl3K" }, + "outputs": [], "source": [ "### VAE Reparameterization ###\n", "\n", @@ -640,9 +625,7 @@ " z = z_mean + tf.math.exp(0.5 * z_logsigma) * 
epsilon\n", " # z = # TODO\n", " return z" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -701,9 +684,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "VjieDs8Ovcqs" }, + "outputs": [], "source": [ "### Loss function for DB-VAE ###\n", "\n", @@ -743,9 +728,7 @@ " # total_loss = # TODO\n", "\n", " return total_loss, classification_loss" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -762,9 +745,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "JfWPHGrmyE7R" }, + "outputs": [], "source": [ "### Define the decoder portion of the DB-VAE ###\n", "\n", @@ -793,9 +778,7 @@ " ])\n", "\n", " return decoder" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -808,9 +791,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "dSFDcFBL13c3" }, + "outputs": [], "source": [ "### Defining and creating the DB-VAE ###\n", "\n", @@ -875,9 +860,7 @@ " return y_logit\n", "\n", "dbvae = DB_VAE(latent_dim)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -915,9 +898,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ewWbf7TE7wVc" }, + "outputs": [], "source": [ "# Function to return the means for an input image batch\n", "def get_latent_mu(images, dbvae, batch_size=1024):\n", @@ -929,9 +914,7 @@ " _, batch_mu, _ = dbvae.encode(batch)\n", " mu[start_ind:end_ind] = batch_mu\n", " return mu" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -944,9 +927,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "HiX9pmmC7_wn" }, + "outputs": [], "source": [ "### Resampling algorithm for DB-VAE ###\n", "\n", @@ -999,9 +984,7 @@ " training_sample_p /= np.sum(training_sample_p)\n", "\n", " return training_sample_p" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": 
"markdown", @@ -1014,9 +997,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "xwQs-Gu5bKEK" }, + "outputs": [], "source": [ "### Training the DB-VAE ###\n", "\n", @@ -1089,9 +1074,7 @@ " step += 1\n", "\n", "experiment.end()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1115,9 +1098,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "bgK77aB9oDtX" }, + "outputs": [], "source": [ "dbvae_logits = [dbvae.predict(np.array(x, dtype=np.float32)) for x in test_faces]\n", "dbvae_probs = tf.squeeze(tf.sigmoid(dbvae_logits))\n", @@ -1128,9 +1113,7 @@ "plt.xticks(xx, keys);\n", "plt.title(\"Network predictions on test dataset\")\n", "plt.ylabel(\"Probability\"); plt.legend(bbox_to_anchor=(1.04,1), loc=\"upper left\");\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1162,5 +1145,22 @@ "" ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "Ag_e7xtTzT1W", + "NDj7KBaW8Asz" + ], + "name": "TF_Part2_Debiasing_Solution.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From dc2b4a1cbdc0a34e47d79b48a00e62b119741802 Mon Sep 17 00:00:00 2001 From: SEddula Date: Tue, 30 Dec 2025 21:47:15 -0600 Subject: [PATCH 04/34] In Keras 3, add_weight changed its function signature. Need to pass shape as the first positional argument, and name must be passed as a keyword, not positionally. 
--- lab1/TF_Part1_Intro.ipynb | 10 ++++++++-- lab1/solutions/TF_Part1_Intro_Solution.ipynb | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lab1/TF_Part1_Intro.ipynb b/lab1/TF_Part1_Intro.ipynb index b79e04df..13a270e4 100644 --- a/lab1/TF_Part1_Intro.ipynb +++ b/lab1/TF_Part1_Intro.ipynb @@ -339,8 +339,14 @@ " d = int(input_shape[-1])\n", " # Define and initialize parameters: a weight matrix W and bias b\n", " # Note that parameter initialization is random!\n", - " self.W = self.add_weight(\"weight\", shape=[d, self.n_output_nodes]) # note the dimensionality\n", - " self.b = self.add_weight(\"bias\", shape=[1, self.n_output_nodes]) # note the dimensionality\n", + " self.W = self.add_weight(\n", + " shape=(d, self.n_output_nodes),\n", + " name=\"weight\",\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(1, self.n_output_nodes),\n", + " name=\"bias\",\n", + " )\n", "\n", " def call(self, x):\n", " '''TODO: define the operation for z (hint: use tf.matmul)'''\n", diff --git a/lab1/solutions/TF_Part1_Intro_Solution.ipynb b/lab1/solutions/TF_Part1_Intro_Solution.ipynb index 61f502cd..8c76c919 100644 --- a/lab1/solutions/TF_Part1_Intro_Solution.ipynb +++ b/lab1/solutions/TF_Part1_Intro_Solution.ipynb @@ -344,8 +344,14 @@ " d = int(input_shape[-1])\n", " # Define and initialize parameters: a weight matrix W and bias b\n", " # Note that parameter initialization is random!\n", - " self.W = self.add_weight(\"weight\", shape=[d, self.n_output_nodes]) # note the dimensionality\n", - " self.b = self.add_weight(\"bias\", shape=[1, self.n_output_nodes]) # note the dimensionality\n", + " self.W = self.add_weight(\n", + " shape=(d, self.n_output_nodes),\n", + " name=\"weight\",\n", + " )\n", + " self.b = self.add_weight(\n", + " shape=(1, self.n_output_nodes),\n", + " name=\"bias\",\n", + " )\n", "\n", " def call(self, x):\n", " '''TODO: define the operation for z (hint: use tf.matmul)'''\n", From 930e1c3c50b97e65ae2c94795609120c045926fb Mon 
Sep 17 00:00:00 2001 From: SEddula Date: Wed, 31 Dec 2025 01:19:30 -0600 Subject: [PATCH 05/34] Was running into an issue with reset_states() not running on 'Sequential', went layer by layer instead --- lab1/TF_Part2_Music_Generation.ipynb | 4 +++- lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lab1/TF_Part2_Music_Generation.ipynb b/lab1/TF_Part2_Music_Generation.ipynb index 63941f0c..4482aaaa 100644 --- a/lab1/TF_Part2_Music_Generation.ipynb +++ b/lab1/TF_Part2_Music_Generation.ipynb @@ -884,7 +884,9 @@ " text_generated = []\n", "\n", " # Here batch size == 1\n", - " model.reset_states()\n", + " for layer in model.layers:\n", + " if hasattr(layer, \"reset_states\"):\n", + " layer.reset_states()\n", " tqdm._instances.clear()\n", "\n", " for i in tqdm(range(generation_length)):\n", diff --git a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb index df0ded1c..c18437c0 100644 --- a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb +++ b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb @@ -900,7 +900,10 @@ " text_generated = []\n", "\n", " # Here batch size == 1\n", - " model.reset_states()\n", + " for layer in model.layers:\n", + " if hasattr(layer, \"reset_states\"):\n", + " layer.reset_states()\n", + " \n", " tqdm._instances.clear()\n", "\n", " for i in tqdm(range(generation_length)):\n", @@ -1056,4 +1059,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From a8ba33451742925d82a74f8e1b4fe0e16de9e33f Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:37:28 -0500 Subject: [PATCH 06/34] update PTLab1 links --- lab1/PT_Part2_Music_Generation.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/PT_Part2_Music_Generation.ipynb b/lab1/PT_Part2_Music_Generation.ipynb index afbcbfbb..5edbf632 100644 --- a/lab1/PT_Part2_Music_Generation.ipynb +++ 
b/lab1/PT_Part2_Music_Generation.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -1004,7 +1004,7 @@ "* What if you alter or augment the dataset?\n", "* Does the choice of start string significantly affect the result?\n", "\n", - "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2025 offering. To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/U8nND6enGjirujVZKX1n):**\n", + "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2026 offering. To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/4hqfsOnLtX4jH1W3ynfp):**\n", "\n", "* a recording of your song;\n", "* iPython notebook with the code you used to generate the song;\n", From eaa9fc5110af0a516531122b4c6ad4fa9938eaee Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:38:46 -0500 Subject: [PATCH 07/34] update 2026 --- lab1/PT_Part1_Intro.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lab1/PT_Part1_Intro.ipynb b/lab1/PT_Part1_Intro.ipynb index db97d067..d07aa5dc 100644 --- a/lab1/PT_Part1_Intro.ipynb +++ b/lab1/PT_Part1_Intro.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. 
Use and/or modification of this code outside of MIT Introduction\n", @@ -53,7 +53,7 @@ "\n", "## 0.1 Install PyTorch\n", "\n", - "[PyTorch](https://pytorch.org/) is a popular deep learning library known for its flexibility and ease of use. Here we'll learn how computations are represented and how to define a simple neural network in PyTorch. For all the labs in Introduction to Deep Learning 2025, there will be a PyTorch version available.\n", + "[PyTorch](https://pytorch.org/) is a popular deep learning library known for its flexibility and ease of use. Here we'll learn how computations are represented and how to define a simple neural network in PyTorch. For all the labs in Introduction to Deep Learning 2026, there will be a PyTorch version available.\n", "\n", "Let's install PyTorch and a couple of dependencies." ] @@ -203,7 +203,7 @@ "\n", "A convenient way to think about and visualize computations in a machine learning framework like PyTorch is in terms of graphs. We can define this graph in terms of tensors, which hold data, and the mathematical operations that act on these tensors in some order. Let's look at a simple example, and define this computation using PyTorch:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" ] }, { @@ -235,7 +235,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. 
Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -306,7 +306,7 @@ "\n", "Let's consider the example of a simple perceptron defined by just one dense (aka fully-connected or linear) layer: $ y = \\sigma(Wx + b) $, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output.\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", "\n", "We will use `torch.nn.Module` to define layers -- the building blocks of neural networks. Layers implement common neural networks operations. In PyTorch, when we implement a layer, we subclass `nn.Module` and define the parameters of the layer as attributes of our new class. We also define and override a function [``forward``](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.forward), which will define the forward pass computation that is performed at every step. All classes subclassing `nn.Module` should override the `forward` function.\n", "\n", From 9b5e542af9f68f42f2f34e6b49e480e33a10e871 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:39:23 -0500 Subject: [PATCH 08/34] update 2026 --- lab1/TF_Part1_Intro.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lab1/TF_Part1_Intro.ipynb b/lab1/TF_Part1_Intro.ipynb index 13a270e4..d21f43e4 100644 --- a/lab1/TF_Part1_Intro.ipynb +++ b/lab1/TF_Part1_Intro.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. 
You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -53,7 +53,7 @@ "\n", "## 0.1 Install TensorFlow\n", "\n", - "TensorFlow is a software library extensively used in machine learning. Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2025, we'll be using TensorFlow 2, which affords great flexibility and the ability to imperatively execute operations, just like in Python. You'll notice that TensorFlow 2 is quite similar to Python in its syntax and imperative execution. Let's install TensorFlow and a couple of dependencies.\n" + "TensorFlow is a software library extensively used in machine learning. Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2026, we'll be using TensorFlow 2, which affords great flexibility and the ability to imperatively execute operations, just like in Python. You'll notice that TensorFlow 2 is quite similar to Python in its syntax and imperative execution. Let's install TensorFlow and a couple of dependencies.\n" ] }, { @@ -208,7 +208,7 @@ "\n", "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. 
Let's look at a simple example, and define this computation using TensorFlow:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" ] }, { @@ -240,7 +240,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -311,7 +311,7 @@ "\n", "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. We can also visualize this operation using a graph:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", "\n", "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural networks operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above." 
] From e20e0600593056fdca539a73242c581bf5a5fd6e Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:40:26 -0500 Subject: [PATCH 09/34] update 2026 + link --- lab1/TF_Part2_Music_Generation.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/TF_Part2_Music_Generation.ipynb b/lab1/TF_Part2_Music_Generation.ipynb index 4482aaaa..06c721a1 100644 --- a/lab1/TF_Part2_Music_Generation.ipynb +++ b/lab1/TF_Part2_Music_Generation.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -993,7 +993,7 @@ "* What if you alter or augment the dataset?\n", "* Does the choice of start string significantly affect the result?\n", "\n", - "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2025 offering. To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/U8nND6enGjirujVZKX1n):**\n", + "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2026 offering.
To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/4hqfsOnLtX4jH1W3ynfp):**\n", "\n", "* a recording of your song;\n", "* iPython notebook with the code you used to generate the song;\n", From 6805092e1519bdb5a683cb83cbea6aa8e4b85a19 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:44:38 -0500 Subject: [PATCH 10/34] update 2026 --- lab1/solutions/PT_Part1_Intro_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/solutions/PT_Part1_Intro_Solution.ipynb b/lab1/solutions/PT_Part1_Intro_Solution.ipynb index f0529493..7b40df93 100644 --- a/lab1/solutions/PT_Part1_Intro_Solution.ipynb +++ b/lab1/solutions/PT_Part1_Intro_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -241,7 +241,7 @@ "\n", "A convenient way to think about and visualize computations in a machine learning framework like PyTorch is in terms of graphs. We can define this graph in terms of tensors, which hold data, and the mathematical operations that act on these tensors in some order. 
Let's look at a simple example, and define this computation using PyTorch:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" ] }, { From f48bf4fb7e5edc3aaa39a8d94c460707933381a7 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:45:36 -0500 Subject: [PATCH 11/34] update 2026 + submission link --- lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb b/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb index ac0335d9..84a3b92f 100644 --- a/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb +++ b/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -1025,7 +1025,7 @@ "* What if you alter or augment the dataset?\n", "* Does the choice of start string significantly affect the result?\n", "\n", - "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2025 offering. To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/U8nND6enGjirujVZKX1n):**\n", + "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2026 offering. 
To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/4hqfsOnLtX4jH1W3ynfp):**\n", "\n", "* a recording of your song;\n", "* iPython notebook with the code you used to generate the song;\n", @@ -1070,4 +1070,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 11248eb902a1fe5b41a938e24b369aaacaa7fe80 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:46:30 -0500 Subject: [PATCH 12/34] update 2026 --- lab1/solutions/TF_Part1_Intro_Solution.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lab1/solutions/TF_Part1_Intro_Solution.ipynb b/lab1/solutions/TF_Part1_Intro_Solution.ipynb index 8c76c919..bcf09c6c 100644 --- a/lab1/solutions/TF_Part1_Intro_Solution.ipynb +++ b/lab1/solutions/TF_Part1_Intro_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -53,7 +53,7 @@ "\n", "## 0.1 Install TensorFlow\n", "\n", - "TensorFlow is a software library extensively used in machine learning. Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2025, we'll be using TensorFlow 2, which affords great flexibility and the ability to imperatively execute operations, just like in Python. You'll notice that TensorFlow 2 is quite similar to Python in its syntax and imperative execution. Let's install TensorFlow and a couple of dependencies.\n" + "TensorFlow is a software library extensively used in machine learning. 
Here we'll learn how computations are represented and how to define a simple neural network in TensorFlow. For all the TensorFlow labs in Introduction to Deep Learning 2026, we'll be using TensorFlow 2, which affords great flexibility and the ability to imperatively execute operations, just like in Python. You'll notice that TensorFlow 2 is quite similar to Python in its syntax and imperative execution. Let's install TensorFlow and a couple of dependencies.\n" ] }, { @@ -210,7 +210,7 @@ "\n", "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. Let's look at a simple example, and define this computation using TensorFlow:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" ] }, { @@ -242,7 +242,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -316,7 +316,7 @@ "\n", "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. 
We can also visualize this operation using a graph:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", "\n", "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural networks operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above." ] From 6211f55e7cfcdd7570c16f5c74d56d9a02c76035 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:47:27 -0500 Subject: [PATCH 13/34] update 2026 + submission link --- lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb index c18437c0..e9f76e31 100644 --- a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb +++ b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -1014,7 +1014,7 @@ "* What if you alter or augment the dataset?\n", "* Does the choice of start string significantly affect the result?\n", "\n", - "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2025 offering. 
To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/U8nND6enGjirujVZKX1n):**\n", + "Try to optimize your model and submit your best song! **Participants will be eligible for prizes during the January 2026 offering. To enter the competition, you must upload the following to [this submission link](https://www.dropbox.com/request/4hqfsOnLtX4jH1W3ynfp):**\n", "\n", "* a recording of your song;\n", "* iPython notebook with the code you used to generate the song;\n", From dc1434c798c245c50c85332106cd3e325c9ce89f Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:51:22 -0500 Subject: [PATCH 14/34] update 2026 --- lab2/PT_Part1_MNIST.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lab2/PT_Part1_MNIST.ipynb b/lab2/PT_Part1_MNIST.ipynb index bcfae490..4a345b27 100644 --- a/lab2/PT_Part1_MNIST.ipynb +++ b/lab2/PT_Part1_MNIST.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", From f33f764511b731b15623eb5201eb576128d6e622 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:53:00 -0500 Subject: [PATCH 15/34] Update copyright year and submission link --- lab2/PT_Part2_Debiasing.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab2/PT_Part2_Debiasing.ipynb b/lab2/PT_Part2_Debiasing.ipynb index fc892426..b4fd03ed 100644 --- a/lab2/PT_Part2_Debiasing.ipynb +++ b/lab2/PT_Part2_Debiasing.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT 6.S191 Introduction to Deep Learning. 
All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of 6.S191 must\n", @@ -1299,7 +1299,7 @@ "* Do you think it should be necessary for companies to demonstrate that their models, particularly in the context of tasks like facial detection, are not biased? If so, do you have thoughts on how this could be standardized and implemented?\n", "* Do you have ideas for other ways to address issues of bias, particularly in terms of the training data?\n", "\n", - "**The debiased model may or may not perform well based on the initial hyperparameters. This lab competition will be focused on your answers to the questions above, experiments you tried, and your interpretation and analysis of the results. To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/dJZUEoqGLB43JEKzzqIc)).**\n", + "**The debiased model may or may not perform well based on the initial hyperparameters. This lab competition will be focused on your answers to the questions above, experiments you tried, and your interpretation and analysis of the results. 
To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/SG6AnjrtIljNPrbEOMfP)).**\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot from section 2.6 showing the performance of your model;\n", From 88af21f1dfec5809b117d1fc46f07064a5a08e09 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:55:25 -0500 Subject: [PATCH 16/34] update 2026 --- lab2/TF_Part1_MNIST.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lab2/TF_Part1_MNIST.ipynb b/lab2/TF_Part1_MNIST.ipynb index 81549151..796ad03c 100644 --- a/lab2/TF_Part1_MNIST.ipynb +++ b/lab2/TF_Part1_MNIST.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", From 7743df7cb15714e134dde00162bc8908d91f73e8 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:56:15 -0500 Subject: [PATCH 17/34] update 2026 + submission link --- lab2/TF_Part2_Debiasing.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab2/TF_Part2_Debiasing.ipynb b/lab2/TF_Part2_Debiasing.ipynb index 32ea5c3c..92d6719e 100644 --- a/lab2/TF_Part2_Debiasing.ipynb +++ b/lab2/TF_Part2_Debiasing.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. 
Use and/or modification of this code outside of 6.S191 must\n", @@ -1112,7 +1112,7 @@ "* Do you think it should be necessary for companies to demonstrate that their models, particularly in the context of tasks like facial detection, are not biased? If so, do you have thoughts on how this could be standardized and implemented?\n", "* Do you have ideas for other ways to address issues of bias, particularly in terms of the training data?\n", "\n", - "**Try to optimize your model to achieve improved performance. To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/dJZUEoqGLB43JEKzzqIc)).**\n", + "**Try to optimize your model to achieve improved performance. To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/SG6AnjrtIljNPrbEOMfP)).**\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot from section 2.6 showing the performance of your model;\n", From 389f46cd3de80e2a53aa57186a03523251426236 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 18:59:36 -0500 Subject: [PATCH 18/34] update 2026 --- lab2/solutions/PT_Part1_MNIST_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab2/solutions/PT_Part1_MNIST_Solution.ipynb b/lab2/solutions/PT_Part1_MNIST_Solution.ipynb index 01dc5bfa..efb23831 100644 --- a/lab2/solutions/PT_Part1_MNIST_Solution.ipynb +++ b/lab2/solutions/PT_Part1_MNIST_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. 
Use and/or modification of this code outside of MIT Introduction\n", @@ -1026,4 +1026,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 13734e644264ff247db11d959d8fe2f87b1ad2c2 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:03:03 -0500 Subject: [PATCH 19/34] update 2026 + submission link --- lab2/solutions/PT_Part2_Debiasing_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab2/solutions/PT_Part2_Debiasing_Solution.ipynb b/lab2/solutions/PT_Part2_Debiasing_Solution.ipynb index b258530b..02b4351e 100644 --- a/lab2/solutions/PT_Part2_Debiasing_Solution.ipynb +++ b/lab2/solutions/PT_Part2_Debiasing_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of 6.S191 must\n", @@ -1320,7 +1320,7 @@ "* Do you think it should be necessary for companies to demonstrate that their models, particularly in the context of tasks like facial detection, are not biased? If so, do you have thoughts on how this could be standardized and implemented?\n", "* Do you have ideas for other ways to address issues of bias, particularly in terms of the training data?\n", "\n", - "**The debiased model may or may not perform well based on the initial hyperparameters. This lab competition will be focused on your answers to the questions above, experiments you tried, and your interpretation and analysis of the results. 
To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/dJZUEoqGLB43JEKzzqIc)).**\n", + "**The debiased model may or may not perform well based on the initial hyperparameters. This lab competition will be focused on your answers to the questions above, experiments you tried, and your interpretation and analysis of the results. To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/SG6AnjrtIljNPrbEOMfP)).**\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot from section 2.6 showing the performance of your model;\n", From ac37a5a360c5f29712ee6928c435c3650883a45a Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:05:50 -0500 Subject: [PATCH 20/34] update 2026 --- lab2/solutions/TF_Part1_MNIST_Solution.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lab2/solutions/TF_Part1_MNIST_Solution.ipynb b/lab2/solutions/TF_Part1_MNIST_Solution.ipynb index c2ae8377..90db5d6e 100644 --- a/lab2/solutions/TF_Part1_MNIST_Solution.ipynb +++ b/lab2/solutions/TF_Part1_MNIST_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. 
Use and/or modification of this code outside of MIT Introduction\n", From d447d995ac4b6160f5a0d3c312fd7547fc8dc6e8 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:06:37 -0500 Subject: [PATCH 21/34] update 2026 + submission link --- lab2/solutions/TF_Part2_Debiasing_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb b/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb index bbc8b78a..71ce318d 100644 --- a/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb +++ b/lab2/solutions/TF_Part2_Debiasing_Solution.ipynb @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "# Copyright 2025 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT 6.S191 Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of 6.S191 must\n", @@ -1131,7 +1131,7 @@ "* Do you think it should be necessary for companies to demonstrate that their models, particularly in the context of tasks like facial detection, are not biased? If so, do you have thoughts on how this could be standardized and implemented?\n", "* Do you have ideas for other ways to address issues of bias, particularly in terms of the training data?\n", "\n", - "**Try to optimize your model to achieve improved performance. To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/dJZUEoqGLB43JEKzzqIc)).**\n", + "**Try to optimize your model to achieve improved performance. 
To enter the competition, please upload the following to the lab submission site for the Debiasing Faces Lab ([submission upload link](https://www.dropbox.com/request/SG6AnjrtIljNPrbEOMfP)).**\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot from section 2.6 showing the performance of your model;\n", From b4ddb15e2630dafa935b67872ad2e3a2675d1f47 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:07:26 -0500 Subject: [PATCH 22/34] img pointers to master --- lab1/PT_Part1_Intro.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/PT_Part1_Intro.ipynb b/lab1/PT_Part1_Intro.ipynb index d07aa5dc..49bcdeee 100644 --- a/lab1/PT_Part1_Intro.ipynb +++ b/lab1/PT_Part1_Intro.ipynb @@ -203,7 +203,7 @@ "\n", "A convenient way to think about and visualize computations in a machine learning framework like PyTorch is in terms of graphs. We can define this graph in terms of tensors, which hold data, and the mathematical operations that act on these tensors in some order. Let's look at a simple example, and define this computation using PyTorch:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/add-graph.png)" ] }, { @@ -235,7 +235,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. 
Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -306,7 +306,7 @@ "\n", "Let's consider the example of a simple perceptron defined by just one dense (aka fully-connected or linear) layer: $ y = \\sigma(Wx + b) $, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output.\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph-2.png)\n", "\n", "We will use `torch.nn.Module` to define layers -- the building blocks of neural networks. Layers implement common neural networks operations. In PyTorch, when we implement a layer, we subclass `nn.Module` and define the parameters of the layer as attributes of our new class. We also define and override a function [``forward``](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.forward), which will define the forward pass computation that is performed at every step. 
All classes subclassing `nn.Module` should override the `forward` function.\n", "\n", From ba5620304bba57aa6eddd27477dd14b4f1b0827e Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:08:18 -0500 Subject: [PATCH 23/34] img pointers to master --- lab1/PT_Part2_Music_Generation.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/PT_Part2_Music_Generation.ipynb b/lab1/PT_Part2_Music_Generation.ipynb index 5edbf632..e4982c27 100644 --- a/lab1/PT_Part2_Music_Generation.ipynb +++ b/lab1/PT_Part2_Music_Generation.ipynb @@ -399,7 +399,7 @@ "* [`nn.LSTM`](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html): Our LSTM network, with size `hidden_size`.\n", "* [`nn.Linear`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html): The output layer, with `vocab_size` outputs.\n", "\n", - "\"Drawing\"/\n", + "\"Drawing\"/\n", "\n", "\n", "\n", @@ -415,7 +415,7 @@ "* [`tf.keras.layers.Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense): The output layer, with `vocab_size` outputs.\n", "\n", "\n", - "\"Drawing\"/ -->" + "\"Drawing\"/ -->" ] }, { @@ -875,7 +875,7 @@ "\n", "* At each time step, the updated RNN state is fed back into the model, so that it now has more context in making the next prediction. After predicting the next character, the updated RNN states are again fed back into the model, which is how it learns sequence dependencies in the data, as it gets more information from the previous predictions.\n", "\n", - "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2019/lab1/img/lstm_inference.png)\n", + "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/lstm_inference.png)\n", "\n", "Complete and experiment with this code block (as well as some of the aspects of network definition and training!), and see how the model performs. 
How do songs generated after training with a small number of epochs compare to those generated after a longer duration of training?" ] From 11fdf5c0e404fb91491026326e95ff8edb7754fb Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:08:53 -0500 Subject: [PATCH 24/34] update img pointers to master --- lab1/TF_Part1_Intro.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/TF_Part1_Intro.ipynb b/lab1/TF_Part1_Intro.ipynb index d21f43e4..6c5812a2 100644 --- a/lab1/TF_Part1_Intro.ipynb +++ b/lab1/TF_Part1_Intro.ipynb @@ -208,7 +208,7 @@ "\n", "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. Let's look at a simple example, and define this computation using TensorFlow:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/add-graph.png)" ] }, { @@ -240,7 +240,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -311,7 +311,7 @@ "\n", "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. 
We can also visualize this operation using a graph:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph-2.png)\n", "\n", "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural networks operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above." ] From c3c26b096491933333194923e7999972cae46e33 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:09:43 -0500 Subject: [PATCH 25/34] update img pointers to master --- lab1/TF_Part2_Music_Generation.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/TF_Part2_Music_Generation.ipynb b/lab1/TF_Part2_Music_Generation.ipynb index 06c721a1..61128c82 100644 --- a/lab1/TF_Part2_Music_Generation.ipynb +++ b/lab1/TF_Part2_Music_Generation.ipynb @@ -399,7 +399,7 @@ "* [`tf.keras.layers.Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense): The output layer, with `vocab_size` outputs.\n", "\n", "\n", - "\"Drawing\"/" + "\"Drawing\"/" ] }, { @@ -858,7 +858,7 @@ "\n", "* At each time step, the updated RNN state is fed back into the model, so that it now has more context in making the next prediction. 
After predicting the next character, the updated RNN states are again fed back into the model, which is how it learns sequence dependencies in the data, as it gets more information from the previous predictions.\n", "\n", - "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2019/lab1/img/lstm_inference.png)\n", + "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/lstm_inference.png)\n", "\n", "Complete and experiment with this code block (as well as some of the aspects of network definition and training!), and see how the model performs. How do songs generated after training with a small number of epochs compare to those generated after a longer duration of training?" ] From 0fc954819290ea70ddfc0f9f0939cc035116e699 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:10:48 -0500 Subject: [PATCH 26/34] update img pointers to master --- lab1/solutions/PT_Part1_Intro_Solution.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/solutions/PT_Part1_Intro_Solution.ipynb b/lab1/solutions/PT_Part1_Intro_Solution.ipynb index 7b40df93..a8feddf7 100644 --- a/lab1/solutions/PT_Part1_Intro_Solution.ipynb +++ b/lab1/solutions/PT_Part1_Intro_Solution.ipynb @@ -241,7 +241,7 @@ "\n", "A convenient way to think about and visualize computations in a machine learning framework like PyTorch is in terms of graphs. We can define this graph in terms of tensors, which hold data, and the mathematical operations that act on these tensors in some order. 
Let's look at a simple example, and define this computation using PyTorch:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/add-graph.png)" ] }, { @@ -282,7 +282,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -364,7 +364,7 @@ "\n", "Let's consider the example of a simple perceptron defined by just one dense (aka fully-connected or linear) layer: $ y = \\sigma(Wx + b) $, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output.\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2025/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph-2.png)\n", "\n", "We will use `torch.nn.Module` to define layers -- the building blocks of neural networks. Layers implement common neural networks operations. In PyTorch, when we implement a layer, we subclass `nn.Module` and define the parameters of the layer as attributes of our new class. We also define and override a function [``forward``](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.forward), which will define the forward pass computation that is performed at every step. 
All classes subclassing `nn.Module` should override the `forward` function.\n", "\n", From 9065e636ca7d9ee1f06cd27f0109a14e00786c70 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:11:45 -0500 Subject: [PATCH 27/34] update img pointers to master --- lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb b/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb index 84a3b92f..239dd4e0 100644 --- a/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb +++ b/lab1/solutions/PT_Part2_Music_Generation_Solution.ipynb @@ -403,7 +403,7 @@ "* [`nn.LSTM`](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html): Our LSTM network, with size `hidden_size`.\n", "* [`nn.Linear`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html): The output layer, with `vocab_size` outputs.\n", "\n", - "\"Drawing\"/\n", + "\"Drawing\"/\n", "\n", "\n", "\n", @@ -419,7 +419,7 @@ "* [`tf.keras.layers.Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense): The output layer, with `vocab_size` outputs.\n", "\n", "\n", - "\"Drawing\"/ -->" + "\"Drawing\"/ -->" ] }, { @@ -890,7 +890,7 @@ "\n", "* At each time step, the updated RNN state is fed back into the model, so that it now has more context in making the next prediction. 
After predicting the next character, the updated RNN states are again fed back into the model, which is how it learns sequence dependencies in the data, as it gets more information from the previous predictions.\n", "\n", - "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2019/lab1/img/lstm_inference.png)\n", + "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/lstm_inference.png)\n", "\n", "Complete and experiment with this code block (as well as some of the aspects of network definition and training!), and see how the model performs. How do songs generated after training with a small number of epochs compare to those generated after a longer duration of training?" ] From 9c28b17f732c5fcdff4e16879c99340134bc13fd Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:13:09 -0500 Subject: [PATCH 28/34] update img pointers to master --- lab1/solutions/TF_Part1_Intro_Solution.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lab1/solutions/TF_Part1_Intro_Solution.ipynb b/lab1/solutions/TF_Part1_Intro_Solution.ipynb index bcf09c6c..3c1aff9e 100644 --- a/lab1/solutions/TF_Part1_Intro_Solution.ipynb +++ b/lab1/solutions/TF_Part1_Intro_Solution.ipynb @@ -210,7 +210,7 @@ "\n", "A convenient way to think about and visualize computations in TensorFlow is in terms of graphs. We can define this graph in terms of Tensors, which hold data, and the mathematical operations that act on these Tensors in some order. 
Let's look at a simple example, and define this computation using TensorFlow:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/add-graph.png)" + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/add-graph.png)" ] }, { @@ -242,7 +242,7 @@ "\n", "Now let's consider a slightly more complicated example:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph.png)\n", "\n", "Here, we take two inputs, `a, b`, and compute an output `e`. Each node in the graph represents an operation that takes some input, does some computation, and passes its output to another node.\n", "\n", @@ -316,7 +316,7 @@ "\n", "Let's first consider the example of a simple perceptron defined by just one dense layer: $ y = \\sigma(Wx + b)$, where $W$ represents a matrix of weights, $b$ is a bias, $x$ is the input, $\\sigma$ is the sigmoid activation function, and $y$ is the output. We can also visualize this operation using a graph:\n", "\n", - "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2026/lab1/img/computation-graph-2.png)\n", + "![alt text](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/computation-graph-2.png)\n", "\n", "Tensors can flow through abstract types called [```Layers```](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) -- the building blocks of neural networks. ```Layers``` implement common neural networks operations, and are used to update weights, compute losses, and define inter-layer connectivity. We will first define a ```Layer``` to implement the simple perceptron defined above." 
] From dad43107062f68358aa8fec4dea0bf1daa70dacc Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:13:43 -0500 Subject: [PATCH 29/34] update img pointers to master --- lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb index e9f76e31..5524ddd1 100644 --- a/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb +++ b/lab1/solutions/TF_Part2_Music_Generation_Solution.ipynb @@ -404,7 +404,7 @@ "* [`tf.keras.layers.Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense): The output layer, with `vocab_size` outputs.\n", "\n", "\n", - "\"Drawing\"/" + "\"Drawing\"/" ] }, { @@ -873,7 +873,7 @@ "\n", "* At each time step, the updated RNN state is fed back into the model, so that it now has more context in making the next prediction. After predicting the next character, the updated RNN states are again fed back into the model, which is how it learns sequence dependencies in the data, as it gets more information from the previous predictions.\n", "\n", - "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/2019/lab1/img/lstm_inference.png)\n", + "![LSTM inference](https://raw.githubusercontent.com/MITDeepLearning/introtodeeplearning/master/lab1/img/lstm_inference.png)\n", "\n", "Complete and experiment with this code block (as well as some of the aspects of network definition and training!), and see how the model performs. How do songs generated after training with a small number of epochs compare to those generated after a longer duration of training?" 
] From 87e323d7efbdbef0a93d1a64c97250f7dacab485 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:15:21 -0500 Subject: [PATCH 30/34] Update copyright year and submission link --- lab3/LLM_Finetuning.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab3/LLM_Finetuning.ipynb b/lab3/LLM_Finetuning.ipynb index 137c4955..58213d2f 100644 --- a/lab3/LLM_Finetuning.ipynb +++ b/lab3/LLM_Finetuning.ipynb @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -1251,7 +1251,7 @@ "source": [ "# Submission information\n", "\n", - "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/vrDrNCkj4yDxgsi2O5Sw)):\n", + "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm)):\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot showing the judge LLM's scores of text in base style, generated text, and text in true Yoda-speak style;\n", From 9f21659d185d20428339f577518a6bd9b6514388 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 19:16:32 -0500 Subject: [PATCH 31/34] update copyright year and submission link --- lab3/solutions/LLM_Finetuning_Solution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lab3/solutions/LLM_Finetuning_Solution.ipynb b/lab3/solutions/LLM_Finetuning_Solution.ipynb index 428e8b30..c9848839 100644 --- a/lab3/solutions/LLM_Finetuning_Solution.ipynb +++ 
b/lab3/solutions/LLM_Finetuning_Solution.ipynb @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Copyright 2025 MIT Introduction to Deep Learning. All Rights Reserved.\n", + "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", "#\n", "# Licensed under the MIT License. You may not use this file except in compliance\n", "# with the License. Use and/or modification of this code outside of MIT Introduction\n", @@ -1275,7 +1275,7 @@ "source": [ "# Submission information\n", "\n", - "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/vrDrNCkj4yDxgsi2O5Sw)):\n", + "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm)):\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot showing the judge LLM's scores of text in base style, generated text, and text in true Yoda-speak style;\n", From 1fb79a5261c8ff2d8b393e3555bf3e666481f811 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 22:07:04 -0500 Subject: [PATCH 32/34] lfm2 finetuning, gemini2.5 judge, opik eval --- lab3/solutions/LLM_Finetuning_Solution.ipynb | 4734 ++---------------- 1 file changed, 286 insertions(+), 4448 deletions(-) diff --git a/lab3/solutions/LLM_Finetuning_Solution.ipynb b/lab3/solutions/LLM_Finetuning_Solution.ipynb index c9848839..b7656646 100644 --- a/lab3/solutions/LLM_Finetuning_Solution.ipynb +++ b/lab3/solutions/LLM_Finetuning_Solution.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "yh8WeSsQfnyw" + }, "source": [ "\n", "
\n", @@ -20,7 +22,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "O-pRdpMbfnyw" + }, "outputs": [], "source": [ "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", @@ -36,13 +40,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "StmM5Grmfnyx" + }, "source": [ "# Laboratory 3: Large Language Model (LLM) Fine-tuning\n", "\n", "In this lab, you will fine-tune a multi-billion parameter large language model (LLM). We will go through several fundamental concepts of LLMs, including tokenization, templates, and fine-tuning. This lab provides a complete pipeline for fine-tuning a language model to generate responses in a specific style, and you will explore not only language model fine-tuning, but also ways to evaluate the performance of a language model.\n", "\n", - "You will use Google's [Gemma 2B](https://huggingface.co/google/gemma-2b-it) model as the base language model to fine-tune; [Liquid AI's](https://www.liquid.ai/) [LFM-40B](https://www.liquid.ai/liquid-foundation-models) as an evaluation \"judge\" model; and Comet ML's [Opik](https://www.comet.com/site/products/opik/) as a framework for streamlined LLM evaluation.\n", + "You will use [Liquid AI's](https://www.liquid.ai/) [LFM2-1.2B](https://huggingface.co/LiquidAI/LFM2-1.2B) as the base language model to fine-tune; Google's [Gemini 2.5](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) model as an evaluation \"judge\" model; and Comet ML's [Opik](https://www.comet.com/site/products/opik/) as a framework for streamlined LLM evaluation.\n", "\n", "First, let's download the MIT deep learning package, install dependencies, and import the relevant packages we'll need for this lab." 
] @@ -86,16 +92,20 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "j-qsDChnfnyx" + }, "source": [ "# Part 1: Fine-tuning an LLM for style\n", "\n", - "In the first part of this lab, we will fine-tune an LLM as a chatbot that can generate responses in a specific style. We will use the [Gemma 2B model](https://huggingface.co/google/gemma-2b-it) as the base language model to finetune." + "In the first part of this lab, we will fine-tune an LLM as a chatbot that can generate responses in a specific style. We will use the [Liquid AI LFM2-1.2B model](https://huggingface.co/LiquidAI/LFM2-1.2B) as the base language model to finetune." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "VNE6ArjFfnyx" + }, "source": [ "## 1.1: Templating and tokenization\n", "\n", @@ -105,32 +115,30 @@ "\n", "[Templating](https://huggingface.co/docs/transformers/main/chat_templating) is a way to format inputs and outputs in a consistent structure that a language model can understand. It involves adding special tokens or markers to indicate different parts of the conversation, like who is speaking and where turns begin and end. This structure helps the model learn the proper format for generating responses and maintain a coherent conversation flow. Without templates, the model may not know how to properly format its outputs or distinguish between different speakers in a conversation.\n", "\n", - "Let's start by defining some basic templates for the chatbot, for turns where the user asks a question and the model responds with an answer." + "Let's start by defining some basic templates for the LFM2-based chatbot, for turns where the user asks a question and the model responds with an answer." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TN2zHVhfBvnE", - "outputId": "abddea82-12cf-4a16-868b-2e41f85fd7f1" + "id": "TN2zHVhfBvnE" }, "outputs": [], "source": [ "# Basic question-answer template\n", - "template_without_answer = \"user\\n{question}\\nmodel\\n\"\n", - "template_with_answer = template_without_answer + \"{answer}\\n\"\n", + "template_without_answer = \"<|startoftext|><|im_start|>user\\n{question}<|im_end|>\\n<|im_start|>assistant\\n\"\n", + "template_with_answer = template_without_answer + \"{answer}<|im_end|>\\n\"\n", "\n", "# Let's try to put something into the template to see how it looks\n", - "print(template_with_answer.format(question=\"What is your name?\", answer=\"My name is Gemma!\"))" + "print(template_with_answer.format(question=\"What is your name?\", answer=\"My name is Lili!\"))" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "keh0rVN-fnyx" + }, "source": [ "### 1.1.2: Tokenization\n", "\n", @@ -144,70 +152,19 @@ "\n", "3. **Subword tokenization**: breaks words into smaller units (subwords) based on their frequency. The most popular and commonly used approach is [byte-pair encoding (BPE)](https://en.wikipedia.org/wiki/Byte_pair_encoding), which iteratively merges the most frequent character pairs. Modern language models typically use subword tokenization as it balances vocabulary size and sequence length while handling unknown words effectively by breaking them into known subword units.\n", "\n", - "In this lab we will use the tokenizer from the Gemma 2B model, which uses BPE. Let's load it and inspect it." + "In this lab we will use the tokenizer from the LFM2 model, which uses BPE. Let's load it and inspect it." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 266, - "referenced_widgets": [ - "2846d60e43a24160b177166c25dd0122", - "231e675f282d48a39e023149d4879b8b", - "ce1a72b3385c44a2b6c8c36acc48867f", - "57180ced897d4007a6d836665a032802", - "8d2df8e3bb4b410f9f671d4cd2a6e80d", - "16d840d19a804bec80ea85cafc850c13", - "8642a2df48194dc2a0314de10e0a7635", - "9a7787f0d75847219071be822ccd76ba", - "bc10c09f48534cc081dc53a4cc7bc20a", - "ba606012b7a14ad2824fe6843930ca08", - "9d5116fb35f44752a680fe7dc2b410b7", - "01bc169362704eeebd69a87d641d269e", - "7bbc93e57dda4424acb428027a9f014a", - "09b97b2a1f734e38b2a9908cf59edd8d", - "74dc454addc64783bbf1b3897a817147", - "47037605ebef451e91b64dd2fb040475", - "f701d542971a4238aa8b76affc054743", - "9498c07f6ad74b248c94de3bad444f62", - "c4dc3a623a34415a83c2ffab0e19560b", - "cde4b31291a9493f8ef649269ca11e1c", - "7899c5e27ac64478a6e6ac767da24a20", - "0b18c6ae2dee474aae96fdbd81637024", - "81b9c3a820424c67a4c050545c2daa2e", - "2318014fa6fd4452b76b5938a7da0c6f", - "df141f6e170f4af98d009fd42043a359", - "c34cba3327304cf98154ce2c73218441", - "1a949dd5e121434dbbf1b0c290d71373", - "c1d5a98c0f324e29a3628ff49718d7b6", - "d0cb6b890289454981f6b9ad8cb2a0e1", - "4495489fb35f495c898b334d75c8e1ed", - "34976cd4ca634e4cb7a5c0efffa41e81", - "64d0bc7735bf42ce800f56ebcce3cdce", - "01b7fbea9de54e338e3862e09d7e353d", - "9bead4274c0c4fc6acf12bf6b9dec75a", - "34ff40c5c4cf405d8ef59a12171b03a5", - "9d8d908e12b846d58aea8b0e48dd6b92", - "e9c00880fa4b47c7bf645c3f91a950a9", - "7d93f09ca25a498fbd4776daa0fc4c53", - "1c35e9b4250f4fca9e65ecfe4dcb4006", - "1eacc88f8b754c7e93582ce65f99b5db", - "6311ea720e344309b1d6fa1445f347e3", - "ba866548b5544345b37e29f6d8e92652", - "d5f566c5de7d4dd1808975839ab8b973", - "0e17dd9f94714fb38ecbe3bd68873c1c" - ] - }, - "id": "EeDF1JI-BvnF", - "outputId": "6c9d3a2b-0b6b-4fa1-de66-dc7879ab4d15" + "id": "EeDF1JI-BvnF" }, "outputs": [], "source": [ - "# Load the tokenizer 
for Gemma 2B\n", - "model_id = \"unsloth/gemma-2-2b-it\" #\"google/gemma-2-2b-it\"\n", + "# Load the tokenizer for Liquid AI LFM2-1.2B\n", + "model_id = \"LiquidAI/LFM2-1.2B\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "\n", "# How big is the tokenizer?\n", @@ -216,10 +173,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "sSJxCx6Nfnyx" + }, "source": [ "We not only need to be able to tokenize the text into tokens (encode), but also de-tokenize the tokens back into text (decode). Our tokenizer will have:\n", - "1. an `encode` function to tokenize the text into tokens, and \n", + "1. an `encode` function to tokenize the text into tokens, and\n", "2. a `decode` function to de-tokenize back to text so that we can read out the model's outputs.\n", "\n", "Let's test out both steps and inspect to get a better understanding of how this works." @@ -229,11 +188,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JH1XzPkiBvnF", - "outputId": "25e68cce-5aa0-432c-ab8c-246910d6c6b0" + "id": "JH1XzPkiBvnF" }, "outputs": [], "source": [ @@ -252,22 +207,20 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "v_0H2XZUfnyx" + }, "source": [ "This is really cool. Now we have a way to move in and out of the token space.\n", "\n", - "To \"chat\" with our LLM chatbot, we need to use the tokenizer and the chat template together, in order for the model to respond to the user's question. We can use the templates defined earlier to construct a prompt for the model, without the answer. " + "To \"chat\" with our LLM chatbot, we need to use the tokenizer and the chat template together, in order for the model to respond to the user's question. We can use the templates defined earlier to construct a prompt for the model, without the answer." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jyBxl6NIBvnF", - "outputId": "06e54226-c434-4a84-868f-a8b5b5085bbd" + "id": "jyBxl6NIBvnF" }, "outputs": [], "source": [ @@ -277,69 +230,33 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "nIw5Qzf2fnyy" + }, "source": [ - "If we were to feed this to the model, it would see that it is now the start of the model's turn, and it would generate the answer to this question. " + "If we were to feed this to the model, it would see that it is now the start of the model's turn, and it would generate the answer to this question." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "Lgp0JVnifnyy" + }, "source": [ "## 1.2: Getting started with the LLM\n", "\n", "Now that we have a way to prepare our data, we're ready to work with our LLM!\n", "\n", - "LLMs like Gemma 2B are trained on a large corpus of text, on the task of predicting the next token in a sequence, given the previous tokens. We call this training task \"next token prediction\"; you may also see it called \"causal language modeling\" or \"autoregressive language modeling\". We can leverage models trained in this way to generate new text by sampling from the predicted probability distribution over the next token.\n", + "LLMs like LFM2 are trained on a large corpus of text, on the task of predicting the next token in a sequence, given the previous tokens. We call this training task \"next token prediction\"; you may also see it called \"causal language modeling\" or \"autoregressive language modeling\". We can leverage models trained in this way to generate new text by sampling from the predicted probability distribution over the next token.\n", "\n", - "Let's load the Gemma 2B model and start working with it. We will construct a prompt in chat template form and tokenize it. 
Then, we will feed it to the model to predict next token probabilities. Finally, we will get the next token (which is still numerical) and decode it to text." + "Let's load the LFM2 model and start working with it. We will construct a prompt in chat template form and tokenize it. Then, we will feed it to the model to predict next token probabilities. Finally, we will get the next token (which is still numerical) and decode it to text." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 113, - "referenced_widgets": [ - "e715b19f10c64131ba65d96bf968d72d", - "7d2b9dea260143eb8c2933a6d3592bb0", - "087ed90b113448aa9f5079457ca4ba2b", - "0f9fe85f7079487f837ef9a7a6d7cbc5", - "49680ea9e5ae4916b52e398e27f87ff5", - "5cd563e97ce742e99942f553b31e3bed", - "e988eba4dbe546d484a6c4e88cf90b88", - "f2418db0b0ee4d3ca801f11c75ac1aca", - "676328ed1fb04ff4983a5b26df17d966", - "dacf87a2148c49db9306694b5a5f33da", - "3198b48f531d4e26bff98917f9d2b592", - "02edcc6aafcf4895843ff5e93ef30f45", - "44c5c62e4af7441bafbc7734982aa660", - "4b81f2c217b24406be898b1333b56352", - "d73b1aa5cf2e46c9ac65c617af00739f", - "6e626c5ef0dd408eaf3139f6aabaf190", - "1725a2fb58b94626a34f87c66ba0e8c2", - "1d6090d1b9e24e3cb550b655b8fbe318", - "2f803afa195c476fbfb506d53645c381", - "1734b0819fe74736a0417a9e2b977695", - "d82e67b97ea24f80a1478783cfb0f365", - "586958735baa4f29978d399852dc2aff", - "34ddb97a59d940879eb53d3e4dbe177e", - "be8a8c70a4c44ca4bd6fa595b29b3a35", - "f7dba9ee7dd646f5bf4e9f8589addc83", - "23790096dbc541d49e8db4c11a772a3f", - "19f8ecfe426246eb93849b324e986d37", - "efec2d4919314a79bd55fed697631516", - "389fffd528eb47f4b443b5e311a43629", - "d26f0017695b4e42b1c2736c07575775", - "73aa48a573e349b1a05ba0bb5526bc2a", - "a239a415866d47238ffa50a5c9c0a580", - "00dffcff57a14ad28d665cd2c2a11960" - ] - }, - "id": "mWtWvgiuBvnG", - "outputId": "b06295c8-b7b7-4d95-e0d6-31f65ac595ef" + "id": "mWtWvgiuBvnG" }, "outputs": [], "source": [ @@ 
-351,18 +268,14 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2SMDd5dpBvnG", - "outputId": "b5e63295-683a-4daa-9526-1ef93ed9e95a" + "id": "2SMDd5dpBvnG" }, "outputs": [], "source": [ "### Putting it together to prompt the model and generate a response ###\n", "\n", "# 1. Construct the prompt in chat template form\n", - "question = \"What is the capital of France? Use one word.\" \n", + "question = \"What is the capital of France? Use one word.\"\n", "prompt = template_without_answer.format(question=question)\n", "# prompt = template_without_answer.format('''TODO''') # TODO\n", "\n", @@ -389,7 +302,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "CJF74Cayfnyy" + }, "source": [ "Note that the model is not able to predict the answer to the question, it is only able to predict the next token in the sequence! For more complex questions, we can't just generate one token, but rather we need to generate a sequence of tokens.\n", "\n", @@ -402,11 +317,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "XnWMUQVbBvnG", - "outputId": "d0c110d0-d740-427e-abf9-312fe2dd9f5e" + "id": "XnWMUQVbBvnG" }, "outputs": [], "source": [ @@ -418,24 +329,28 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "B3zKg1qFfnyy" + }, "source": [ "Now we have the basic pipeline for generating text with an LLM!" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "lEW-YdEyfnyy" + }, "source": [ "## 1.3: Fine-tuning\n", "\n", "Fine-tuning is a technique that allows us to adapt a pre-trained neural network to better suit a downstream task, domain, or style, by training the model further on new data. By training the model further on a carefully curated dataset, we can modify its behavior, style, or capabilities. 
Fine-tuning is used in a variety of applications, not just language modeling. But in language modeling, fine-tuning can be used to:\n", - "- Adapt the model's writing style \n", + "- Adapt the model's writing style\n", "- Improve performance on specific tasks or domains\n", "- Teach the model new capabilities or knowledge\n", "- Reduce unwanted behaviors or biases\n", "\n", - "In this lab, you will fine-tune the Gemma LLM to adapt the model's writing style. Recall that in Lab 1 you built out a RNN-based sequence model to generate Irish folk songs. Continuing with our Irish theme, we will first fine-tune the LLM to chat in the style of a leprechaun.\n", + "In this lab, you will fine-tune the LFM2 LLM to adapt the model's writing style. Recall that in Lab 1 you built out a RNN-based sequence model to generate Irish folk songs. Continuing with our Irish theme, we will first fine-tune the LLM to chat in the style of a leprechaun.\n", "\n", "![Let's Dance!](http://33.media.tumblr.com/3d223954ad0a77f4e98a7b87136aa395/tumblr_nlct5lFVbF1qhu7oio1_500.gif)\n", "\n", @@ -446,58 +361,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 252, - "referenced_widgets": [ - "453101669bb84ec784d30fdecf9e1052", - "1acb7981a03c4d8491072db5b0f80b91", - "f0013cd0e75942a7b6f0af20d710c9f9", - "1caca54176f24a68841321407d5cb92c", - "4bf984821d194c64945609ccf5d08ab0", - "20f4fd378f6b44f386a6bdd9f0e787f7", - "72693249b56e4995815d950d33ebbbba", - "5dd29f36fb5745618d95abda81e869bb", - "f433b043c2ad41d7ba01a9ee1187fffe", - "304108b55b1c4ae58ac271e2d8616746", - "1a1e342e7aa943cd82c91b224ea01932", - "6a550e5a66704b7b819286707bd3a918", - "3d6d0fa2af094773b593a85d6c51cf48", - "8127e4af60a149f68318c0222641718f", - "ec45944210dc46058e722e9969a7dcdc", - "095a95bac5224763b7f512b468c7431d", - "b17245b343ee4c2aad1afb45814ec63c", - "fdffb194cfad4bc2a2adb90614977445", - "589c07cbcc1b4d3db5bdee5a15dbd8df", - 
"ad5c35c060754bc8ae7bae0832af3921", - "22f90aaa2b1642c9bf9b385010b8a4cb", - "c7e6412c823d48e9845eecb1b4e4d7f1", - "578a08d4d89b496dbca00da965b745d2", - "e8a1e9cc828f4a4d9c8f4e96b7fbb2fb", - "b631f91b3a5040e0b237936b412d274b", - "65670d440ae448c1862c9350e2784a3f", - "bb749eaf05dc4fbb9e134cc61caae11b", - "861dfc84b7364159a78379c91007e413", - "b67122a0d1b24d168be2501782effd15", - "02dbaaf3131648f8a5b0eb6bf7a4d089", - "ddabe3ec75d247468550ce9b202e30ab", - "395eb951f3044c20a6416c346c3e1cdd", - "516627614ee0481aa5ac80cc77673a54", - "34711f6447034a728316aacfc401a7e8", - "fb69e7b86acd485e814ffb0f7ef142f3", - "e92ae53e3bc14aa59b8cee25909c1d2a", - "a6cc7eb40dbb4eff9c1e9a3f3b2aa381", - "91bf23bab4a84645b07952fc7a088c36", - "151b7ed8c9ca4a3192e2a28ff99c3dc6", - "97f1a984a0a149bc9f305f18eb109b67", - "4ec2221b24b94685887b091b45f3f746", - "ee83baaeecd944a99c11f20f9b4f03fd", - "db15fee2fae44e4babb449d56aeca0f3", - "389c5f0e14a24cf08aa175f1f21b22fc" - ] - }, - "id": "kN0pHHS8BvnH", - "outputId": "a8422640-ba32-4d64-9379-1761062fd02e" + "id": "kN0pHHS8BvnH" }, "outputs": [], "source": [ @@ -515,7 +379,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "eoAfOuCrfnyy" + }, "source": [ "### 1.3.1: Chat function\n", "\n", @@ -541,7 +407,7 @@ "def chat(question, max_new_tokens=32, temperature=0.7, only_answer=False):\n", " # 1. Construct the prompt using the template\n", " prompt = template_without_answer.format(question=question)\n", - " # prompt = template_without_answer.format('''TODO''') # TODO \n", + " # prompt = template_without_answer.format('''TODO''') # TODO\n", "\n", " # 2. Tokenize the text\n", " input_ids = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", @@ -566,7 +432,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "rPAnt_Swfnyy" + }, "source": [ "Let's try chatting with the model now to test if it works! 
We have a sample question here (continuing with the Irish theme); feel free to try out other questions!" ] @@ -575,11 +443,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FDr5f2djBvnH", - "outputId": "c42789d4-fbbf-438b-fd9d-57b3e037daa5" + "id": "FDr5f2djBvnH" }, "outputs": [], "source": [ @@ -592,29 +456,27 @@ "\n", "print(answer)\n", "\n", - "'''TODO: Experiment with asking the model different questions and temperature values, and see how it responds!'''" + "### TODO: Experiment with asking the model different questions and temperature values, and see how it responds!" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "s7xpiTuCfnyy" + }, "source": [ "### 1.3.2: Parameter-efficient fine-tuning\n", "\n", - "In fine-tuning, the weights of the model are updated to better fit the fine-tuning dataset and/or task. Updating all the weights in a language model like Gemma 2B -- which has ~2 billion parameters -- is computationally expensive. There are many techniques to make fine-tuning more efficient.\n", + "In fine-tuning, the weights of the model are updated to better fit the fine-tuning dataset and/or task. Updating all the weights in a language model like LFM2-1.2B -- which has ~1 billion parameters -- is computationally expensive. There are many techniques to make fine-tuning more efficient.\n", "\n", - "We will use a technique called [LoRA](https://arxiv.org/abs/2106.09685) -- low-rank adaptation -- to make the fine-tuning process more efficient. LoRA is a way to fine-tune LLMs very efficiently by only updating a small subset of the model's parameters, and it works by adding trainable low-rank matrices to the model. While we will not go into the details of LoRA here, you can read more about it in the [LoRA paper](https://arxiv.org/abs/2106.09685). We will use the [`peft`](https://pypi.org/project/peft/) library to apply LoRA to the Gemma model." 
+ "We will use a technique called [LoRA](https://arxiv.org/abs/2106.09685) -- low-rank adaptation -- to make the fine-tuning process more efficient. LoRA is a way to fine-tune LLMs very efficiently by only updating a small subset of the model's parameters, and it works by adding trainable low-rank matrices to the model. While we will not go into the details of LoRA here, you can read more about it in the [LoRA paper](https://arxiv.org/abs/2106.09685). We will use the [`peft`](https://pypi.org/project/peft/) library to apply LoRA to the LFM model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Fb6Y679hBvnI", - "outputId": "8070d39e-0fd9-44cd-9c35-d86afcd99caf" + "id": "Fb6Y679hBvnI" }, "outputs": [], "source": [ @@ -646,7 +508,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "ze3rM7qmfnyy" + }, "source": [ "### 1.3.3: Forward pass and loss computation\n", "\n", @@ -689,7 +553,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "09btP87tfnyz" + }, "source": [ "### 1.3.4: Training loop for fine-tuning\n", "\n", @@ -771,22 +637,20 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "blFoO-PhBvnI", - "outputId": "d23f2002-6e4a-41b0-9710-13d394290f34" + "id": "blFoO-PhBvnI" }, "outputs": [], "source": [ - "# Call the train function to fine-tune the model! Hint: you'll start to see results after a few dozen steps.\n", - "model = train(model, train_loader, tokenizer, max_steps=50)\n", + "# Call the train function to fine-tune the model! 
Hint: you'll start to see results after at least 100 steps.\n", + "model = train(model, train_loader, tokenizer, max_steps=200)\n", "# model = train('''TODO''') # TODO" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "EKJOH7Ihfnyz" + }, "source": [ "Let's try chatting with the model again to see how it has changed!" ] @@ -795,11 +659,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "su4ZAG3eBvnI", - "outputId": "b21ce134-1763-4872-8b58-19328d98b76a" + "id": "su4ZAG3eBvnI" }, "outputs": [], "source": [ @@ -814,7 +674,7 @@ "source": [ "# Part 2: Evaluating a style-tuned LLM\n", "\n", - "How do we know if the model is doing well? How closely does the model's style match the style of a leprechaun? As you can see from the example above, determining whether a generated response is good or not is can seem qualitative, and it can be hard to measure how well the model is doing. \n", + "How do we know if the model is doing well? How closely does the model's style match the style of a leprechaun? As you can see from the example above, determining whether a generated response is good or not can seem qualitative, and it can be hard to measure how well the model is doing.\n", "\n", "While benchmarks have been developed to evaluate the performance of language models on a variety of tasks, these benchmarks are not always representative of the real-world performance of the model. For example, a model may perform well on a benchmark but poorly on a more realistic task. Benchmarks are also limited in the scope of tasks they can cover and capabilities they can reflect, and there can be concerns about whether the data in the benchmark was used to train the model.
Synthetic data generation and synthetic tasks are a way to address these limitations, and this is an active area of research.\n", "\n", @@ -823,7 +683,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "fLvX1miFfnyz" + }, "source": [ "### 2.1: Fine-tune well, you must!\n", "\n", @@ -840,37 +702,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292, - "referenced_widgets": [ - "fe486852cda849d5b2cf2dda69c46feb", - "9af5e516b8594e7da181917ff351e019", - "c6487dbfe53345b9822b372069f34922", - "baace428cd5545718ddc6d0749e53562", - "b12294da6032493e9ac7783b8e3ddaff", - "43e58008991640f1a96e123f545ca52d", - "5c780ea0aeee467da497547d78453492", - "bf06c4115ae54e7b9da2838c9b6069a0", - "b73ef786040243589d43806a965f0eea", - "0d4c7d8c22dc49b4be6d4948a3224852", - "d95ba2612d5e409da8899e679e39c4ee" - ] - }, - "id": "-gLgE41YBvnJ", - "outputId": "174004bd-f5f1-42e0-96ff-41d480254c87" + "id": "-gLgE41YBvnJ" }, "outputs": [], "source": [ "# Load the Yoda-speak dataset and fine-tune the model using your training function\n", "train_loader, test_loader = mdl.lab3.create_dataloader(style=\"yoda\")\n", - "model = train(model, train_loader, tokenizer, max_steps=50)\n", + "model = train(model, train_loader, tokenizer, max_steps=200)\n", "# model = train('''TODO''') # TODO" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "nA8h2DcCfnyz" + }, "source": [ "Start by defining a system prompt for the judge LLM, setting the context that it will evaluate how well the outputs of your chat model follow Yoda speak. Experiment with different system prompts to see how they affect the judge LLM's evaluation! Keep in mind that a better judge LLM will give you a better evaluation of how well your Yoda model is doing, and that a better evaluation will help you improve your Yoda model." 
] @@ -879,11 +725,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "REkrJ1SCBvnJ", - "outputId": "a5630cfb-5a6c-4874-9007-fe519de32220" + "id": "REkrJ1SCBvnJ" }, "outputs": [], "source": [ @@ -891,7 +733,7 @@ "\n", "'''TODO: Experiment with different system prompts to see how they affect the judge LLM's evaluation!\n", " Come back to this cell after you've generated some text from your model.'''\n", - " \n", + "\n", "system_prompt = \"\"\"\n", "You are an impartial judge that evaluates if text was written by {style}.\n", "\n", @@ -904,12 +746,12 @@ "is written exactly in the style of {style}, 5 if mixed faithfulness to the\n", "style, or 0 if the text is not at all written in the style of {style}.\n", "\n", - "The format of the your response should be a JSON dictionary and nothing else:\n", + "Directly answer with the score formatted in a dictionary.\n", + "The format of your response should only be the dictionary and nothing else:\n", "{{\"score\": }}\n", "\"\"\"\n", "\n", "style = \"Yoda\"\n", - "# example = \"\"\"The very Republic is threatened, if involved the Sith are. Hard to see, the dark side is. \"\"\"\n", "example = \"The very Republic is threatened, if involved the Sith are. Hard to see, the dark side is. Discover who this assassin is, we must. With this Naboo queen you must stay, Qui-Gon. Protect her. May the Force be with you. A vergence, you say? But you do! Revealed your opinion is. Trained as a Jedi, you request for him? Good, good, young one.\"\n", "\n", "system_prompt = system_prompt.format(style=style, example=example)\n", @@ -919,15 +761,17 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "gmdg3FNsfnyz" + }, "source": [ "### 2.2: Setting up the judge LLM\n", "\n", - "In LLM as a judge, we need to use a model that is larger (and therefore more capable) than our \"performer\" model, in our case the style fine-tuned Gemma 2B. 
Since it is infeasible to load larger models locally into notebooks, you will gain experience interfacing with these larger LLMs through an API served on [OpenRouter](https://openrouter.ai/). \n", + "In LLM as a judge, we need to use a model that is larger (and therefore more capable) than our \"performer\" model, in our case the style fine-tuned LFM2 1.2B. Since it is infeasible to load larger models locally into notebooks, you will gain experience interfacing with these larger LLMs through an API served on [OpenRouter](https://openrouter.ai/).\n", "\n", - "You will need to sign up for an [OpenRouter account](https://openrouter.ai/sign-up) and then [generate an API key](https://openrouter.ai/keys). Running powerful LLMs of this scale costs money -- for students in the in-person course, we can provide a credit to your OpenRouter account to allow you to run this lab. Come to office hours to receive your credit. \n", + "You will need to sign up for an [OpenRouter account](https://openrouter.ai/sign-up) and then [generate an API key](https://openrouter.ai/keys). Running powerful LLMs of this scale costs money -- for students in the in-person course, we can provide a credit to your OpenRouter account to allow you to run this lab. Come to office hours to receive your credit.\n", "\n", - "Through the OpenRouter interface, you will be able to experiment with different judge LLMs -- here we have suggested two possible larger LLMs to get you started: [Liquid AI's](https://www.liquid.ai/) [LFM-40B](https://openrouter.ai/models/liquid-ai/lfm-40b) andGoogle's [Gemma 9B](https://openrouter.ai/models/google/gemma-9b). 
Note there are also free models available on OpenRouter (e.g., [gemma-2-9b-it:free](https://openrouter.ai/google/gemma-2-9b-it:free)), but these will run into rate limitations if you run them too much.\n", + "Through the OpenRouter interface, you will be able to experiment with different judge LLMs -- here we have suggested one possible larger LLM to get you started: Google's [Gemini 2.5](https://openrouter.ai/google/gemini-2.5-flash/providers). Note there are also free models available on OpenRouter (e.g., [gemma-2-9b-it:free](https://openrouter.ai/google/gemma-2-9b-it:free)), but these will run into rate limitations if you run them too much.\n", "\n", "We have defined a simple class, `LLMClient`, to interact with the OpenRouter API. This class has a method `ask` that takes a user prompt and returns the model's response. Keep in mind that the judge LLM's response will be conditioned on the system prompt you provide -- the system prompt is critical to set the criteria for the evaluation!" ] @@ -936,46 +780,49 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9S7DtGZ5BvnJ", - "outputId": "4ac889ba-43a8-4636-9341-f0b1f260faef" + "id": "9S7DtGZ5BvnJ" }, "outputs": [], "source": [ "OPENROUTER_API_KEY = \"\" # TODO: add your OpenRouter API key here\n", "assert OPENROUTER_API_KEY != \"\", \"You must set your OpenRouter API key before running this cell!\"\n", "\n", - "model_name = \"liquid/lfm2-8b-a1b\"\n", - "# model_name = \"google/gemma-2-9b-it\"\n", + "model_name = \"google/gemini-2.5-flash\"\n", "llm = mdl.lab3.LLMClient(model=model_name, api_key=OPENROUTER_API_KEY)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "Hp9DnayMfnyz" + }, "source": [ "### 2.3: Defining the evaluation metric\n", "\n", "Great! We have set up our judge LLM, but we still need to make this quantitative. 
We can do this by defining a metric that uses the judge LLM to score the outputs of the model. Doing this is streamlined with Comet ML's [Opik library](https://www.comet.com/docs/opik/python-sdk-reference/), a platform for LLM evaluation and benchmarking.\n", "\n", "In prior labs, we used Comet for experiment tracking, so you should have an account and API key. If not, you can sign up for a Comet account [here](https://www.comet.com/signup?from=llm&utm_source=mit_dl&utm_medium=notebook&utm_campaign=opik) if you have not done so already. Now we will use the Comet Opik library to define a metric that uses the judge LLM to score the outputs of the model.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Opik provides a framework for creating custom judge metrics as well as a variety of pre-built metrics for common evaluation tasks. These metrics are designed to help you quickly and effectively gauge the performance of your LLM outputs and include metrics such as Hallucination, Answer Relevance, Context Precision/Recall and more. You can learn more about the available metrics in the [`Metrics Overview section`](https://www.comet.com/docs/opik/evaluation/metrics/overview) of the Opik documentation.\n", "\n", - "Opik has a base class for defining metrics, [`base_metric.BaseMetric`](https://www.comet.com/docs/opik/python-sdk-reference/evaluation/metrics/BaseMetric.html). You will use this to define a custom metric that uses the judge LLM to evaluate text for how well it adheres to Yoda speak. Note that the judge LLM and the metric can be applied to any text, not just the outputs of the model.
This is important to keep in mind, since we need both a negative control -- text in the \"base\" standard English style -- and a positive control -- training-set text in Yoda-speak style -- against which to compare the model's generations.\n", + "The Opik python SDK has a base class for defining metrics, [`base_metric.BaseMetric`](https://www.comet.com/docs/opik/python-sdk-reference/evaluation/metrics/BaseMetric.html). You will use this to define a custom metric that uses the judge LLM to evaluate text for how well it adheres to Yoda speak. Note that the judge LLM and the metric can be applied to any text, not just the outputs of the model. This is important to keep in mind, since we need both a negative control -- text in the \"base\" standard English style -- and a positive control -- training-set text in Yoda-speak style -- against which to compare the model's generations.\n", "\n", "Set the judging criteria in the system prompt, and define the `score` function to evaluate text by querying the judge LLM." - ] + ], + "metadata": { + "id": "1aTAb4JHlRQm" + } }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "llB3FgiwBvnJ", - "outputId": "6c1dbf01-298c-4097-d2fa-8a212ca69822" + "id": "llB3FgiwBvnJ" }, "outputs": [], "source": [ @@ -996,32 +843,23 @@ " prompt = self.prompt_template.format(text=text)\n", " # prompt = self.prompt_template.format('''TODO''') # TODO\n", "\n", - " # The system prompt asks the judge to output a JSON dictionary of the form: \n", - " # {\"score\": }\n", - " # To do this, we need to specify the judge to stop generating after it \n", - " # closes the JSON dictionary (i.e., when it outputs \"}\")\n", - " # Hint: Use the stop=[\"}\"] argument within the judge.ask() method to specify this.\n", - " stop = \"}\"\n", - "\n", - " # TODO: Call the judge LLM with the system prompt and the prompt template. 
\n", + " # TODO: Call the judge LLM with the system prompt and the prompt template.\n", " # Remember to stop the generation when the judge LLM outputs \"}\".\n", " res = self.judge.ask(\n", " system=self.system_prompt,\n", " user=prompt,\n", - " max_tokens=10,\n", - " stop=[stop]\n", + " max_tokens=20,\n", " )\n", " # res = self.judge.ask(\n", - " # system='''TODO''', \n", - " # user='''TODO''', \n", + " # system='''TODO''',\n", + " # user='''TODO''',\n", " # max_tokens='''TODO'''\n", - " # stop='''TODO'''\n", " # ) # TODO\n", "\n", " # Extract the assistant's content from the API response\n", - " # Remember to add the stop character back to the end of the response to be a \n", - " # valid JSON dictionary (its not there the judge LLM stoped once it saw it)\n", - " res = res.choices[0].message.content + stop\n", + " # Remember to add the stop character back to the end of the response to be a\n", + " # valid JSON dictionary (its not there the judge LLM stoped once it saw it)\n", + " res = res.choices[0].message.content\n", " res_dict = json.loads(res)\n", "\n", " max_score = 10 # The maximum score that the LLM should output\n", @@ -1039,15 +877,19 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "xiW6rr7Lfnyz" + }, "source": [ - "Instaniate your Comet Opik judge using the LLMJudgeEvaluator class and system prompt." + "Instantiate your Comet Opik judge using the `LLMJudgeEvaluator` class and system prompt." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "m2wCbTn-fnyz" + }, "outputs": [], "source": [ "judge = LLMJudgeEvaluator(llm, system_prompt=system_prompt)" @@ -1055,11 +897,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "tfphmTD2fny0" + }, "source": [ "## 2.4: Evaluating the model by scoring with your judge LLM\n", "\n", - "Now we can use the judge LLM to score the outputs of the model. We will use the `scoring_function` to score text using the judge LLM. 
\n", + "Now we can use the judge LLM to score the outputs of the model. We will use the `scoring_function` to score text using the judge LLM.\n", "\n", "Feed in a few probe sentences to get a vibe check on the judge LLM." ] @@ -1068,11 +912,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D_rvQDrvBvnJ", - "outputId": "5d460cfb-4237-4a5b-e5d2-4974ea984805" + "id": "D_rvQDrvBvnJ" }, "outputs": [], "source": [ @@ -1092,7 +932,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "iBjWAXfTfny0" + }, "source": [ "We will evaluate how well our fine-tuned model is doing by scoring the outputs of the model, as well as our base-style text (negative control) and the training-set text in Yoda-speak style (positive control).\n", "\n", @@ -1125,7 +967,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "7Kz6t364fny0" + }, "source": [ "Let's also collect some base-style text (`base_samples`) and the training-set text in Yoda-speak style (`style_samples`). For these, we won't need to generate text, since we already have the text in the dataset." ] @@ -1134,11 +978,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZEpUWV2EBvnK", - "outputId": "ff1192d3-ca28-4429-d110-47736fbaf90c" + "id": "ZEpUWV2EBvnK" }, "outputs": [], "source": [ @@ -1148,7 +988,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "drqXTryEfny0" + }, "source": [ "Now that we have our samples, we can score them using the judge LLM. We will use a multiprocessed scoring function to score the samples in parallel, because each sample is independent and we can submit them all as simultaneous requests to the judge LLM." 
] @@ -1184,9 +1026,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "jQ5PKB4jfny0" + }, "source": [ - "Look at the average scores for each of the three types of text -- what do you observe? \n", + "Look at the average scores for each of the three types of text -- what do you observe?\n", "\n", "We can also plot the distribution of scores for each of the three types of text.\n" ] @@ -1195,12 +1039,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 472 - }, - "id": "V4-g0Z3_BvnK", - "outputId": "5497bdad-7878-4df5-b2b0-015b896ea072" + "id": "V4-g0Z3_BvnK" }, "outputs": [], "source": [ @@ -1222,16 +1061,130 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "qtKce8CYfny0" + }, "source": [ "Use these observations to improve your model. Remember that the judge LLM is not perfect, and you can try to improve the judge LLM to better evaluate the model's outputs. A better judge LLM will give you a better evaluation of how well your Yoda model is doing, and that better evaluation will help you improve your Yoda model." ] }, { "cell_type": "markdown", - "metadata": {}, "source": [ - "## 2.5: Conclusion\n", + "## 2.5: Monitoring with evals\n", + "\n", + "Just as we used Opik for evaluation metrics during fine-tuning and testing, we can also use Opik to monitor our LLM once it is live in deployment. This makes it easy to track the same metrics consistently across both development and deployment.\n", + "\n", + "In prior labs, we used Comet for experiment tracking, so you should have an account and API key. If not, you can sign up for a Comet account [here](https://www.comet.com/signup?from=llm&utm_source=mit_dl&utm_medium=notebook&utm_campaign=opik) if you have not done so already. We will configure Opik by setting the API key and naming our Opik project." 
+ ], + "metadata": { + "id": "BeMLjanimqDr" + } + }, + { + "cell_type": "code", + "source": [ + "os.environ[\"OPIK_API_KEY\"] = \"\" # TODO: add your OPIK or Comet API key here\n", + "assert os.environ[\"OPIK_API_KEY\"] != \"\", \"You must set your OPIK or Comet API key before running this cell!\"\n", + "\n", + "# Set the project name for Opik\n", + "os.environ[\"OPIK_PROJECT_NAME\"] = \"6S191_Lab3\"\n", + "\n", + "opik.configure()" + ], + "metadata": { + "id": "uA2nuV8n6STH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "[Tracing](https://www.comet.com/docs/opik/tracing/concepts) helps you understand the end-to-end flow of your LLM application and pinpoint specific steps that may be causing issues.\n", + "\n", + "In the example below, we make a sample call to the chatbot and use Opik’s `@track` decorator to log data to the Opik UI, creating a record of live calls to the application. You can add the `@track` decorator to any function to trace not only LLM calls, but also other steps in your application pipeline." + ], + "metadata": { + "id": "VcwD9NcM6SnJ" + } + }, + { + "cell_type": "code", + "source": [ + "@opik.track\n", + "def inference_chat(question, max_new_tokens=32, temperature=0.7, only_answer=False):\n", + "\n", + " # 1. Construct the prompt using the template\n", + " prompt = template_without_answer.format(question=question)\n", + "\n", + " # 2. Tokenize the text\n", + " input_ids = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", + "\n", + " # 3. Feed through the model to predict the next token probabilities\n", + " with torch.no_grad():\n", + " outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=max_new_tokens, temperature=temperature)\n", + "\n", + " # 4. Only return the answer if only_answer is True\n", + " output_tokens = outputs[0]\n", + " if only_answer:\n", + " output_tokens = output_tokens[input_ids['input_ids'].shape[1]:]\n", + "\n", + " # 5.
Decode the tokens\n", + " result = tokenizer.decode(output_tokens, skip_special_tokens=True)\n", + "\n", + " # Update the current trace with evaluation scores\n", + " opik_context.update_current_trace(\n", + " feedback_scores=[\n", + " {\n", + " \"name\": \"Yoda style eval\",\n", + " \"value\": scoring_function(result)\n", + " }\n", + " ]\n", + " )\n", + "\n", + " return result" + ], + "metadata": { + "id": "VqQdGAKfm-8H" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now you can make an example call to your model to see the trace logged to Opik. Once you run the cell below you should see a link to your Opik UI where your traces are logged to your project. Follow that link to see your traces in the Opik platform." + ], + "metadata": { + "id": "uYXA6wOPnAhb" + } + }, + { + "cell_type": "code", + "source": [ + "# Let's try chatting with the model now to see the traces produced with the score\n", + "answer = inference_chat(\n", + " \"Who was the only non-Jedi to wield a lightsaber in the original Star Wars trilogy?\",\n", + " only_answer=True,\n", + " max_new_tokens=32,\n", + ")\n", + "\n", + "print(answer)" + ], + "metadata": { + "id": "uLYflR9DnCfM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pRhbKTy2fny0" + }, + "source": [ + "## 2.6: Conclusion\n", "\n", "Experiment with both your chat model and your judge LLM to try to improve the quality of the Yoda-speak. The competition for this lab will be based on the following criteria:\n", "* **Likelihood of true Yoda-speak under your chat model**: the better your chat model does at understanding Yoda-speak, it will estimate a lower cross entropy loss for language that is true Yoda-speak. At the end of this lab, you will evaluate the likelihood of a held-out test-sample of true Yoda-speak under your chat model. Include this likelihood in your report. 
This gives us a quantitative measure to compare different chat models (which may have interacted with different judge LLMs).\n", @@ -1244,11 +1197,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MqnrG24FBvnK", - "outputId": "b93c17db-6968-40f3-a012-b3b202185bb6" + "id": "MqnrG24FBvnK" }, "outputs": [], "source": [ @@ -1271,11 +1220,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "XmlmIGJyfny0" + }, "source": [ "# Submission information\n", "\n", - "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm)):\n", + "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm):\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot showing the judge LLM's scores of text in base style, generated text, and text in true Yoda-speak style;\n", @@ -1289,11 +1240,6 @@ "\n", "" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { @@ -1317,4116 +1263,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00dffcff57a14ad28d665cd2c2a11960": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "01b7fbea9de54e338e3862e09d7e353d": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "01bc169362704eeebd69a87d641d269e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7bbc93e57dda4424acb428027a9f014a", - "IPY_MODEL_09b97b2a1f734e38b2a9908cf59edd8d", - "IPY_MODEL_74dc454addc64783bbf1b3897a817147" - ], - "layout": "IPY_MODEL_47037605ebef451e91b64dd2fb040475" - } - }, - "02dbaaf3131648f8a5b0eb6bf7a4d089": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "02edcc6aafcf4895843ff5e93ef30f45": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_44c5c62e4af7441bafbc7734982aa660", - "IPY_MODEL_4b81f2c217b24406be898b1333b56352", - "IPY_MODEL_d73b1aa5cf2e46c9ac65c617af00739f" - ], - "layout": "IPY_MODEL_6e626c5ef0dd408eaf3139f6aabaf190" - } - }, - "087ed90b113448aa9f5079457ca4ba2b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f2418db0b0ee4d3ca801f11c75ac1aca", - "max": 913, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_676328ed1fb04ff4983a5b26df17d966", - "value": 913 - } - }, - "095a95bac5224763b7f512b468c7431d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "09b97b2a1f734e38b2a9908cf59edd8d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c4dc3a623a34415a83c2ffab0e19560b", - "max": 4241003, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cde4b31291a9493f8ef649269ca11e1c", - "value": 4241003 - } - }, - "0b18c6ae2dee474aae96fdbd81637024": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0d4c7d8c22dc49b4be6d4948a3224852": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0e17dd9f94714fb38ecbe3bd68873c1c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0f9fe85f7079487f837ef9a7a6d7cbc5": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dacf87a2148c49db9306694b5a5f33da", - "placeholder": "​", - "style": "IPY_MODEL_3198b48f531d4e26bff98917f9d2b592", - "value": " 913/913 [00:00<00:00, 69.3kB/s]" - } - }, - "151b7ed8c9ca4a3192e2a28ff99c3dc6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "16d840d19a804bec80ea85cafc850c13": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1725a2fb58b94626a34f87c66ba0e8c2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1734b0819fe74736a0417a9e2b977695": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "19f8ecfe426246eb93849b324e986d37": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - }, - "1a1e342e7aa943cd82c91b224ea01932": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1a949dd5e121434dbbf1b0c290d71373": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1acb7981a03c4d8491072db5b0f80b91": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_20f4fd378f6b44f386a6bdd9f0e787f7", - "placeholder": "​", - "style": "IPY_MODEL_72693249b56e4995815d950d33ebbbba", - "value": "README.md: 100%" - } - }, - "1c35e9b4250f4fca9e65ecfe4dcb4006": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1caca54176f24a68841321407d5cb92c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", 
- "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_304108b55b1c4ae58ac271e2d8616746", - "placeholder": "​", - "style": "IPY_MODEL_1a1e342e7aa943cd82c91b224ea01932", - "value": " 8.20k/8.20k [00:00<00:00, 349kB/s]" - } - }, - "1d6090d1b9e24e3cb550b655b8fbe318": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1eacc88f8b754c7e93582ce65f99b5db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "20f4fd378f6b44f386a6bdd9f0e787f7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": 
null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "22f90aaa2b1642c9bf9b385010b8a4cb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2318014fa6fd4452b76b5938a7da0c6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c1d5a98c0f324e29a3628ff49718d7b6", - "placeholder": "​", - "style": "IPY_MODEL_d0cb6b890289454981f6b9ad8cb2a0e1", - "value": "tokenizer.json: 100%" - } - }, - "231e675f282d48a39e023149d4879b8b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16d840d19a804bec80ea85cafc850c13", - "placeholder": "​", - "style": "IPY_MODEL_8642a2df48194dc2a0314de10e0a7635", - "value": "tokenizer_config.json: 100%" - } - }, - "23790096dbc541d49e8db4c11a772a3f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a239a415866d47238ffa50a5c9c0a580", - "placeholder": "​", - "style": "IPY_MODEL_00dffcff57a14ad28d665cd2c2a11960", - "value": " 209/209 [00:00<00:00, 14.3kB/s]" - } - }, - "2846d60e43a24160b177166c25dd0122": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_231e675f282d48a39e023149d4879b8b", - "IPY_MODEL_ce1a72b3385c44a2b6c8c36acc48867f", - "IPY_MODEL_57180ced897d4007a6d836665a032802" - ], - "layout": "IPY_MODEL_8d2df8e3bb4b410f9f671d4cd2a6e80d" - } - }, - "2f803afa195c476fbfb506d53645c381": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "304108b55b1c4ae58ac271e2d8616746": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3198b48f531d4e26bff98917f9d2b592": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "34711f6447034a728316aacfc401a7e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fb69e7b86acd485e814ffb0f7ef142f3", - 
"IPY_MODEL_e92ae53e3bc14aa59b8cee25909c1d2a", - "IPY_MODEL_a6cc7eb40dbb4eff9c1e9a3f3b2aa381" - ], - "layout": "IPY_MODEL_91bf23bab4a84645b07952fc7a088c36" - } - }, - "34976cd4ca634e4cb7a5c0efffa41e81": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "34ddb97a59d940879eb53d3e4dbe177e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_be8a8c70a4c44ca4bd6fa595b29b3a35", - "IPY_MODEL_f7dba9ee7dd646f5bf4e9f8589addc83", - "IPY_MODEL_23790096dbc541d49e8db4c11a772a3f" - ], - "layout": "IPY_MODEL_19f8ecfe426246eb93849b324e986d37" - } - }, - "34ff40c5c4cf405d8ef59a12171b03a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1c35e9b4250f4fca9e65ecfe4dcb4006", - "placeholder": "​", - "style": "IPY_MODEL_1eacc88f8b754c7e93582ce65f99b5db", - "value": "special_tokens_map.json: 100%" - } - 
}, - "389c5f0e14a24cf08aa175f1f21b22fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "389fffd528eb47f4b443b5e311a43629": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "395eb951f3044c20a6416c346c3e1cdd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": 
null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3d6d0fa2af094773b593a85d6c51cf48": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b17245b343ee4c2aad1afb45814ec63c", - "placeholder": "​", - "style": "IPY_MODEL_fdffb194cfad4bc2a2adb90614977445", - "value": "databricks-dolly-15k.jsonl: 100%" - } - }, - "43e58008991640f1a96e123f545ca52d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4495489fb35f495c898b334d75c8e1ed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "44c5c62e4af7441bafbc7734982aa660": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1725a2fb58b94626a34f87c66ba0e8c2", - "placeholder": "​", - "style": "IPY_MODEL_1d6090d1b9e24e3cb550b655b8fbe318", - "value": 
"model.safetensors: 100%" - } - }, - "453101669bb84ec784d30fdecf9e1052": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1acb7981a03c4d8491072db5b0f80b91", - "IPY_MODEL_f0013cd0e75942a7b6f0af20d710c9f9", - "IPY_MODEL_1caca54176f24a68841321407d5cb92c" - ], - "layout": "IPY_MODEL_4bf984821d194c64945609ccf5d08ab0" - } - }, - "47037605ebef451e91b64dd2fb040475": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "49680ea9e5ae4916b52e398e27f87ff5": 
{ - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4b81f2c217b24406be898b1333b56352": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2f803afa195c476fbfb506d53645c381", - "max": 5228717512, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1734b0819fe74736a0417a9e2b977695", - "value": 5228717512 - } - }, - "4bf984821d194c64945609ccf5d08ab0": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4ec2221b24b94685887b091b45f3f746": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "516627614ee0481aa5ac80cc77673a54": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "57180ced897d4007a6d836665a032802": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ba606012b7a14ad2824fe6843930ca08", - "placeholder": "​", - "style": "IPY_MODEL_9d5116fb35f44752a680fe7dc2b410b7", - "value": " 47.0k/47.0k [00:00<00:00, 2.43MB/s]" - } - }, - "578a08d4d89b496dbca00da965b745d2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", 
- "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e8a1e9cc828f4a4d9c8f4e96b7fbb2fb", - "IPY_MODEL_b631f91b3a5040e0b237936b412d274b", - "IPY_MODEL_65670d440ae448c1862c9350e2784a3f" - ], - "layout": "IPY_MODEL_bb749eaf05dc4fbb9e134cc61caae11b" - } - }, - "586958735baa4f29978d399852dc2aff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "589c07cbcc1b4d3db5bdee5a15dbd8df": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"5c780ea0aeee467da497547d78453492": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5cd563e97ce742e99942f553b31e3bed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5dd29f36fb5745618d95abda81e869bb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6311ea720e344309b1d6fa1445f347e3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "64d0bc7735bf42ce800f56ebcce3cdce": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "65670d440ae448c1862c9350e2784a3f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_395eb951f3044c20a6416c346c3e1cdd", - 
"placeholder": "​", - "style": "IPY_MODEL_516627614ee0481aa5ac80cc77673a54", - "value": " 15011/15011 [00:00<00:00, 62410.60 examples/s]" - } - }, - "676328ed1fb04ff4983a5b26df17d966": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6a550e5a66704b7b819286707bd3a918": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3d6d0fa2af094773b593a85d6c51cf48", - "IPY_MODEL_8127e4af60a149f68318c0222641718f", - "IPY_MODEL_ec45944210dc46058e722e9969a7dcdc" - ], - "layout": "IPY_MODEL_095a95bac5224763b7f512b468c7431d" - } - }, - "6e626c5ef0dd408eaf3139f6aabaf190": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": 
null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "72693249b56e4995815d950d33ebbbba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "73aa48a573e349b1a05ba0bb5526bc2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "74dc454addc64783bbf1b3897a817147": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - 
"layout": "IPY_MODEL_7899c5e27ac64478a6e6ac767da24a20", - "placeholder": "​", - "style": "IPY_MODEL_0b18c6ae2dee474aae96fdbd81637024", - "value": " 4.24M/4.24M [00:00<00:00, 31.6MB/s]" - } - }, - "7899c5e27ac64478a6e6ac767da24a20": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7bbc93e57dda4424acb428027a9f014a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f701d542971a4238aa8b76affc054743", - "placeholder": 
"​", - "style": "IPY_MODEL_9498c07f6ad74b248c94de3bad444f62", - "value": "tokenizer.model: 100%" - } - }, - "7d2b9dea260143eb8c2933a6d3592bb0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5cd563e97ce742e99942f553b31e3bed", - "placeholder": "​", - "style": "IPY_MODEL_e988eba4dbe546d484a6c4e88cf90b88", - "value": "config.json: 100%" - } - }, - "7d93f09ca25a498fbd4776daa0fc4c53": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": 
null - } - }, - "8127e4af60a149f68318c0222641718f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_589c07cbcc1b4d3db5bdee5a15dbd8df", - "max": 13085339, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ad5c35c060754bc8ae7bae0832af3921", - "value": 13085339 - } - }, - "81b9c3a820424c67a4c050545c2daa2e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2318014fa6fd4452b76b5938a7da0c6f", - "IPY_MODEL_df141f6e170f4af98d009fd42043a359", - "IPY_MODEL_c34cba3327304cf98154ce2c73218441" - ], - "layout": "IPY_MODEL_1a949dd5e121434dbbf1b0c290d71373" - } - }, - "861dfc84b7364159a78379c91007e413": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": 
null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8642a2df48194dc2a0314de10e0a7635": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8d2df8e3bb4b410f9f671d4cd2a6e80d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "91bf23bab4a84645b07952fc7a088c36": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9498c07f6ad74b248c94de3bad444f62": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "97f1a984a0a149bc9f305f18eb109b67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9a7787f0d75847219071be822ccd76ba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9af5e516b8594e7da181917ff351e019": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { 
- "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_43e58008991640f1a96e123f545ca52d", - "placeholder": "​", - "style": "IPY_MODEL_5c780ea0aeee467da497547d78453492", - "value": "Map: 100%" - } - }, - "9bead4274c0c4fc6acf12bf6b9dec75a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34ff40c5c4cf405d8ef59a12171b03a5", - "IPY_MODEL_9d8d908e12b846d58aea8b0e48dd6b92", - "IPY_MODEL_e9c00880fa4b47c7bf645c3f91a950a9" - ], - "layout": "IPY_MODEL_7d93f09ca25a498fbd4776daa0fc4c53" - } - }, - "9d5116fb35f44752a680fe7dc2b410b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9d8d908e12b846d58aea8b0e48dd6b92": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6311ea720e344309b1d6fa1445f347e3", - "max": 636, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ba866548b5544345b37e29f6d8e92652", - "value": 636 - } - }, - "a239a415866d47238ffa50a5c9c0a580": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a6cc7eb40dbb4eff9c1e9a3f3b2aa381": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_db15fee2fae44e4babb449d56aeca0f3", - "placeholder": "​", - "style": "IPY_MODEL_389c5f0e14a24cf08aa175f1f21b22fc", - "value": " 2048/2048 [00:00<00:00, 6639.89 examples/s]" - } - }, - "ad5c35c060754bc8ae7bae0832af3921": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b12294da6032493e9ac7783b8e3ddaff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "b17245b343ee4c2aad1afb45814ec63c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b631f91b3a5040e0b237936b412d274b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_02dbaaf3131648f8a5b0eb6bf7a4d089", - "max": 15011, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ddabe3ec75d247468550ce9b202e30ab", - 
"value": 15011 - } - }, - "b67122a0d1b24d168be2501782effd15": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b73ef786040243589d43806a965f0eea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ba606012b7a14ad2824fe6843930ca08": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ba866548b5544345b37e29f6d8e92652": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "baace428cd5545718ddc6d0749e53562": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0d4c7d8c22dc49b4be6d4948a3224852", - "placeholder": "​", - "style": "IPY_MODEL_d95ba2612d5e409da8899e679e39c4ee", - "value": " 2048/2048 [00:00<00:00, 8114.30 examples/s]" - } - }, - "bb749eaf05dc4fbb9e134cc61caae11b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bc10c09f48534cc081dc53a4cc7bc20a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "be8a8c70a4c44ca4bd6fa595b29b3a35": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_efec2d4919314a79bd55fed697631516", - "placeholder": "​", - "style": "IPY_MODEL_389fffd528eb47f4b443b5e311a43629", - "value": "generation_config.json: 100%" - } - }, - "bf06c4115ae54e7b9da2838c9b6069a0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c1d5a98c0f324e29a3628ff49718d7b6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c34cba3327304cf98154ce2c73218441": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_64d0bc7735bf42ce800f56ebcce3cdce", - "placeholder": "​", - "style": "IPY_MODEL_01b7fbea9de54e338e3862e09d7e353d", - "value": " 17.5M/17.5M [00:00<00:00, 42.3MB/s]" - } - }, - "c4dc3a623a34415a83c2ffab0e19560b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c6487dbfe53345b9822b372069f34922": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bf06c4115ae54e7b9da2838c9b6069a0", - "max": 2048, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b73ef786040243589d43806a965f0eea", - "value": 2048 - } - }, - "c7e6412c823d48e9845eecb1b4e4d7f1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cde4b31291a9493f8ef649269ca11e1c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ce1a72b3385c44a2b6c8c36acc48867f": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9a7787f0d75847219071be822ccd76ba", - "max": 47022, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc10c09f48534cc081dc53a4cc7bc20a", - "value": 47022 - } - }, - "d0cb6b890289454981f6b9ad8cb2a0e1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d26f0017695b4e42b1c2736c07575775": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d5f566c5de7d4dd1808975839ab8b973": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d73b1aa5cf2e46c9ac65c617af00739f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d82e67b97ea24f80a1478783cfb0f365", - "placeholder": "​", - "style": "IPY_MODEL_586958735baa4f29978d399852dc2aff", - "value": " 5.23G/5.23G [02:03<00:00, 42.5MB/s]" - } - }, - "d82e67b97ea24f80a1478783cfb0f365": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d95ba2612d5e409da8899e679e39c4ee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "StyleView", - "description_width": "" - } - }, - "dacf87a2148c49db9306694b5a5f33da": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "db15fee2fae44e4babb449d56aeca0f3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ddabe3ec75d247468550ce9b202e30ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "df141f6e170f4af98d009fd42043a359": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4495489fb35f495c898b334d75c8e1ed", - "max": 17525357, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_34976cd4ca634e4cb7a5c0efffa41e81", - "value": 17525357 - } - }, - "e715b19f10c64131ba65d96bf968d72d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7d2b9dea260143eb8c2933a6d3592bb0", - "IPY_MODEL_087ed90b113448aa9f5079457ca4ba2b", - "IPY_MODEL_0f9fe85f7079487f837ef9a7a6d7cbc5" - ], - "layout": "IPY_MODEL_49680ea9e5ae4916b52e398e27f87ff5" - } - }, - "e8a1e9cc828f4a4d9c8f4e96b7fbb2fb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_861dfc84b7364159a78379c91007e413", - "placeholder": "​", - "style": "IPY_MODEL_b67122a0d1b24d168be2501782effd15", - "value": "Generating train split: 100%" - } - }, - "e92ae53e3bc14aa59b8cee25909c1d2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ec2221b24b94685887b091b45f3f746", - "max": 2048, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ee83baaeecd944a99c11f20f9b4f03fd", - "value": 2048 - } - }, - "e988eba4dbe546d484a6c4e88cf90b88": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e9c00880fa4b47c7bf645c3f91a950a9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d5f566c5de7d4dd1808975839ab8b973", - "placeholder": "​", - "style": "IPY_MODEL_0e17dd9f94714fb38ecbe3bd68873c1c", - "value": " 636/636 [00:00<00:00, 33.0kB/s]" - } - }, - "ec45944210dc46058e722e9969a7dcdc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22f90aaa2b1642c9bf9b385010b8a4cb", - "placeholder": "​", - "style": "IPY_MODEL_c7e6412c823d48e9845eecb1b4e4d7f1", - "value": " 13.1M/13.1M [00:00<00:00, 49.8MB/s]" - } - }, - "ee83baaeecd944a99c11f20f9b4f03fd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "efec2d4919314a79bd55fed697631516": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f0013cd0e75942a7b6f0af20d710c9f9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_5dd29f36fb5745618d95abda81e869bb", - "max": 8199, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f433b043c2ad41d7ba01a9ee1187fffe", - "value": 8199 - } - }, - "f2418db0b0ee4d3ca801f11c75ac1aca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f433b043c2ad41d7ba01a9ee1187fffe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "f701d542971a4238aa8b76affc054743": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7dba9ee7dd646f5bf4e9f8589addc83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d26f0017695b4e42b1c2736c07575775", - "max": 209, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_73aa48a573e349b1a05ba0bb5526bc2a", - "value": 209 - } - }, - "fb69e7b86acd485e814ffb0f7ef142f3": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_151b7ed8c9ca4a3192e2a28ff99c3dc6", - "placeholder": "​", - "style": "IPY_MODEL_97f1a984a0a149bc9f305f18eb109b67", - "value": "Map: 100%" - } - }, - "fdffb194cfad4bc2a2adb90614977445": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fe486852cda849d5b2cf2dda69c46feb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9af5e516b8594e7da181917ff351e019", - "IPY_MODEL_c6487dbfe53345b9822b372069f34922", - "IPY_MODEL_baace428cd5545718ddc6d0749e53562" - ], - "layout": "IPY_MODEL_b12294da6032493e9ac7783b8e3ddaff" - } - } - } } }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file From d46e5809cf35cde60f468076d2b3c1d11eed66a2 Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 22:14:58 -0500 Subject: [PATCH 33/34] removing the mention of stop for json --- 
lab3/solutions/LLM_Finetuning_Solution.ipynb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lab3/solutions/LLM_Finetuning_Solution.ipynb b/lab3/solutions/LLM_Finetuning_Solution.ipynb index b7656646..df20838d 100644 --- a/lab3/solutions/LLM_Finetuning_Solution.ipynb +++ b/lab3/solutions/LLM_Finetuning_Solution.ipynb @@ -844,7 +844,6 @@ " # prompt = self.prompt_template.format('''TODO''') # TODO\n", "\n", " # TODO: Call the judge LLM with the system prompt and the prompt template.\n", - " # Remember to stop the generation when the judge LLM outputs \"}\".\n", " res = self.judge.ask(\n", " system=self.system_prompt,\n", " user=prompt,\n", @@ -857,8 +856,6 @@ " # ) # TODO\n", "\n", " # Extract the assistant's content from the API response\n", - " # Remember to add the stop character back to the end of the response to be a\n", - " # valid JSON dictionary (its not there the judge LLM stoped once it saw it)\n", " res = res.choices[0].message.content\n", " res_dict = json.loads(res)\n", "\n", @@ -1267,4 +1264,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 1338884e21170e2e918dcf2a899ea33b9e87060e Mon Sep 17 00:00:00 2001 From: Ava Amini Date: Sat, 3 Jan 2026 22:16:29 -0500 Subject: [PATCH 34/34] student version w/ LFM, Gemini, Opik updates --- lab3/LLM_Finetuning.ipynb | 4730 +++---------------------------------- 1 file changed, 283 insertions(+), 4447 deletions(-) diff --git a/lab3/LLM_Finetuning.ipynb b/lab3/LLM_Finetuning.ipynb index 58213d2f..e72ef6d0 100644 --- a/lab3/LLM_Finetuning.ipynb +++ b/lab3/LLM_Finetuning.ipynb @@ -2,15 +2,17 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "yh8WeSsQfnyw" + }, "source": [ "\n", " \n", - " \n", - " \n", "
\n", " \n", " Visit MIT Deep Learning\n", + " \n", " Run in Google Colab\n", + " \n", " View Source on GitHub
\n", "\n", @@ -20,7 +22,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "O-pRdpMbfnyw" + }, "outputs": [], "source": [ "# Copyright 2026 MIT Introduction to Deep Learning. All Rights Reserved.\n", @@ -36,13 +40,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "StmM5Grmfnyx" + }, "source": [ "# Laboratory 3: Large Language Model (LLM) Fine-tuning\n", "\n", "In this lab, you will fine-tune a multi-billion parameter large language model (LLM). We will go through several fundamental concepts of LLMs, including tokenization, templates, and fine-tuning. This lab provides a complete pipeline for fine-tuning a language model to generate responses in a specific style, and you will explore not only language model fine-tuning, but also ways to evaluate the performance of a language model.\n", "\n", - "You will use Google's [Gemma 2B](https://huggingface.co/google/gemma-2b-it) model as the base language model to fine-tune; [Liquid AI's](https://www.liquid.ai/) [LFM-40B](https://www.liquid.ai/liquid-foundation-models) as an evaluation \"judge\" model; and Comet ML's [Opik](https://www.comet.com/site/products/opik/) as a framework for streamlined LLM evaluation.\n", + "You will use [Liquid AI's](https://www.liquid.ai/) [LFM2-1.2B](https://huggingface.co/LiquidAI/LFM2-1.2B) as the base language model to fine-tune; Google's [Gemini 2.5](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) model as an evaluation \"judge\" model; and Comet ML's [Opik](https://www.comet.com/site/products/opik/) as a framework for streamlined LLM evaluation.\n", "\n", "First, let's download the MIT deep learning package, install dependencies, and import the relevant packages we'll need for this lab." 
] @@ -86,16 +92,20 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "j-qsDChnfnyx" + }, "source": [ "# Part 1: Fine-tuning an LLM for style\n", "\n", - "In the first part of this lab, we will fine-tune an LLM as a chatbot that can generate responses in a specific style. We will use the [Gemma 2B model](https://huggingface.co/google/gemma-2b-it) as the base language model to finetune." + "In the first part of this lab, we will fine-tune an LLM as a chatbot that can generate responses in a specific style. We will use the [Liquid AI LFM2-1.2B model](https://huggingface.co/LiquidAI/LFM2-1.2B) as the base language model to finetune." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "VNE6ArjFfnyx" + }, "source": [ "## 1.1: Templating and tokenization\n", "\n", @@ -105,32 +115,30 @@ "\n", "[Templating](https://huggingface.co/docs/transformers/main/chat_templating) is a way to format inputs and outputs in a consistent structure that a language model can understand. It involves adding special tokens or markers to indicate different parts of the conversation, like who is speaking and where turns begin and end. This structure helps the model learn the proper format for generating responses and maintain a coherent conversation flow. Without templates, the model may not know how to properly format its outputs or distinguish between different speakers in a conversation.\n", "\n", - "Let's start by defining some basic templates for the chatbot, for turns where the user asks a question and the model responds with an answer." + "Let's start by defining some basic templates for the LFM2-based chatbot, for turns where the user asks a question and the model responds with an answer." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TN2zHVhfBvnE", - "outputId": "abddea82-12cf-4a16-868b-2e41f85fd7f1" + "id": "TN2zHVhfBvnE" }, "outputs": [], "source": [ "# Basic question-answer template\n", - "template_without_answer = \"user\\n{question}\\nmodel\\n\"\n", - "template_with_answer = template_without_answer + \"{answer}\\n\"\n", + "template_without_answer = \"<|startoftext|><|im_start|>user\\n{question}<|im_end|>\\n<|im_start|>assistant\\n\"\n", + "template_with_answer = template_without_answer + \"{answer}<|im_end|>\\n\"\n", "\n", "# Let's try to put something into the template to see how it looks\n", - "print(template_with_answer.format(question=\"What is your name?\", answer=\"My name is Gemma!\"))" + "print(template_with_answer.format(question=\"What is your name?\", answer=\"My name is Lili!\"))" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "keh0rVN-fnyx" + }, "source": [ "### 1.1.2: Tokenization\n", "\n", @@ -144,70 +152,19 @@ "\n", "3. **Subword tokenization**: breaks words into smaller units (subwords) based on their frequency. The most popular and commonly used approach is [byte-pair encoding (BPE)](https://en.wikipedia.org/wiki/Byte_pair_encoding), which iteratively merges the most frequent character pairs. Modern language models typically use subword tokenization as it balances vocabulary size and sequence length while handling unknown words effectively by breaking them into known subword units.\n", "\n", - "In this lab we will use the tokenizer from the Gemma 2B model, which uses BPE. Let's load it and inspect it." + "In this lab we will use the tokenizer from the LFM2 model, which uses BPE. Let's load it and inspect it." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 266, - "referenced_widgets": [ - "2846d60e43a24160b177166c25dd0122", - "231e675f282d48a39e023149d4879b8b", - "ce1a72b3385c44a2b6c8c36acc48867f", - "57180ced897d4007a6d836665a032802", - "8d2df8e3bb4b410f9f671d4cd2a6e80d", - "16d840d19a804bec80ea85cafc850c13", - "8642a2df48194dc2a0314de10e0a7635", - "9a7787f0d75847219071be822ccd76ba", - "bc10c09f48534cc081dc53a4cc7bc20a", - "ba606012b7a14ad2824fe6843930ca08", - "9d5116fb35f44752a680fe7dc2b410b7", - "01bc169362704eeebd69a87d641d269e", - "7bbc93e57dda4424acb428027a9f014a", - "09b97b2a1f734e38b2a9908cf59edd8d", - "74dc454addc64783bbf1b3897a817147", - "47037605ebef451e91b64dd2fb040475", - "f701d542971a4238aa8b76affc054743", - "9498c07f6ad74b248c94de3bad444f62", - "c4dc3a623a34415a83c2ffab0e19560b", - "cde4b31291a9493f8ef649269ca11e1c", - "7899c5e27ac64478a6e6ac767da24a20", - "0b18c6ae2dee474aae96fdbd81637024", - "81b9c3a820424c67a4c050545c2daa2e", - "2318014fa6fd4452b76b5938a7da0c6f", - "df141f6e170f4af98d009fd42043a359", - "c34cba3327304cf98154ce2c73218441", - "1a949dd5e121434dbbf1b0c290d71373", - "c1d5a98c0f324e29a3628ff49718d7b6", - "d0cb6b890289454981f6b9ad8cb2a0e1", - "4495489fb35f495c898b334d75c8e1ed", - "34976cd4ca634e4cb7a5c0efffa41e81", - "64d0bc7735bf42ce800f56ebcce3cdce", - "01b7fbea9de54e338e3862e09d7e353d", - "9bead4274c0c4fc6acf12bf6b9dec75a", - "34ff40c5c4cf405d8ef59a12171b03a5", - "9d8d908e12b846d58aea8b0e48dd6b92", - "e9c00880fa4b47c7bf645c3f91a950a9", - "7d93f09ca25a498fbd4776daa0fc4c53", - "1c35e9b4250f4fca9e65ecfe4dcb4006", - "1eacc88f8b754c7e93582ce65f99b5db", - "6311ea720e344309b1d6fa1445f347e3", - "ba866548b5544345b37e29f6d8e92652", - "d5f566c5de7d4dd1808975839ab8b973", - "0e17dd9f94714fb38ecbe3bd68873c1c" - ] - }, - "id": "EeDF1JI-BvnF", - "outputId": "6c9d3a2b-0b6b-4fa1-de66-dc7879ab4d15" + "id": "EeDF1JI-BvnF" }, "outputs": [], "source": [ - "# Load the tokenizer 
for Gemma 2B\n", - "model_id = \"unsloth/gemma-2-2b-it\" #\"google/gemma-2-2b-it\"\n", + "# Load the tokenizer for Liquid AI LFM2-1.2B\n", + "model_id = \"LiquidAI/LFM2-1.2B\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "\n", "# How big is the tokenizer?\n", @@ -216,10 +173,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "sSJxCx6Nfnyx" + }, "source": [ "We not only need to be able to tokenize the text into tokens (encode), but also de-tokenize the tokens back into text (decode). Our tokenizer will have:\n", - "1. an `encode` function to tokenize the text into tokens, and \n", + "1. an `encode` function to tokenize the text into tokens, and\n", "2. a `decode` function to de-tokenize back to text so that we can read out the model's outputs.\n", "\n", "Let's test out both steps and inspect to get a better understanding of how this works." @@ -229,11 +188,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JH1XzPkiBvnF", - "outputId": "25e68cce-5aa0-432c-ab8c-246910d6c6b0" + "id": "JH1XzPkiBvnF" }, "outputs": [], "source": [ @@ -252,22 +207,20 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "v_0H2XZUfnyx" + }, "source": [ "This is really cool. Now we have a way to move in and out of the token space.\n", "\n", - "To \"chat\" with our LLM chatbot, we need to use the tokenizer and the chat template together, in order for the model to respond to the user's question. We can use the templates defined earlier to construct a prompt for the model, without the answer. " + "To \"chat\" with our LLM chatbot, we need to use the tokenizer and the chat template together, in order for the model to respond to the user's question. We can use the templates defined earlier to construct a prompt for the model, without the answer." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jyBxl6NIBvnF", - "outputId": "06e54226-c434-4a84-868f-a8b5b5085bbd" + "id": "jyBxl6NIBvnF" }, "outputs": [], "source": [ @@ -277,69 +230,33 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "nIw5Qzf2fnyy" + }, "source": [ - "If we were to feed this to the model, it would see that it is now the start of the model's turn, and it would generate the answer to this question. " + "If we were to feed this to the model, it would see that it is now the start of the model's turn, and it would generate the answer to this question." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "Lgp0JVnifnyy" + }, "source": [ "## 1.2: Getting started with the LLM\n", "\n", "Now that we have a way to prepare our data, we're ready to work with our LLM!\n", "\n", - "LLMs like Gemma 2B are trained on a large corpus of text, on the task of predicting the next token in a sequence, given the previous tokens. We call this training task \"next token prediction\"; you may also see it called \"causal language modeling\" or \"autoregressive language modeling\". We can leverage models trained in this way to generate new text by sampling from the predicted probability distribution over the next token.\n", + "LLMs like LFM2 are trained on a large corpus of text, on the task of predicting the next token in a sequence, given the previous tokens. We call this training task \"next token prediction\"; you may also see it called \"causal language modeling\" or \"autoregressive language modeling\". We can leverage models trained in this way to generate new text by sampling from the predicted probability distribution over the next token.\n", "\n", - "Let's load the Gemma 2B model and start working with it. We will construct a prompt in chat template form and tokenize it. 
Then, we will feed it to the model to predict next token probabilities. Finally, we will get the next token (which is still numerical) and decode it to text." + "Let's load the LFM2 model and start working with it. We will construct a prompt in chat template form and tokenize it. Then, we will feed it to the model to predict next token probabilities. Finally, we will get the next token (which is still numerical) and decode it to text." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 113, - "referenced_widgets": [ - "e715b19f10c64131ba65d96bf968d72d", - "7d2b9dea260143eb8c2933a6d3592bb0", - "087ed90b113448aa9f5079457ca4ba2b", - "0f9fe85f7079487f837ef9a7a6d7cbc5", - "49680ea9e5ae4916b52e398e27f87ff5", - "5cd563e97ce742e99942f553b31e3bed", - "e988eba4dbe546d484a6c4e88cf90b88", - "f2418db0b0ee4d3ca801f11c75ac1aca", - "676328ed1fb04ff4983a5b26df17d966", - "dacf87a2148c49db9306694b5a5f33da", - "3198b48f531d4e26bff98917f9d2b592", - "02edcc6aafcf4895843ff5e93ef30f45", - "44c5c62e4af7441bafbc7734982aa660", - "4b81f2c217b24406be898b1333b56352", - "d73b1aa5cf2e46c9ac65c617af00739f", - "6e626c5ef0dd408eaf3139f6aabaf190", - "1725a2fb58b94626a34f87c66ba0e8c2", - "1d6090d1b9e24e3cb550b655b8fbe318", - "2f803afa195c476fbfb506d53645c381", - "1734b0819fe74736a0417a9e2b977695", - "d82e67b97ea24f80a1478783cfb0f365", - "586958735baa4f29978d399852dc2aff", - "34ddb97a59d940879eb53d3e4dbe177e", - "be8a8c70a4c44ca4bd6fa595b29b3a35", - "f7dba9ee7dd646f5bf4e9f8589addc83", - "23790096dbc541d49e8db4c11a772a3f", - "19f8ecfe426246eb93849b324e986d37", - "efec2d4919314a79bd55fed697631516", - "389fffd528eb47f4b443b5e311a43629", - "d26f0017695b4e42b1c2736c07575775", - "73aa48a573e349b1a05ba0bb5526bc2a", - "a239a415866d47238ffa50a5c9c0a580", - "00dffcff57a14ad28d665cd2c2a11960" - ] - }, - "id": "mWtWvgiuBvnG", - "outputId": "b06295c8-b7b7-4d95-e0d6-31f65ac595ef" + "id": "mWtWvgiuBvnG" }, "outputs": [], "source": [ @@ 
-351,18 +268,14 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2SMDd5dpBvnG", - "outputId": "b5e63295-683a-4daa-9526-1ef93ed9e95a" + "id": "2SMDd5dpBvnG" }, "outputs": [], "source": [ "### Putting it together to prompt the model and generate a response ###\n", "\n", "# 1. Construct the prompt in chat template form\n", - "question = \"What is the capital of France? Use one word.\" \n", + "question = \"What is the capital of France? Use one word.\"\n", "prompt = template_without_answer.format('''TODO''') # TODO\n", "\n", "# 2. Tokenize the prompt\n", @@ -386,7 +299,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "CJF74Cayfnyy" + }, "source": [ "Note that the model is not able to predict the answer to the question, it is only able to predict the next token in the sequence! For more complex questions, we can't just generate one token, but rather we need to generate a sequence of tokens.\n", "\n", @@ -399,11 +314,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "XnWMUQVbBvnG", - "outputId": "d0c110d0-d740-427e-abf9-312fe2dd9f5e" + "id": "XnWMUQVbBvnG" }, "outputs": [], "source": [ @@ -415,24 +326,28 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "B3zKg1qFfnyy" + }, "source": [ "Now we have the basic pipeline for generating text with an LLM!" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "lEW-YdEyfnyy" + }, "source": [ "## 1.3: Fine-tuning\n", "\n", "Fine-tuning is a technique that allows us to adapt a pre-trained neural network to better suit a downstream task, domain, or style, by training the model further on new data. By training the model further on a carefully curated dataset, we can modify its behavior, style, or capabilities. Fine-tuning is used in a variety of applications, not just language modeling. 
But in language modeling, fine-tuning can be used to:\n", - "- Adapt the model's writing style \n", + "- Adapt the model's writing style\n", "- Improve performance on specific tasks or domains\n", "- Teach the model new capabilities or knowledge\n", "- Reduce unwanted behaviors or biases\n", "\n", - "In this lab, you will fine-tune the Gemma LLM to adapt the model's writing style. Recall that in Lab 1 you built out a RNN-based sequence model to generate Irish folk songs. Continuing with our Irish theme, we will first fine-tune the LLM to chat in the style of a leprechaun.\n", + "In this lab, you will fine-tune the LFM2 LLM to adapt the model's writing style. Recall that in Lab 1 you built out a RNN-based sequence model to generate Irish folk songs. Continuing with our Irish theme, we will first fine-tune the LLM to chat in the style of a leprechaun.\n", "\n", "![Let's Dance!](http://33.media.tumblr.com/3d223954ad0a77f4e98a7b87136aa395/tumblr_nlct5lFVbF1qhu7oio1_500.gif)\n", "\n", @@ -443,58 +358,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 252, - "referenced_widgets": [ - "453101669bb84ec784d30fdecf9e1052", - "1acb7981a03c4d8491072db5b0f80b91", - "f0013cd0e75942a7b6f0af20d710c9f9", - "1caca54176f24a68841321407d5cb92c", - "4bf984821d194c64945609ccf5d08ab0", - "20f4fd378f6b44f386a6bdd9f0e787f7", - "72693249b56e4995815d950d33ebbbba", - "5dd29f36fb5745618d95abda81e869bb", - "f433b043c2ad41d7ba01a9ee1187fffe", - "304108b55b1c4ae58ac271e2d8616746", - "1a1e342e7aa943cd82c91b224ea01932", - "6a550e5a66704b7b819286707bd3a918", - "3d6d0fa2af094773b593a85d6c51cf48", - "8127e4af60a149f68318c0222641718f", - "ec45944210dc46058e722e9969a7dcdc", - "095a95bac5224763b7f512b468c7431d", - "b17245b343ee4c2aad1afb45814ec63c", - "fdffb194cfad4bc2a2adb90614977445", - "589c07cbcc1b4d3db5bdee5a15dbd8df", - "ad5c35c060754bc8ae7bae0832af3921", - "22f90aaa2b1642c9bf9b385010b8a4cb", - 
"c7e6412c823d48e9845eecb1b4e4d7f1", - "578a08d4d89b496dbca00da965b745d2", - "e8a1e9cc828f4a4d9c8f4e96b7fbb2fb", - "b631f91b3a5040e0b237936b412d274b", - "65670d440ae448c1862c9350e2784a3f", - "bb749eaf05dc4fbb9e134cc61caae11b", - "861dfc84b7364159a78379c91007e413", - "b67122a0d1b24d168be2501782effd15", - "02dbaaf3131648f8a5b0eb6bf7a4d089", - "ddabe3ec75d247468550ce9b202e30ab", - "395eb951f3044c20a6416c346c3e1cdd", - "516627614ee0481aa5ac80cc77673a54", - "34711f6447034a728316aacfc401a7e8", - "fb69e7b86acd485e814ffb0f7ef142f3", - "e92ae53e3bc14aa59b8cee25909c1d2a", - "a6cc7eb40dbb4eff9c1e9a3f3b2aa381", - "91bf23bab4a84645b07952fc7a088c36", - "151b7ed8c9ca4a3192e2a28ff99c3dc6", - "97f1a984a0a149bc9f305f18eb109b67", - "4ec2221b24b94685887b091b45f3f746", - "ee83baaeecd944a99c11f20f9b4f03fd", - "db15fee2fae44e4babb449d56aeca0f3", - "389c5f0e14a24cf08aa175f1f21b22fc" - ] - }, - "id": "kN0pHHS8BvnH", - "outputId": "a8422640-ba32-4d64-9379-1761062fd02e" + "id": "kN0pHHS8BvnH" }, "outputs": [], "source": [ @@ -512,7 +376,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "eoAfOuCrfnyy" + }, "source": [ "### 1.3.1: Chat function\n", "\n", @@ -537,7 +403,7 @@ "source": [ "def chat(question, max_new_tokens=32, temperature=0.7, only_answer=False):\n", " # 1. Construct the prompt using the template\n", - " prompt = template_without_answer.format('''TODO''') # TODO \n", + " prompt = template_without_answer.format('''TODO''') # TODO\n", "\n", " # 2. Tokenize the text\n", " input_ids = tokenizer('''TODO''', '''TODO''').to(model.device) # TODO\n", @@ -559,7 +425,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "rPAnt_Swfnyy" + }, "source": [ "Let's try chatting with the model now to test if it works! We have a sample question here (continuing with the Irish theme); feel free to try out other questions!" 
] @@ -568,11 +436,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FDr5f2djBvnH", - "outputId": "c42789d4-fbbf-438b-fd9d-57b3e037daa5" + "id": "FDr5f2djBvnH" }, "outputs": [], "source": [ @@ -585,29 +449,27 @@ "\n", "print(answer)\n", "\n", - "'''TODO: Experiment with asking the model different questions and temperature values, and see how it responds!'''" + "### TODO: Experiment with asking the model different questions and temperature values, and see how it responds!" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "s7xpiTuCfnyy" + }, "source": [ "### 1.3.2: Parameter-efficient fine-tuning\n", "\n", - "In fine-tuning, the weights of the model are updated to better fit the fine-tuning dataset and/or task. Updating all the weights in a language model like Gemma 2B -- which has ~2 billion parameters -- is computationally expensive. There are many techniques to make fine-tuning more efficient.\n", + "In fine-tuning, the weights of the model are updated to better fit the fine-tuning dataset and/or task. Updating all the weights in a language model like LFM2-1.2B -- which has ~1 billion parameters -- is computationally expensive. There are many techniques to make fine-tuning more efficient.\n", "\n", - "We will use a technique called [LoRA](https://arxiv.org/abs/2106.09685) -- low-rank adaptation -- to make the fine-tuning process more efficient. LoRA is a way to fine-tune LLMs very efficiently by only updating a small subset of the model's parameters, and it works by adding trainable low-rank matrices to the model. While we will not go into the details of LoRA here, you can read more about it in the [LoRA paper](https://arxiv.org/abs/2106.09685). We will use the [`peft`](https://pypi.org/project/peft/) library to apply LoRA to the Gemma model." 
+ "We will use a technique called [LoRA](https://arxiv.org/abs/2106.09685) -- low-rank adaptation -- to make the fine-tuning process more efficient. LoRA is a way to fine-tune LLMs very efficiently by only updating a small subset of the model's parameters, and it works by adding trainable low-rank matrices to the model. While we will not go into the details of LoRA here, you can read more about it in the [LoRA paper](https://arxiv.org/abs/2106.09685). We will use the [`peft`](https://pypi.org/project/peft/) library to apply LoRA to the LFM model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Fb6Y679hBvnI", - "outputId": "8070d39e-0fd9-44cd-9c35-d86afcd99caf" + "id": "Fb6Y679hBvnI" }, "outputs": [], "source": [ @@ -639,7 +501,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "ze3rM7qmfnyy" + }, "source": [ "### 1.3.3: Forward pass and loss computation\n", "\n", @@ -682,7 +546,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "09btP87tfnyz" + }, "source": [ "### 1.3.4: Training loop for fine-tuning\n", "\n", @@ -756,21 +622,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "blFoO-PhBvnI", - "outputId": "d23f2002-6e4a-41b0-9710-13d394290f34" + "id": "blFoO-PhBvnI" }, "outputs": [], "source": [ - "# Call the train function to fine-tune the model! Hint: you'll start to see results after a few dozen steps.\n", + "# Call the train function to fine-tune the model! Hint: you'll start to see results after at least 100 steps.\n", "model = train('''TODO''') # TODO" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "EKJOH7Ihfnyz" + }, "source": [ "Let's try chatting with the model again to see how it has changed!" 
] @@ -779,11 +643,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "su4ZAG3eBvnI", - "outputId": "b21ce134-1763-4872-8b58-19328d98b76a" + "id": "su4ZAG3eBvnI" }, "outputs": [], "source": [ @@ -798,7 +658,7 @@ "source": [ "# Part 2: Evaluating a style-tuned LLM\n", "\n", - "How do we know if the model is doing well? How closely does the model's style match the style of a leprechaun? As you can see from the example above, determining whether a generated response is good or not is can seem qualitative, and it can be hard to measure how well the model is doing. \n", + "How do we know if the model is doing well? How closely does the model's style match the style of a leprechaun? As you can see from the example above, determining whether a generated response is good or not can seem qualitative, and it can be hard to measure how well the model is doing.\n", "\n", "While benchmarks have been developed to evaluate the performance of language models on a variety of tasks, these benchmarks are not always representative of the real-world performance of the model. For example, a model may perform well on a benchmark but poorly on a more realistic task. Benchmarks are also limited in the scope of tasks they can cover and capabilities they can reflect, and there can be concerns about whether the data in the benchmark was used to train the model. 
Synthetic data generation and synthetic tasks are a way to address these limitations, and this is an active area of research.\n", "\n", @@ -807,7 +667,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "fLvX1miFfnyz" + }, "source": [ "### 2.1: Fine-tune well, you must!\n", "\n", @@ -824,25 +686,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292, - "referenced_widgets": [ - "fe486852cda849d5b2cf2dda69c46feb", - "9af5e516b8594e7da181917ff351e019", - "c6487dbfe53345b9822b372069f34922", - "baace428cd5545718ddc6d0749e53562", - "b12294da6032493e9ac7783b8e3ddaff", - "43e58008991640f1a96e123f545ca52d", - "5c780ea0aeee467da497547d78453492", - "bf06c4115ae54e7b9da2838c9b6069a0", - "b73ef786040243589d43806a965f0eea", - "0d4c7d8c22dc49b4be6d4948a3224852", - "d95ba2612d5e409da8899e679e39c4ee" - ] - }, - "id": "-gLgE41YBvnJ", - "outputId": "174004bd-f5f1-42e0-96ff-41d480254c87" + "id": "-gLgE41YBvnJ" }, "outputs": [], "source": [ @@ -853,7 +697,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "nA8h2DcCfnyz" + }, "source": [ "Start by defining a system prompt for the judge LLM, setting the context that it will evaluate how well the outputs of your chat model follow Yoda speak. Experiment with different system prompts to see how they affect the judge LLM's evaluation! Keep in mind that a better judge LLM will give you a better evaluation of how well your Yoda model is doing, and that a better evaluation will help you improve your Yoda model." 
] @@ -862,11 +708,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "REkrJ1SCBvnJ", - "outputId": "a5630cfb-5a6c-4874-9007-fe519de32220" + "id": "REkrJ1SCBvnJ" }, "outputs": [], "source": [ @@ -874,7 +716,7 @@ "\n", "'''TODO: Experiment with different system prompts to see how they affect the judge LLM's evaluation!\n", " Come back to this cell after you've generated some text from your model.'''\n", - " \n", + "\n", "system_prompt = \"\"\"\n", "You are an impartial judge that evaluates if text was written by {style}.\n", "\n", @@ -887,12 +729,12 @@ "is written exactly in the style of {style}, 5 if mixed faithfulness to the\n", "style, or 0 if the text is not at all written in the style of {style}.\n", "\n", - "The format of the your response should be a JSON dictionary and nothing else:\n", + "Directly answer with the score formatted in a dictionary.\n", + "The format of your response should only be the dictionary and nothing else:\n", "{{\"score\": }}\n", "\"\"\"\n", "\n", "style = \"Yoda\"\n", - "# example = \"\"\"The very Republic is threatened, if involved the Sith are. Hard to see, the dark side is. \"\"\"\n", "example = \"The very Republic is threatened, if involved the Sith are. Hard to see, the dark side is. Discover who this assassin is, we must. With this Naboo queen you must stay, Qui-Gon. Protect her. May the Force be with you. A vergence, you say? But you do! Revealed your opinion is. Trained as a Jedi, you request for him? Good, good, young one.\"\n", "\n", "system_prompt = system_prompt.format(style=style, example=example)\n", @@ -902,15 +744,17 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "gmdg3FNsfnyz" + }, "source": [ "### 2.2: Setting up the judge LLM\n", "\n", - "In LLM as a judge, we need to use a model that is larger (and therefore more capable) than our \"performer\" model, in our case the style fine-tuned Gemma 2B. 
Since it is infeasible to load larger models locally into notebooks, you will gain experience interfacing with these larger LLMs through an API served on [OpenRouter](https://openrouter.ai/). \n", + "In LLM as a judge, we need to use a model that is larger (and therefore more capable) than our \"performer\" model, in our case the style fine-tuned LFM2 1.2B. Since it is infeasible to load larger models locally into notebooks, you will gain experience interfacing with these larger LLMs through an API served on [OpenRouter](https://openrouter.ai/).\n", "\n", - "You will need to sign up for an [OpenRouter account](https://openrouter.ai/sign-up) and then [generate an API key](https://openrouter.ai/keys). Running powerful LLMs of this scale costs money -- for students in the in-person course, we can provide a credit to your OpenRouter account to allow you to run this lab. Come to office hours to receive your credit. \n", + "You will need to sign up for an [OpenRouter account](https://openrouter.ai/sign-up) and then [generate an API key](https://openrouter.ai/keys). Running powerful LLMs of this scale costs money -- for students in the in-person course, we can provide a credit to your OpenRouter account to allow you to run this lab. Come to office hours to receive your credit.\n", "\n", - "Through the OpenRouter interface, you will be able to experiment with different judge LLMs -- here we have suggested two possible larger LLMs to get you started: [Liquid AI's](https://www.liquid.ai/) [LFM-40B](https://openrouter.ai/models/liquid-ai/lfm-40b) andGoogle's [Gemma 9B](https://openrouter.ai/models/google/gemma-9b). 
Note there are also free models available on OpenRouter (e.g., [gemma-2-9b-it:free](https://openrouter.ai/google/gemma-2-9b-it:free)), but these will run into rate limitations if you run them too much.\n", + "Through the OpenRouter interface, you will be able to experiment with different judge LLMs -- here we have suggested one possible larger LLM to get you started: Google's [Gemini 2.5](https://openrouter.ai/google/gemini-2.5-flash/providers). Note there are also free models available on OpenRouter (e.g., [gemma-2-9b-it:free](https://openrouter.ai/google/gemma-2-9b-it:free)), but these will run into rate limitations if you run them too much.\n", "\n", "We have defined a simple class, `LLMClient`, to interact with the OpenRouter API. This class has a method `ask` that takes a user prompt and returns the model's response. Keep in mind that the judge LLM's response will be conditioned on the system prompt you provide -- the system prompt is critical to set the criteria for the evaluation!" ] @@ -919,46 +763,49 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9S7DtGZ5BvnJ", - "outputId": "4ac889ba-43a8-4636-9341-f0b1f260faef" + "id": "9S7DtGZ5BvnJ" }, "outputs": [], "source": [ "OPENROUTER_API_KEY = \"\" # TODO: add your OpenRouter API key here\n", "assert OPENROUTER_API_KEY != \"\", \"You must set your OpenRouter API key before running this cell!\"\n", "\n", - "model_name = \"liquid/lfm2-8b-a1b\"\n", - "# model_name = \"google/gemma-2-9b-it\"\n", + "model_name = \"google/gemini-2.5-flash\"\n", "llm = mdl.lab3.LLMClient(model=model_name, api_key=OPENROUTER_API_KEY)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "Hp9DnayMfnyz" + }, "source": [ "### 2.3: Defining the evaluation metric\n", "\n", "Great! We have set up our judge LLM, but we still need to make this quantitative. 
We can do this by defining a metric that uses the judge LLM to score the outputs of the model. Doing this is streamlined with Comet ML's [Opik library](https://www.comet.com/docs/opik/python-sdk-reference/), a platform for LLM evaluation and benchmarking.\n", "\n", "In prior labs, we used Comet for experiment tracking, so you should have an account and API key. If not, you can sign up for a Comet account [here](https://www.comet.com/signup?from=llm&utm_source=mit_dl&utm_medium=notebook&utm_campaign=opik) if you have not done so already. Now we will use the Comet Opik library to define a metric that uses the judge LLM to score the outputs of the model.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Opik provides a framework for creating custom judge metrics as well as a variety of pre-built metrics for common evaluation tasks. These metrics are designed to help you quickly and effectively gauge the performance of your LLM outputs and include metrics such as Hallucination, Answer Relevance, Context Precision/Recall and more. You can learn more about the available metrics in the [`Metrics Overview section`](https://www.comet.com/docs/opik/evaluation/metrics/overview) of the Opik documentation.\n", "\n", - "Opik has a base class for defining metrics, [`base_metric.BaseMetric`](https://www.comet.com/docs/opik/python-sdk-reference/evaluation/metrics/BaseMetric.html). You will use this to define a custom metric that uses the judge LLM to evaluate text for how well it adheres to Yoda speak. Note that the judge LLM and the metric can be applied to any text, not just the outputs of the model. 
This is important to keep in mind, since we need both a negative control -- text in the \"base\" standard English style -- and a positive control -- training-set text in Yoda-speak style -- against which to compare the model's generations.\n", + "The Opik python SDK has a base class for defining metrics, [`base_metric.BaseMetric`](https://www.comet.com/docs/opik/python-sdk-reference/evaluation/metrics/BaseMetric.html). You will use this to define a custom metric that uses the judge LLM to evaluate text for how well it adheres to Yoda speak. Note that the judge LLM and the metric can be applied to any text, not just the outputs of the model. This is important to keep in mind, since we need both a negative control -- text in the \"base\" standard English style -- and a positive control -- training-set text in Yoda-speak style -- against which to compare the model's generations.\n", "\n", "Set the judging criteria in the system prompt, and define the `score` function to evaluate text by querying the judge LLM." - ] + ], + "metadata": { + "id": "1aTAb4JHlRQm" + } }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "llB3FgiwBvnJ", - "outputId": "6c1dbf01-298c-4097-d2fa-8a212ca69822" + "id": "llB3FgiwBvnJ" }, "outputs": [], "source": [ @@ -978,26 +825,15 @@ " # TODO: Convert the text to template form before passing it to the judge LLM\n", " prompt = self.prompt_template.format('''TODO''') # TODO\n", "\n", - " # The system prompt asks the judge to output a JSON dictionary of the form: \n", - " # {\"score\": }\n", - " # To do this, we need to specify the judge to stop generating after it \n", - " # closes the JSON dictionary (i.e., when it outputs \"}\")\n", - " # Hint: Use the stop=[\"}\"] argument within the judge.ask() method to specify this.\n", - " stop = \"}\"\n", - "\n", - " # TODO: Call the judge LLM with the system prompt and the prompt template. 
\n", - " # Remember to stop the generation when the judge LLM outputs \"}\".\n", + " # TODO: Call the judge LLM with the system prompt and the prompt template.\n", " res = self.judge.ask(\n", - " system='''TODO''', \n", - " user='''TODO''', \n", + " system='''TODO''',\n", + " user='''TODO''',\n", " max_tokens='''TODO'''\n", - " stop='''TODO'''\n", " ) # TODO\n", "\n", " # Extract the assistant's content from the API response\n", - " # Remember to add the stop character back to the end of the response to be a \n", - " # valid JSON dictionary (its not there the judge LLM stoped once it saw it)\n", - " res = res.choices[0].message.content + stop\n", + " res = res.choices[0].message.content\n", " res_dict = json.loads(res)\n", "\n", " max_score = 10 # The maximum score that the LLM should output\n", @@ -1015,15 +851,19 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "xiW6rr7Lfnyz" + }, "source": [ - "Instaniate your Comet Opik judge using the LLMJudgeEvaluator class and system prompt." + "Instantiate your Comet Opik judge using the `LLMJudgeEvaluator` class and system prompt." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "m2wCbTn-fnyz" + }, "outputs": [], "source": [ "judge = LLMJudgeEvaluator(llm, system_prompt=system_prompt)" @@ -1031,11 +871,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "tfphmTD2fny0" + }, "source": [ "## 2.4: Evaluating the model by scoring with your judge LLM\n", "\n", - "Now we can use the judge LLM to score the outputs of the model. We will use the `scoring_function` to score text using the judge LLM. \n", + "Now we can use the judge LLM to score the outputs of the model. We will use the `scoring_function` to score text using the judge LLM.\n", "\n", "Feed in a few probe sentences to get a vibe check on the judge LLM." 
] @@ -1044,11 +886,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D_rvQDrvBvnJ", - "outputId": "5d460cfb-4237-4a5b-e5d2-4974ea984805" + "id": "D_rvQDrvBvnJ" }, "outputs": [], "source": [ @@ -1068,7 +906,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "iBjWAXfTfny0" + }, "source": [ "We will evaluate how well our fine-tuned model is doing by scoring the outputs of the model, as well as our base-style text (negative control) and the training-set text in Yoda-speak style (positive control).\n", "\n", @@ -1101,7 +941,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "7Kz6t364fny0" + }, "source": [ "Let's also collect some base-style text (`base_samples`) and the training-set text in Yoda-speak style (`style_samples`). For these, we won't need to generate text, since we already have the text in the dataset." ] @@ -1110,11 +952,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZEpUWV2EBvnK", - "outputId": "ff1192d3-ca28-4429-d110-47736fbaf90c" + "id": "ZEpUWV2EBvnK" }, "outputs": [], "source": [ @@ -1124,7 +962,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "drqXTryEfny0" + }, "source": [ "Now that we have our samples, we can score them using the judge LLM. We will use a multiprocessed scoring function to score the samples in parallel, because each sample is independent and we can submit them all as simultaneous requests to the judge LLM." ] @@ -1160,9 +1000,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "jQ5PKB4jfny0" + }, "source": [ - "Look at the average scores for each of the three types of text -- what do you observe? 
\n", + "Look at the average scores for each of the three types of text -- what do you observe?\n", "\n", "We can also plot the distribution of scores for each of the three types of text.\n" ] @@ -1171,12 +1013,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 472 - }, - "id": "V4-g0Z3_BvnK", - "outputId": "5497bdad-7878-4df5-b2b0-015b896ea072" + "id": "V4-g0Z3_BvnK" }, "outputs": [], "source": [ @@ -1198,16 +1035,130 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "qtKce8CYfny0" + }, "source": [ "Use these observations to improve your model. Remember that the judge LLM is not perfect, and you can try to improve the judge LLM to better evaluate the model's outputs. A better judge LLM will give you a better evaluation of how well your Yoda model is doing, and that better evaluation will help you improve your Yoda model." ] }, { "cell_type": "markdown", - "metadata": {}, "source": [ - "## 2.5: Conclusion\n", + "## 2.5: Monitoring with evals\n", + "\n", + "Just as we used Opik for evaluation metrics during fine-tuning and testing, we can also use Opik to monitor our LLM once it is live in deployment. This makes it easy to track the same metrics consistently across both development and deployment.\n", + "\n", + "In prior labs, we used Comet for experiment tracking, so you should have an account and API key. If not, you can sign up for a Comet account [here](https://www.comet.com/signup?from=llm&utm_source=mit_dl&utm_medium=notebook&utm_campaign=opik) if you have not done so already. We will configure Opik by setting the API key and naming our Opik project." 
+ ], + "metadata": { + "id": "BeMLjanimqDr" + } + }, + { + "cell_type": "code", + "source": [ + "os.environ[\"OPIK_API_KEY\"] = \"\" # TODO: add your OPIK or Comet API key here\n", + "assert os.environ[\"OPIK_API_KEY\"] != \"\", \"You must set your OPIK or Comet API key before running this cell!\"\n", + "\n", + "# Set the project name for Opik\n", + "os.environ[\"OPIK_PROJECT_NAME\"] = \"6S191_Lab3\"\n", + "\n", + "opik.configure()" + ], + "metadata": { + "id": "uA2nuV8n6STH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "[Tracing](https://www.comet.com/docs/opik/tracing/concepts) helps you understand the end-to-end flow of your LLM application and pinpoint specific steps that may be causing issues.\n", + "\n", + "In the example below, we make a sample call to the chatbot and use Opik’s `@track` decorator to log data to the Opik UI, creating a record of live calls to the application. You can add the `@track` decorator to any function to trace not only LLM calls, but also other steps in your application pipeline." + ], + "metadata": { + "id": "VcwD9NcM6SnJ" + } + }, + { + "cell_type": "code", + "source": [ + "@opik.track\n", + "def inference_chat(question, max_new_tokens=32, temperature=0.7, only_answer=False):\n", + "\n", + " # 1. Construct the prompt using the template\n", + " prompt = template_without_answer.format(question=question)\n", + "\n", + " # 2. Tokenize the text\n", + " input_ids = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", + "\n", + " # 3. Feed through the model to predict the next token probabilities\n", + " with torch.no_grad():\n", + " outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=max_new_tokens, temperature=temperature)\n", + "\n", + " # 4. Only return the answer if only_answer is True\n", + " output_tokens = outputs[0]\n", + " if only_answer:\n", + " output_tokens = output_tokens[input_ids['input_ids'].shape[1]:]\n", + "\n", + " # 5. 
Decode the tokens\n", + " result = tokenizer.decode(output_tokens, skip_special_tokens=True)\n", + "\n", + " # Update the current trace with evaluation scores\n", + " opik_context.update_current_trace(\n", + " feedback_scores=[\n", + " {\n", + " \"name\": \"Yoda style eval\",\n", + " \"value\": scoring_function(result)\n", + " }\n", + " ]\n", + " )\n", + "\n", + " return result" + ], + "metadata": { + "id": "VqQdGAKfm-8H" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now you can make an example call to your model to see the trace logged to Opik. Once you run the cell below you should see a link to your Opik UI where your traces are logged to your project. Follow that link to see your traces in the Opik platform." + ], + "metadata": { + "id": "uYXA6wOPnAhb" + } + }, + { + "cell_type": "code", + "source": [ + "# Let's try chatting with the model now to see the traces produced with the score\n", + "answer = inference_chat(\n", + " \"Who was the only non-Jedi to wield a lightsaber in the original Star Wars trilogy?\",\n", + " only_answer=True,\n", + " max_new_tokens=32,\n", + ")\n", + "\n", + "print(answer)" + ], + "metadata": { + "id": "uLYflR9DnCfM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pRhbKTy2fny0" + }, + "source": [ + "## 2.6: Conclusion\n", "\n", "Experiment with both your chat model and your judge LLM to try to improve the quality of the Yoda-speak. The competition for this lab will be based on the following criteria:\n", "* **Likelihood of true Yoda-speak under your chat model**: the better your chat model does at understanding Yoda-speak, it will estimate a lower cross entropy loss for language that is true Yoda-speak. At the end of this lab, you will evaluate the likelihood of a held-out test-sample of true Yoda-speak under your chat model. Include this likelihood in your report. 
This gives us a quantitative measure to compare different chat models (which may have interacted with different judge LLMs).\n", @@ -1220,11 +1171,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MqnrG24FBvnK", - "outputId": "b93c17db-6968-40f3-a012-b3b202185bb6" + "id": "MqnrG24FBvnK" }, "outputs": [], "source": [ @@ -1247,11 +1194,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "XmlmIGJyfny0" + }, "source": [ "# Submission information\n", "\n", - "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm)):\n", + "To enter the competition, please upload the following to the lab [submission site for the Large Language Models Lab](https://www.dropbox.com/request/l2JH7UlrayUl1Ps5ZVZm):\n", "\n", "* Jupyter notebook with the code you used to generate your results;\n", "* copy of the bar plot showing the judge LLM's scores of text in base style, generated text, and text in true Yoda-speak style;\n", @@ -1265,11 +1214,6 @@ "\n", "" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { @@ -1293,4116 +1237,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00dffcff57a14ad28d665cd2c2a11960": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "01b7fbea9de54e338e3862e09d7e353d": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "01bc169362704eeebd69a87d641d269e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7bbc93e57dda4424acb428027a9f014a", - "IPY_MODEL_09b97b2a1f734e38b2a9908cf59edd8d", - "IPY_MODEL_74dc454addc64783bbf1b3897a817147" - ], - "layout": "IPY_MODEL_47037605ebef451e91b64dd2fb040475" - } - }, - "02dbaaf3131648f8a5b0eb6bf7a4d089": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "02edcc6aafcf4895843ff5e93ef30f45": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_44c5c62e4af7441bafbc7734982aa660", - "IPY_MODEL_4b81f2c217b24406be898b1333b56352", - "IPY_MODEL_d73b1aa5cf2e46c9ac65c617af00739f" - ], - "layout": "IPY_MODEL_6e626c5ef0dd408eaf3139f6aabaf190" - } - }, - "087ed90b113448aa9f5079457ca4ba2b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f2418db0b0ee4d3ca801f11c75ac1aca", - "max": 913, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_676328ed1fb04ff4983a5b26df17d966", - "value": 913 - } - }, - "095a95bac5224763b7f512b468c7431d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "09b97b2a1f734e38b2a9908cf59edd8d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c4dc3a623a34415a83c2ffab0e19560b", - "max": 4241003, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cde4b31291a9493f8ef649269ca11e1c", - "value": 4241003 - } - }, - "0b18c6ae2dee474aae96fdbd81637024": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0d4c7d8c22dc49b4be6d4948a3224852": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0e17dd9f94714fb38ecbe3bd68873c1c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0f9fe85f7079487f837ef9a7a6d7cbc5": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dacf87a2148c49db9306694b5a5f33da", - "placeholder": "​", - "style": "IPY_MODEL_3198b48f531d4e26bff98917f9d2b592", - "value": " 913/913 [00:00<00:00, 69.3kB/s]" - } - }, - "151b7ed8c9ca4a3192e2a28ff99c3dc6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "16d840d19a804bec80ea85cafc850c13": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1725a2fb58b94626a34f87c66ba0e8c2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1734b0819fe74736a0417a9e2b977695": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "19f8ecfe426246eb93849b324e986d37": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - }, - "1a1e342e7aa943cd82c91b224ea01932": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1a949dd5e121434dbbf1b0c290d71373": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1acb7981a03c4d8491072db5b0f80b91": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_20f4fd378f6b44f386a6bdd9f0e787f7", - "placeholder": "​", - "style": "IPY_MODEL_72693249b56e4995815d950d33ebbbba", - "value": "README.md: 100%" - } - }, - "1c35e9b4250f4fca9e65ecfe4dcb4006": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1caca54176f24a68841321407d5cb92c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", 
- "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_304108b55b1c4ae58ac271e2d8616746", - "placeholder": "​", - "style": "IPY_MODEL_1a1e342e7aa943cd82c91b224ea01932", - "value": " 8.20k/8.20k [00:00<00:00, 349kB/s]" - } - }, - "1d6090d1b9e24e3cb550b655b8fbe318": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1eacc88f8b754c7e93582ce65f99b5db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "20f4fd378f6b44f386a6bdd9f0e787f7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": 
null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "22f90aaa2b1642c9bf9b385010b8a4cb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2318014fa6fd4452b76b5938a7da0c6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c1d5a98c0f324e29a3628ff49718d7b6", - "placeholder": "​", - "style": "IPY_MODEL_d0cb6b890289454981f6b9ad8cb2a0e1", - "value": "tokenizer.json: 100%" - } - }, - "231e675f282d48a39e023149d4879b8b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16d840d19a804bec80ea85cafc850c13", - "placeholder": "​", - "style": "IPY_MODEL_8642a2df48194dc2a0314de10e0a7635", - "value": "tokenizer_config.json: 100%" - } - }, - "23790096dbc541d49e8db4c11a772a3f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a239a415866d47238ffa50a5c9c0a580", - "placeholder": "​", - "style": "IPY_MODEL_00dffcff57a14ad28d665cd2c2a11960", - "value": " 209/209 [00:00<00:00, 14.3kB/s]" - } - }, - "2846d60e43a24160b177166c25dd0122": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_231e675f282d48a39e023149d4879b8b", - "IPY_MODEL_ce1a72b3385c44a2b6c8c36acc48867f", - "IPY_MODEL_57180ced897d4007a6d836665a032802" - ], - "layout": "IPY_MODEL_8d2df8e3bb4b410f9f671d4cd2a6e80d" - } - }, - "2f803afa195c476fbfb506d53645c381": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "304108b55b1c4ae58ac271e2d8616746": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3198b48f531d4e26bff98917f9d2b592": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "34711f6447034a728316aacfc401a7e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fb69e7b86acd485e814ffb0f7ef142f3", - 
"IPY_MODEL_e92ae53e3bc14aa59b8cee25909c1d2a", - "IPY_MODEL_a6cc7eb40dbb4eff9c1e9a3f3b2aa381" - ], - "layout": "IPY_MODEL_91bf23bab4a84645b07952fc7a088c36" - } - }, - "34976cd4ca634e4cb7a5c0efffa41e81": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "34ddb97a59d940879eb53d3e4dbe177e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_be8a8c70a4c44ca4bd6fa595b29b3a35", - "IPY_MODEL_f7dba9ee7dd646f5bf4e9f8589addc83", - "IPY_MODEL_23790096dbc541d49e8db4c11a772a3f" - ], - "layout": "IPY_MODEL_19f8ecfe426246eb93849b324e986d37" - } - }, - "34ff40c5c4cf405d8ef59a12171b03a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1c35e9b4250f4fca9e65ecfe4dcb4006", - "placeholder": "​", - "style": "IPY_MODEL_1eacc88f8b754c7e93582ce65f99b5db", - "value": "special_tokens_map.json: 100%" - } - 
}, - "389c5f0e14a24cf08aa175f1f21b22fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "389fffd528eb47f4b443b5e311a43629": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "395eb951f3044c20a6416c346c3e1cdd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": 
null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3d6d0fa2af094773b593a85d6c51cf48": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b17245b343ee4c2aad1afb45814ec63c", - "placeholder": "​", - "style": "IPY_MODEL_fdffb194cfad4bc2a2adb90614977445", - "value": "databricks-dolly-15k.jsonl: 100%" - } - }, - "43e58008991640f1a96e123f545ca52d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4495489fb35f495c898b334d75c8e1ed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "44c5c62e4af7441bafbc7734982aa660": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1725a2fb58b94626a34f87c66ba0e8c2", - "placeholder": "​", - "style": "IPY_MODEL_1d6090d1b9e24e3cb550b655b8fbe318", - "value": 
"model.safetensors: 100%" - } - }, - "453101669bb84ec784d30fdecf9e1052": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1acb7981a03c4d8491072db5b0f80b91", - "IPY_MODEL_f0013cd0e75942a7b6f0af20d710c9f9", - "IPY_MODEL_1caca54176f24a68841321407d5cb92c" - ], - "layout": "IPY_MODEL_4bf984821d194c64945609ccf5d08ab0" - } - }, - "47037605ebef451e91b64dd2fb040475": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "49680ea9e5ae4916b52e398e27f87ff5": 
{ - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4b81f2c217b24406be898b1333b56352": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2f803afa195c476fbfb506d53645c381", - "max": 5228717512, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1734b0819fe74736a0417a9e2b977695", - "value": 5228717512 - } - }, - "4bf984821d194c64945609ccf5d08ab0": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4ec2221b24b94685887b091b45f3f746": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "516627614ee0481aa5ac80cc77673a54": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "57180ced897d4007a6d836665a032802": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ba606012b7a14ad2824fe6843930ca08", - "placeholder": "​", - "style": "IPY_MODEL_9d5116fb35f44752a680fe7dc2b410b7", - "value": " 47.0k/47.0k [00:00<00:00, 2.43MB/s]" - } - }, - "578a08d4d89b496dbca00da965b745d2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", 
- "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e8a1e9cc828f4a4d9c8f4e96b7fbb2fb", - "IPY_MODEL_b631f91b3a5040e0b237936b412d274b", - "IPY_MODEL_65670d440ae448c1862c9350e2784a3f" - ], - "layout": "IPY_MODEL_bb749eaf05dc4fbb9e134cc61caae11b" - } - }, - "586958735baa4f29978d399852dc2aff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "589c07cbcc1b4d3db5bdee5a15dbd8df": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"5c780ea0aeee467da497547d78453492": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5cd563e97ce742e99942f553b31e3bed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5dd29f36fb5745618d95abda81e869bb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6311ea720e344309b1d6fa1445f347e3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "64d0bc7735bf42ce800f56ebcce3cdce": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "65670d440ae448c1862c9350e2784a3f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_395eb951f3044c20a6416c346c3e1cdd", - 
"placeholder": "​", - "style": "IPY_MODEL_516627614ee0481aa5ac80cc77673a54", - "value": " 15011/15011 [00:00<00:00, 62410.60 examples/s]" - } - }, - "676328ed1fb04ff4983a5b26df17d966": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6a550e5a66704b7b819286707bd3a918": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3d6d0fa2af094773b593a85d6c51cf48", - "IPY_MODEL_8127e4af60a149f68318c0222641718f", - "IPY_MODEL_ec45944210dc46058e722e9969a7dcdc" - ], - "layout": "IPY_MODEL_095a95bac5224763b7f512b468c7431d" - } - }, - "6e626c5ef0dd408eaf3139f6aabaf190": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": 
null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "72693249b56e4995815d950d33ebbbba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "73aa48a573e349b1a05ba0bb5526bc2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "74dc454addc64783bbf1b3897a817147": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - 
"layout": "IPY_MODEL_7899c5e27ac64478a6e6ac767da24a20", - "placeholder": "​", - "style": "IPY_MODEL_0b18c6ae2dee474aae96fdbd81637024", - "value": " 4.24M/4.24M [00:00<00:00, 31.6MB/s]" - } - }, - "7899c5e27ac64478a6e6ac767da24a20": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7bbc93e57dda4424acb428027a9f014a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f701d542971a4238aa8b76affc054743", - "placeholder": 
"​", - "style": "IPY_MODEL_9498c07f6ad74b248c94de3bad444f62", - "value": "tokenizer.model: 100%" - } - }, - "7d2b9dea260143eb8c2933a6d3592bb0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5cd563e97ce742e99942f553b31e3bed", - "placeholder": "​", - "style": "IPY_MODEL_e988eba4dbe546d484a6c4e88cf90b88", - "value": "config.json: 100%" - } - }, - "7d93f09ca25a498fbd4776daa0fc4c53": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": 
null - } - }, - "8127e4af60a149f68318c0222641718f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_589c07cbcc1b4d3db5bdee5a15dbd8df", - "max": 13085339, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ad5c35c060754bc8ae7bae0832af3921", - "value": 13085339 - } - }, - "81b9c3a820424c67a4c050545c2daa2e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2318014fa6fd4452b76b5938a7da0c6f", - "IPY_MODEL_df141f6e170f4af98d009fd42043a359", - "IPY_MODEL_c34cba3327304cf98154ce2c73218441" - ], - "layout": "IPY_MODEL_1a949dd5e121434dbbf1b0c290d71373" - } - }, - "861dfc84b7364159a78379c91007e413": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": 
null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8642a2df48194dc2a0314de10e0a7635": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8d2df8e3bb4b410f9f671d4cd2a6e80d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "91bf23bab4a84645b07952fc7a088c36": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9498c07f6ad74b248c94de3bad444f62": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "97f1a984a0a149bc9f305f18eb109b67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9a7787f0d75847219071be822ccd76ba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9af5e516b8594e7da181917ff351e019": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { 
- "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_43e58008991640f1a96e123f545ca52d", - "placeholder": "​", - "style": "IPY_MODEL_5c780ea0aeee467da497547d78453492", - "value": "Map: 100%" - } - }, - "9bead4274c0c4fc6acf12bf6b9dec75a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34ff40c5c4cf405d8ef59a12171b03a5", - "IPY_MODEL_9d8d908e12b846d58aea8b0e48dd6b92", - "IPY_MODEL_e9c00880fa4b47c7bf645c3f91a950a9" - ], - "layout": "IPY_MODEL_7d93f09ca25a498fbd4776daa0fc4c53" - } - }, - "9d5116fb35f44752a680fe7dc2b410b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9d8d908e12b846d58aea8b0e48dd6b92": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6311ea720e344309b1d6fa1445f347e3", - "max": 636, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ba866548b5544345b37e29f6d8e92652", - "value": 636 - } - }, - "a239a415866d47238ffa50a5c9c0a580": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a6cc7eb40dbb4eff9c1e9a3f3b2aa381": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_db15fee2fae44e4babb449d56aeca0f3", - "placeholder": "​", - "style": "IPY_MODEL_389c5f0e14a24cf08aa175f1f21b22fc", - "value": " 2048/2048 [00:00<00:00, 6639.89 examples/s]" - } - }, - "ad5c35c060754bc8ae7bae0832af3921": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b12294da6032493e9ac7783b8e3ddaff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "b17245b343ee4c2aad1afb45814ec63c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b631f91b3a5040e0b237936b412d274b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_02dbaaf3131648f8a5b0eb6bf7a4d089", - "max": 15011, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ddabe3ec75d247468550ce9b202e30ab", - 
"value": 15011 - } - }, - "b67122a0d1b24d168be2501782effd15": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b73ef786040243589d43806a965f0eea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ba606012b7a14ad2824fe6843930ca08": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ba866548b5544345b37e29f6d8e92652": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "baace428cd5545718ddc6d0749e53562": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0d4c7d8c22dc49b4be6d4948a3224852", - "placeholder": "​", - "style": "IPY_MODEL_d95ba2612d5e409da8899e679e39c4ee", - "value": " 2048/2048 [00:00<00:00, 8114.30 examples/s]" - } - }, - "bb749eaf05dc4fbb9e134cc61caae11b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bc10c09f48534cc081dc53a4cc7bc20a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "be8a8c70a4c44ca4bd6fa595b29b3a35": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_efec2d4919314a79bd55fed697631516", - "placeholder": "​", - "style": "IPY_MODEL_389fffd528eb47f4b443b5e311a43629", - "value": "generation_config.json: 100%" - } - }, - "bf06c4115ae54e7b9da2838c9b6069a0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c1d5a98c0f324e29a3628ff49718d7b6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - 
"max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c34cba3327304cf98154ce2c73218441": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_64d0bc7735bf42ce800f56ebcce3cdce", - "placeholder": "​", - "style": "IPY_MODEL_01b7fbea9de54e338e3862e09d7e353d", - "value": " 17.5M/17.5M [00:00<00:00, 42.3MB/s]" - } - }, - "c4dc3a623a34415a83c2ffab0e19560b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c6487dbfe53345b9822b372069f34922": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bf06c4115ae54e7b9da2838c9b6069a0", - "max": 2048, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b73ef786040243589d43806a965f0eea", - "value": 2048 - } - }, - "c7e6412c823d48e9845eecb1b4e4d7f1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cde4b31291a9493f8ef649269ca11e1c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ce1a72b3385c44a2b6c8c36acc48867f": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9a7787f0d75847219071be822ccd76ba", - "max": 47022, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc10c09f48534cc081dc53a4cc7bc20a", - "value": 47022 - } - }, - "d0cb6b890289454981f6b9ad8cb2a0e1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d26f0017695b4e42b1c2736c07575775": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d5f566c5de7d4dd1808975839ab8b973": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d73b1aa5cf2e46c9ac65c617af00739f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d82e67b97ea24f80a1478783cfb0f365", - "placeholder": "​", - "style": "IPY_MODEL_586958735baa4f29978d399852dc2aff", - "value": " 5.23G/5.23G [02:03<00:00, 42.5MB/s]" - } - }, - "d82e67b97ea24f80a1478783cfb0f365": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d95ba2612d5e409da8899e679e39c4ee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "StyleView", - "description_width": "" - } - }, - "dacf87a2148c49db9306694b5a5f33da": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "db15fee2fae44e4babb449d56aeca0f3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ddabe3ec75d247468550ce9b202e30ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "df141f6e170f4af98d009fd42043a359": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4495489fb35f495c898b334d75c8e1ed", - "max": 17525357, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_34976cd4ca634e4cb7a5c0efffa41e81", - "value": 17525357 - } - }, - "e715b19f10c64131ba65d96bf968d72d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7d2b9dea260143eb8c2933a6d3592bb0", - "IPY_MODEL_087ed90b113448aa9f5079457ca4ba2b", - "IPY_MODEL_0f9fe85f7079487f837ef9a7a6d7cbc5" - ], - "layout": "IPY_MODEL_49680ea9e5ae4916b52e398e27f87ff5" - } - }, - "e8a1e9cc828f4a4d9c8f4e96b7fbb2fb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_861dfc84b7364159a78379c91007e413", - "placeholder": "​", - "style": "IPY_MODEL_b67122a0d1b24d168be2501782effd15", - "value": "Generating train split: 100%" - } - }, - "e92ae53e3bc14aa59b8cee25909c1d2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ec2221b24b94685887b091b45f3f746", - "max": 2048, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ee83baaeecd944a99c11f20f9b4f03fd", - "value": 2048 - } - }, - "e988eba4dbe546d484a6c4e88cf90b88": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e9c00880fa4b47c7bf645c3f91a950a9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d5f566c5de7d4dd1808975839ab8b973", - "placeholder": "​", - "style": "IPY_MODEL_0e17dd9f94714fb38ecbe3bd68873c1c", - "value": " 636/636 [00:00<00:00, 33.0kB/s]" - } - }, - "ec45944210dc46058e722e9969a7dcdc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22f90aaa2b1642c9bf9b385010b8a4cb", - "placeholder": "​", - "style": "IPY_MODEL_c7e6412c823d48e9845eecb1b4e4d7f1", - "value": " 13.1M/13.1M [00:00<00:00, 49.8MB/s]" - } - }, - "ee83baaeecd944a99c11f20f9b4f03fd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "efec2d4919314a79bd55fed697631516": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f0013cd0e75942a7b6f0af20d710c9f9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_5dd29f36fb5745618d95abda81e869bb", - "max": 8199, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f433b043c2ad41d7ba01a9ee1187fffe", - "value": 8199 - } - }, - "f2418db0b0ee4d3ca801f11c75ac1aca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f433b043c2ad41d7ba01a9ee1187fffe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "f701d542971a4238aa8b76affc054743": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7dba9ee7dd646f5bf4e9f8589addc83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d26f0017695b4e42b1c2736c07575775", - "max": 209, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_73aa48a573e349b1a05ba0bb5526bc2a", - "value": 209 - } - }, - "fb69e7b86acd485e814ffb0f7ef142f3": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_151b7ed8c9ca4a3192e2a28ff99c3dc6", - "placeholder": "​", - "style": "IPY_MODEL_97f1a984a0a149bc9f305f18eb109b67", - "value": "Map: 100%" - } - }, - "fdffb194cfad4bc2a2adb90614977445": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fe486852cda849d5b2cf2dda69c46feb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9af5e516b8594e7da181917ff351e019", - "IPY_MODEL_c6487dbfe53345b9822b372069f34922", - "IPY_MODEL_baace428cd5545718ddc6d0749e53562" - ], - "layout": "IPY_MODEL_b12294da6032493e9ac7783b8e3ddaff" - } - } - } } }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file