ML Models: Time Traveling

Learn how to time-travel between versions of ML models, similar to time traveling through array data.

In this tutorial, you will learn how to time travel between different versions of ML models. Time traveling and versioning let you manage the state and history of your models and navigate between past and present versions. This is particularly useful for debugging, understanding how a model evolved, and verifying its correctness; it is also essential for tracking changes, collaborating across multiple developers, and maintaining a history of modifications. The same workflow is shown for each of the following frameworks, with a short sketch of the underlying mechanism after the list:

  • PyTorch
  • TensorFlow
  • sklearn
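
Under the hood, tiledb-ml persists each model as a TileDB array, and every save() writes a new fragment with its own timestamp range. Opening the array at an earlier timestamp hides later writes, which is exactly what time traveling means here. The following is a minimal sketch of that idea using plain tiledb-py; it assumes uri points at a model array that has been saved more than once, as in the framework sections below.

import tiledb

# Each save() created one fragment; its timestamp range identifies a version.
ts_ranges = [frag.timestamp_range for frag in tiledb.array_fragments(uri)]
print("versions:", ts_ranges)

# Open the array as of the first version's end timestamp: later writes
# are invisible at this point in time.
with tiledb.open(uri, mode="r", timestamp=ts_ranges[0][1]) as model_array:
    for key, value in model_array.meta.items():
        print(key, value)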

PyTorch

Import libraries

Start by importing the libraries used in this tutorial.

import tempfile

import tiledb
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tiledb.ml.models.pytorch import PyTorchTileDBModel

Configure store paths

uri = tempfile.mkdtemp("ml_model")

Configure the model

# Training hyperparameters. Only epochs, learning_rate, and momentum are
# used in this tutorial; batch_size_train and log_interval would apply to
# a full training loop.
epochs = 1
batch_size_train = 128
learning_rate = 0.01
momentum = 0.5
log_interval = 10

Define the model

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

Save the model

network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
tiledb_model = PyTorchTileDBModel(uri=uri, model=network, optimizer=optimizer)
tiledb_model.save(meta={"epochs": epochs})

Change the model

# Redefine the network without the Dropout2d layer, then save it to the
# same URI to create a second model version.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
tiledb_model = PyTorchTileDBModel(uri=uri, model=network, optimizer=optimizer)
tiledb_model.save(meta={"epochs": epochs})

Check model fragments

fragments_info = tiledb.array_fragments(uri)

print()
print("====== FRAGMENTS INFO ======")
print("array uri: {}".format(fragments_info.array_uri))
print("number of fragments: {}".format(len(fragments_info)))

for fragment_num, fragment in enumerate(fragments_info, start=1):
    print()
    print("===== FRAGMENT NUMBER {} =====".format(fragment_num))
    print("fragment uri: {}".format(fragment.uri))
    print("timestamp range: {}".format(fragment.timestamp_range))
    print(
        "number of unconsolidated metadata: {}".format(
            fragment.unconsolidated_metadata_num
        )
    )
    print("version: {}".format(fragment.version))

TensorFlow

Import libraries

Start by importing the libraries used in this tutorial.

import tempfile

import tensorflow as tf
import tiledb
from tiledb.ml.models.tensorflow_keras import TensorflowKerasTileDBModel

Configure store paths

uri = tempfile.mkdtemp("ml_model")

Define the model

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

Save the model

tiledb_model = TensorflowKerasTileDBModel(uri=uri, model=model)
tiledb_model.save(include_optimizer=True)

Change the model

# Remove the Dropout layer and shrink the output layer from 10 to 5 units.
model_2 = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(5),
    ]
)
tiledb_model = TensorflowKerasTileDBModel(uri=uri, model=model_2)
tiledb_model.save(include_optimizer=True)

Check model fragments

fragments_info = tiledb.array_fragments(uri)
print("array uri: {}".format(fragments_info.array_uri))
print("number of fragments: {}".format(len(fragments_info)))

for fragment_num, fragment in enumerate(fragments_info, start=1):
    print()
    print("===== FRAGMENT NUMBER {} =====".format(fragment_num))
    print("fragment uri: {}".format(fragment.uri))
    print("timestamp range: {}".format(fragment.timestamp_range))
    print(
        "number of unconsolidated metadata: {}".format(
            fragment.unconsolidated_metadata_num
        )
    )
    print("version: {}".format(fragment.version))

sklearn

Import libraries

Start by importing the libraries used in this tutorial.

import tempfile

import tiledb
from sklearn.linear_model import LinearRegression, LogisticRegression
from tiledb.ml.models.sklearn import SklearnTileDBModel

Configure store paths

uri = tempfile.mkdtemp("ml_model")

Define the model

model_1 = LogisticRegression(random_state=0)

Save the model

tiledb_model = SklearnTileDBModel(uri=uri, model=model_1)
tiledb_model.save(meta={"Sparsity_with_L1_penalty": "dummy_meta"})

Change the model

model_2 = LinearRegression()
tiledb_model = SklearnTileDBModel(uri=uri, model=model_2)
tiledb_model.save(meta={"Sparsity_with_L1_penalty": "dummy_meta"})

Check model fragments

fragments_info = tiledb.array_fragments(uri)
print("array uri: {}".format(fragments_info.array_uri))
print("number of fragments: {}".format(len(fragments_info)))

for fragment_num, fragment in enumerate(fragments_info, start=1):
    print()
    print("===== FRAGMENT NUMBER {} =====".format(fragment_num))
    print("fragment uri: {}".format(fragment.uri))
    print("timestamp range: {}".format(fragment.timestamp_range))
    print(
        "number of unconsolidated metadata: {}".format(
            fragment.unconsolidated_metadata_num
        )
    )
    print("version: {}".format(fragment.version))