This guide explains how to save, load, and manage machine learning models using ObjML's persistence features.
ObjML provides database-backed model persistence with versioning, metadata tracking, and optional preprocessor (encoder) storage.
First, create the def_ml_models table:
python factory.core/ObjML.py create-table
# Example: train a scikit-learn model and persist it to the database.
from ObjML import ObjML
from sklearn.linear_model import LogisticRegression
import numpy as np
# Initialize ObjML (0 presumably selects a default connection/profile — confirm against ObjML docs)
obj_ml = ObjML(0)
# Train a model on a tiny toy dataset (2 features per sample)
X_train = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
y_train = np.array([0, 1, 0, 1])
model = LogisticRegression()
model.fit(X_train, y_train)
# Save to database; returns the new model's id (see API reference below)
model_id = obj_ml.save_model_to_db(
model=model,
model_name="credit_risk_model",
version="v1.0",
feature_names=["age", "income"],
training_metrics={
"accuracy": 0.95,
"precision": 0.93,
"recall": 0.92
}
)
print(f"Model saved: {model_id}")
from ObjML import ObjML
# New session: re-create the ObjML handle (same id as in the save example)
obj_ml = ObjML(0)
# Load the latest version
model, metadata = obj_ml.load_model_from_db(
model_name="credit_risk_model",
version="latest"
)
# Make predictions (assumes `import numpy as np` from the earlier snippet)
X_test = np.array([[2, 3], [4, 5]])
predictions = model.predict(X_test)
print(f"Predictions: {predictions}")
print(f"Model type: {metadata['model_type']}")
print(f"Features: {metadata['feature_names']}")
When using preprocessors like OneHotEncoder:
from sklearn.preprocessing import OneHotEncoder
# Create and fit encoder
encoder = OneHotEncoder()
encoder.fit([['A'], ['B'], ['C']])
# Save model with encoder — the fitted encoder is persisted alongside the model
model_id = obj_ml.save_model_to_db(
model=model,
model_name="model_with_preprocessing",
version="v1.0",
feature_names=["category", "amount"],
training_metrics={"accuracy": 0.90},
encoder=encoder
)
Loading and using the encoder:
# Load the model together with its saved preprocessor.
model, metadata = obj_ml.load_model_from_db(
    model_name="model_with_preprocessing",
    version="latest"
)
# Use the saved encoder (falsy when the model was saved without one)
if metadata['encoder']:
    encoded_data = metadata['encoder'].transform([['A']])
    predictions = model.predict(encoded_data)
# Save version 1.0
obj_ml.save_model_to_db(
model=model_v1,
model_name="fraud_detector",
version="v1.0",
feature_names=features,
training_metrics={"auc": 0.85}
)
# Later, save improved version 2.0
obj_ml.save_model_to_db(
model=model_v2,
model_name="fraud_detector",
version="v2.0",
feature_names=features,
training_metrics={"auc": 0.92},
is_active=True # Mark as production version
)
# Load specific version by its exact version string
model_v1, _ = obj_ml.load_model_from_db(
model_name="fraud_detector",
version="v1.0"
)
# Load latest version ("latest" presumably selects the newest saved version — confirm)
model_latest, _ = obj_ml.load_model_from_db(
model_name="fraud_detector",
version="latest"
)
def predict_credit_risk(customer_data):
    """Production prediction function.

    Loads the latest saved credit-risk model, reorders (and, if a saved
    encoder exists, transforms) the input features, then returns the
    prediction together with its positive-class probability.

    Args:
        customer_data: object indexable by a list of feature names
            (presumably a pandas Series/DataFrame — confirm against callers).

    Returns:
        dict with keys "prediction", "probability", "model_id", "model_type".
    """
    obj_ml = ObjML(0)
    # Load active model
    model, metadata = obj_ml.load_model_from_db(
        model_name="credit_risk_model",
        version="latest"
    )
    # Prepare features in correct order — the saved feature_names define it
    features = customer_data[metadata['feature_names']]
    # Apply encoder if present
    if metadata['encoder']:
        features = metadata['encoder'].transform(features)
    # Make prediction
    prediction = model.predict(features)
    probability = model.predict_proba(features)
    return {
        "prediction": prediction[0],
        "probability": probability[0][1],  # probability of class 1
        "model_id": metadata['model_id'],
        "model_type": metadata['model_type']
    }
python factory.core/ObjML.py list-saved-models
python factory.core/ObjML.py list-saved-models --model credit_risk_model
python factory.core/ObjML.py list-saved-models --limit 10
# Show latest version
python factory.core/ObjML.py show-model credit_risk_model
# Show specific version
python factory.core/ObjML.py show-model credit_risk_model --version v1.0
# End-to-end workflow: load data, train, evaluate, save, reload, predict.
from ObjML import ObjML
from ObjMLDatasets import ObjMLDatasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
# 1. Load dataset
datasets = ObjMLDatasets()
df = datasets.load_dataset("german_credit")
# 2. Prepare data
X = df.drop("class", axis=1)
y = df["class"]
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=42
)
# 3. Train model
# NOTE(review): this call returns its own X_test/y_test split, overwriting
# the ones created by train_test_split above — confirm this is intended.
obj_ml = ObjML(0)
model, metrics, y_pred, X_test, y_test = obj_ml.train_cost_sensitive_classifier(
X=X_train,
y=y_train,
class_weights={0: 1, 1: 5},
model_type="LogisticRegression"
)
# 4. Calculate metrics
# NOTE(review): precision_score/recall_score assume binary numeric labels —
# confirm the encoding of the "class" column in german_credit.
y_pred_test = model.predict(X_test)
training_metrics = {
"accuracy": accuracy_score(y_test, y_pred_test),
"precision": precision_score(y_test, y_pred_test),
"recall": recall_score(y_test, y_pred_test),
"confusion_matrix": metrics["confusion_matrix"]
}
# 5. Save model
model_id = obj_ml.save_model_to_db(
model=model,
model_name="german_credit_model",
version="v1.0",
feature_names=list(X_train.columns),
training_metrics=training_metrics,
created_by="ml_engineer",
is_active=True
)
print(f"✓ Model saved: {model_id}")
# 6. Later: Load and use
loaded_model, metadata = obj_ml.load_model_from_db(
model_name="german_credit_model",
version="latest"
)
# Make new predictions
new_predictions = loaded_model.predict(X_test[:5])
print(f"✓ Predictions: {new_predictions}")
print(f"✓ Using model: {metadata['model_id']}")
Use semantic versioning or date-based versioning:
# Semantic versioning
version="v1.0"
version="v1.1"
version="v2.0"
# Date-based versioning
version="v2025-12-26"
version="v2025-12-27-hotfix"
# Example of a rich metrics payload — stored with the model as-is.
training_metrics = {
"accuracy": 0.95,
"precision": 0.93,
"recall": 0.92,
"f1_score": 0.93,
"auc": 0.96,
"confusion_matrix": [[50, 5], [3, 42]],
"training_samples": 1000,
"test_samples": 300,
"class_distribution": {0: 550, 1: 450}
}
Always store feature names to ensure correct prediction order:
# Store feature names when saving — prediction-time column order must match
feature_names = list(X_train.columns)
model_id = obj_ml.save_model_to_db(
model=model,
model_name="my_model",
version="v1.0",
feature_names=feature_names, # Critical!
training_metrics=metrics
)
# Use feature names when loading
model, metadata = obj_ml.load_model_from_db(
model_name="my_model",
version="latest"
)
# Ensure features are in correct order by reindexing with the saved names
X_pred = customer_data[metadata['feature_names']]
predictions = model.predict(X_pred)
# Mark as active/production model (is_active defaults to False per the API reference)
obj_ml.save_model_to_db(
model=model,
model_name="production_model",
version="v2.0",
feature_names=features,
training_metrics=metrics,
is_active=True # Flag for production
)
Always validate loaded models before production use:
# Load model
model, metadata = obj_ml.load_model_from_db(
    model_name="my_model",
    version="latest"
)
# Validate on a held-out test set before serving
test_predictions = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
if test_accuracy < 0.80:  # 0.80 = example acceptance threshold
    print("⚠ Warning: Model accuracy below threshold")
else:
    print(f"✓ Model validated: {test_accuracy:.2f}")
# Handle a missing model/version gracefully — load raises ValueError.
try:
    model, metadata = obj_ml.load_model_from_db(
        model_name="my_model",
        version="v1.0"
    )
except ValueError as e:
    print(f"Error: {e}")
# List available models
models = obj_ml.list_models(model_name="my_model")
print(f"Available versions: {[m['version'] for m in models]}")
If sklearn versions differ between training and loading:
# NOTE(review): requires `import sklearn` in scope for the version check below.
model, metadata = obj_ml.load_model_from_db(model_name="my_model")
print(f"Trained with: {metadata['metadata']['sklearn_version']}")
print(f"Current version: {sklearn.__version__}")
# Models usually work across minor versions
# For major version differences, consider retraining
For models larger than 100 MB, increase the database's `max_allowed_packet` setting.
model_id = obj_ml.save_model_to_db(
model, # Trained scikit-learn model
model_name, # Name for the model
version, # Version string (e.g., "v1.0")
feature_names, # List of feature names
training_metrics, # Dict of training metrics
encoder=None, # Optional encoder/preprocessor
model_type=None, # Auto-detected if not provided
created_by=None, # Username/identifier
is_active=False # Production flag
)
# API reference: load a saved model; returns (model, metadata).
model, metadata = obj_ml.load_model_from_db(
model_name, # Name of model to load
version="latest" # "latest" or specific version
)
# metadata contains:
# - encoder: Preprocessor object (if any)
# - metadata: Training metadata (date, versions, etc.)
# - feature_names: List of feature names
# - model_id: UUID of the model
# - model_type: Type of model
# - created_at: Creation timestamp
# - training_metrics: Training metrics dict
# API reference: enumerate saved models, optionally filtered by name.
models = obj_ml.list_models(
model_name=None, # Filter by name (optional)
limit=None # Max results (optional)
)
# Returns list of dicts with:
# - model_id, model_name, version, model_type
# - created_at, created_by, is_active
# - feature_count