secrets and mlflow should now work
This commit is contained in:
parent
f107164b51
commit
af9b700a5b
@ -22,6 +22,11 @@ jobs:
|
|||||||
pip install poetry
|
pip install poetry
|
||||||
poetry install
|
poetry install
|
||||||
|
|
||||||
|
- name: Setup DVC
|
||||||
|
run: |
|
||||||
|
poetry run dvc remote list
|
||||||
|
poetry run dvc pull || echo "DVC pull failed, continuing..."
|
||||||
|
|
||||||
- name: Start API server
|
- name: Start API server
|
||||||
run: |
|
run: |
|
||||||
poetry run uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
|
poetry run uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
|
||||||
@ -48,20 +53,28 @@ jobs:
|
|||||||
poetry install
|
poetry install
|
||||||
|
|
||||||
- name: Setup DVC
|
- name: Setup DVC
|
||||||
run: poetry run dvc pull
|
run: |
|
||||||
|
poetry run dvc remote list
|
||||||
|
poetry run dvc pull || echo "DVC pull failed, continuing..."
|
||||||
|
|
||||||
- name: Train model
|
- name: Run DVC pipeline
|
||||||
run: poetry run python src/models/train.py
|
run: poetry run dvc repro
|
||||||
env:
|
env:
|
||||||
MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }}
|
MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_TRACKING_URI }}
|
||||||
MLFLOW_TRACKING_USERNAME: ${{ secrets.CSGO }}
|
MLFLOW_TRACKING_USERNAME: ${{ secrets.MLFLOW_TRACKING_USERNAME }}
|
||||||
MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLOPSEPITALYON }}
|
MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLFLOW_TRACKING_PASSWORD }}
|
||||||
|
|
||||||
|
- name: Push DVC changes
|
||||||
|
run: |
|
||||||
|
poetry run dvc push || echo "DVC push failed, continuing..."
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
needs: train
|
needs: train
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: github.ref == 'refs/heads/main'
|
if: github.ref == 'refs/heads/main'
|
||||||
steps:
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
- name: Build Docker image
|
- name: Build Docker image
|
||||||
run: |
|
run: |
|
||||||
docker build -t csgo-mlops:${{ github.sha }} .
|
docker build -t csgo-mlops:${{ github.sha }} .
|
||||||
7
models/metrics.json
Normal file
7
models/metrics.json
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"accuracy": 1.0,
|
||||||
|
"precision": 1.0,
|
||||||
|
"recall": 1.0,
|
||||||
|
"f1_score": 1.0,
|
||||||
|
"roc_auc": 1.0
|
||||||
|
}
|
||||||
@ -21,7 +21,8 @@ dependencies = [
|
|||||||
"pytest (>=7.0.0,<8.0.0)",
|
"pytest (>=7.0.0,<8.0.0)",
|
||||||
"pytest-cov (>=4.0.0,<5.0.0)",
|
"pytest-cov (>=4.0.0,<5.0.0)",
|
||||||
"uvicorn (>=0.24.0,<1.0.0)",
|
"uvicorn (>=0.24.0,<1.0.0)",
|
||||||
"pandas (>=2.0.0,<3.0.0)"
|
"pandas (>=2.0.0,<3.0.0)",
|
||||||
|
"pyyaml (>=6.0.0,<7.0.0)"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,14 +7,23 @@ import mlflow.sklearn
|
|||||||
import yaml
|
import yaml
|
||||||
import json
|
import json
|
||||||
import pickle
|
import pickle
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
# Configure MLflow
|
# Configure MLflow
|
||||||
mlflow.set_tracking_uri("https://mlflow.sortifal.dev")
|
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "https://mlflow.sortifal.dev"))
|
||||||
mlflow.set_experiment("csgo-match-prediction")
|
|
||||||
|
# Try to set experiment, but handle auth errors gracefully
|
||||||
|
USE_MLFLOW = True
|
||||||
|
try:
|
||||||
|
mlflow.set_experiment("csgo-match-prediction")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Could not connect to MLflow: {e}")
|
||||||
|
print("Training will continue without MLflow tracking.")
|
||||||
|
USE_MLFLOW = False
|
||||||
|
|
||||||
def load_params():
|
def load_params():
|
||||||
"""Load training parameters from params.yaml"""
|
"""Load training parameters from params.yaml"""
|
||||||
@ -92,38 +101,64 @@ def main():
|
|||||||
print(f" Test samples: {len(X_test)}")
|
print(f" Test samples: {len(X_test)}")
|
||||||
print(f" Features: {X_train.shape[1]}")
|
print(f" Features: {X_train.shape[1]}")
|
||||||
|
|
||||||
# Start MLflow run
|
if USE_MLFLOW:
|
||||||
with mlflow.start_run(run_name="random-forest-csgo"):
|
# Start MLflow run
|
||||||
# Log parameters
|
with mlflow.start_run(run_name="random-forest-csgo"):
|
||||||
mlflow.log_params(params)
|
# Log parameters
|
||||||
mlflow.log_param("n_features", X_train.shape[1])
|
mlflow.log_params(params)
|
||||||
mlflow.log_param("n_train_samples", len(X_train))
|
mlflow.log_param("n_features", X_train.shape[1])
|
||||||
mlflow.log_param("n_test_samples", len(X_test))
|
mlflow.log_param("n_train_samples", len(X_train))
|
||||||
|
mlflow.log_param("n_test_samples", len(X_test))
|
||||||
|
|
||||||
# Train model
|
# Train model
|
||||||
|
model = train_model(X_train, y_train, params)
|
||||||
|
|
||||||
|
# Evaluate model
|
||||||
|
metrics = evaluate_model(model, X_test, y_test)
|
||||||
|
|
||||||
|
# Log metrics to MLflow
|
||||||
|
mlflow.log_metrics(metrics)
|
||||||
|
|
||||||
|
# Log feature importance
|
||||||
|
feature_importance = dict(zip(X_train.columns, model.feature_importances_))
|
||||||
|
top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
print("\nTop 5 most important features:")
|
||||||
|
for feat, importance in top_features:
|
||||||
|
print(f" {feat}: {importance:.4f}")
|
||||||
|
mlflow.log_metric(f"importance_{feat}", importance)
|
||||||
|
|
||||||
|
# Try to log model to MLflow (if permissions allow)
|
||||||
|
try:
|
||||||
|
mlflow.sklearn.log_model(model, "model")
|
||||||
|
print("\nModel logged to MLflow successfully!")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"\nWarning: Could not log model to MLflow: {e}")
|
||||||
|
print("Model will only be saved locally.")
|
||||||
|
|
||||||
|
# Save model and metrics locally
|
||||||
|
save_model(model, metrics)
|
||||||
|
|
||||||
|
# Print results
|
||||||
|
print("\n" + "=" * 60)
|
||||||
|
print("Training Results:")
|
||||||
|
print("=" * 60)
|
||||||
|
for metric, value in metrics.items():
|
||||||
|
print(f" {metric}: {value:.4f}")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
print(f"\nMLflow run ID: {mlflow.active_run().info.run_id}")
|
||||||
|
print(f"View run at: {mlflow.get_tracking_uri()}")
|
||||||
|
else:
|
||||||
|
# Train without MLflow
|
||||||
model = train_model(X_train, y_train, params)
|
model = train_model(X_train, y_train, params)
|
||||||
|
|
||||||
# Evaluate model
|
|
||||||
metrics = evaluate_model(model, X_test, y_test)
|
metrics = evaluate_model(model, X_test, y_test)
|
||||||
|
|
||||||
# Log metrics to MLflow
|
|
||||||
mlflow.log_metrics(metrics)
|
|
||||||
|
|
||||||
# Log feature importance
|
# Log feature importance
|
||||||
feature_importance = dict(zip(X_train.columns, model.feature_importances_))
|
feature_importance = dict(zip(X_train.columns, model.feature_importances_))
|
||||||
top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:5]
|
top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
print("\nTop 5 most important features:")
|
print("\nTop 5 most important features:")
|
||||||
for feat, importance in top_features:
|
for feat, importance in top_features:
|
||||||
print(f" {feat}: {importance:.4f}")
|
print(f" {feat}: {importance:.4f}")
|
||||||
mlflow.log_metric(f"importance_{feat}", importance)
|
|
||||||
|
|
||||||
# Try to log model to MLflow (if permissions allow)
|
|
||||||
try:
|
|
||||||
mlflow.sklearn.log_model(model, "model")
|
|
||||||
print("\nModel logged to MLflow successfully!")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\nWarning: Could not log model to MLflow: {e}")
|
|
||||||
print("Model will only be saved locally.")
|
|
||||||
|
|
||||||
# Save model and metrics locally
|
# Save model and metrics locally
|
||||||
save_model(model, metrics)
|
save_model(model, metrics)
|
||||||
@ -136,9 +171,6 @@ def main():
|
|||||||
print(f" {metric}: {value:.4f}")
|
print(f" {metric}: {value:.4f}")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
print(f"\nMLflow run ID: {mlflow.active_run().info.run_id}")
|
|
||||||
print(f"View run at: {mlflow.get_tracking_uri()}")
|
|
||||||
|
|
||||||
print("\nTraining pipeline completed successfully!")
|
print("\nTraining pipeline completed successfully!")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user