train fix

2025-09-30 17:04:43 +02:00 · 2025-09-30 17:04:43 +02:00 · 4df499be5c
commit 4df499be5c
parent abad691246
6 changed files with 24 additions and 12 deletions
--- a/.github/workflows/mlops-pipeline.yml
+++ b/.github/workflows/mlops-pipeline.yml
@@ -46,6 +46,8 @@ jobs:
        run: poetry run python src/models/train.py
        env:
          MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }}
+          MLFLOW_TRACKING_USERNAME: ${{ secrets.CSGO }}
+          MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLOPSEPITALYON }}

  deploy:
    needs: train
--- a/mlruns/0/meta.yaml
+++ b/mlruns/0/meta.yaml
@@ -1,6 +0,0 @@
-artifact_location: file:///home/paul/ING3/MLOps/mlruns/0
-creation_time: 1759242937587
-experiment_id: '0'
-last_update_time: 1759242937587
-lifecycle_stage: active
-name: Default
--- a/poetry.lock
+++ b/poetry.lock
@@ -8051,4 +8051,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12,<3.14"
-content-hash = "8e91d18cc21544a0a2fa02777272f825c4f6f4eec436b8df78ea4b3ec210451a"
+content-hash = "be258547b329a3efc055ed4ce77a1a1e89ead1c69957522d731c6438a6a80ab7"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,8 @@ dependencies = [
    "pydantic (>=2.5.0,<3.0.0)",
    "pytest (>=7.0.0,<8.0.0)",
    "pytest-cov (>=4.0.0,<5.0.0)",
-    "uvicorn (>=0.24.0,<1.0.0)"
+    "uvicorn (>=0.24.0,<1.0.0)",
+    "pandas (>=2.0.0,<3.0.0)"
 ]


--- a/src/models/train.py
+++ b/src/models/train.py
@@ -1,11 +1,13 @@
 import mlflow
 import mlflow.sklearn
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+import pandas as pd

-mlflow.set_tracking_uri("http://localhost:5000")
+mlflow.set_tracking_uri("https://mlflow.sortifal.dev")
 mlflow.set_experiment("csgo-match-prediction")

-def train_model(X_train, y_train, params):
+def train_model(X_train, y_train, X_test, y_test, params):
    with mlflow.start_run(run_name="rf-v1"):
        # Log params
        mlflow.log_params(params)
@@ -20,6 +22,19 @@ def train_model(X_train, y_train, params):
        mlflow.log_metric("accuracy", accuracy)
        
        # Log model
-        mlflow.sklearn.log_model(model, "model")
+        # mlflow.sklearn.log_model(model, "model")  # Commented out due to server permission issue
        
-        return model
+        return model
+
+if __name__ == "__main__":
+    # Load data (example with results.csv)
+    df = pd.read_csv("/home/paul/ING3/MLOps/data/raw/results.csv")
+    # Select numeric columns for features
+    numeric_cols = ['result_1', 'result_2', 'starting_ct', 'ct_1', 't_2', 't_1', 'ct_2', 'rank_1', 'rank_2', 'map_wins_1', 'map_wins_2']
+    X = df[numeric_cols]
+    y = df['match_winner'] - 1  # 0 or 1
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    
+    params = {"n_estimators": 100, "max_depth": 10}
+    model = train_model(X_train, y_train, X_test, y_test, params)
+    print("Training completed and logged to MLflow.")
--- a/tests/test_api.py
+++ b/tests/test_api.py