diff --git a/.github/workflows/mlops-pipeline.yml b/.github/workflows/mlops-pipeline.yml index 3243232..14e2122 100644 --- a/.github/workflows/mlops-pipeline.yml +++ b/.github/workflows/mlops-pipeline.yml @@ -46,6 +46,8 @@ jobs: run: poetry run python src/models/train.py env: MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }} + MLFLOW_TRACKING_USERNAME: ${{ secrets.CSGO }} + MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLOPSEPITALYON }} deploy: needs: train diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml deleted file mode 100644 index e9c01f3..0000000 --- a/mlruns/0/meta.yaml +++ /dev/null @@ -1,6 +0,0 @@ -artifact_location: file:///home/paul/ING3/MLOps/mlruns/0 -creation_time: 1759242937587 -experiment_id: '0' -last_update_time: 1759242937587 -lifecycle_stage: active -name: Default diff --git a/poetry.lock b/poetry.lock index b6726bf..fcdb80b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8051,4 +8051,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.12,<3.14" -content-hash = "8e91d18cc21544a0a2fa02777272f825c4f6f4eec436b8df78ea4b3ec210451a" +content-hash = "be258547b329a3efc055ed4ce77a1a1e89ead1c69957522d731c6438a6a80ab7" diff --git a/pyproject.toml b/pyproject.toml index 213293e..577a2f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,8 @@ dependencies = [ "pydantic (>=2.5.0,<3.0.0)", "pytest (>=7.0.0,<8.0.0)", "pytest-cov (>=4.0.0,<5.0.0)", - "uvicorn (>=0.24.0,<1.0.0)" + "uvicorn (>=0.24.0,<1.0.0)", + "pandas (>=2.0.0,<3.0.0)" ] diff --git a/src/models/train.py b/src/models/train.py index 2937ed2..55ee145 100644 --- a/src/models/train.py +++ b/src/models/train.py @@ -1,11 +1,13 @@ import mlflow import mlflow.sklearn from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import train_test_split +import pandas as pd -mlflow.set_tracking_uri("http://localhost:5000") +mlflow.set_tracking_uri("https://mlflow.sortifal.dev") mlflow.set_experiment("csgo-match-prediction") -def train_model(X_train, y_train, params): +def train_model(X_train, y_train, X_test, y_test, params): with mlflow.start_run(run_name="rf-v1"): # Log params mlflow.log_params(params) @@ -20,6 +22,19 @@ def train_model(X_train, y_train, params): mlflow.log_metric("accuracy", accuracy) # Log model - mlflow.sklearn.log_model(model, "model") + # mlflow.sklearn.log_model(model, "model") # Commented out due to server permission issue - return model \ No newline at end of file + return model + +if __name__ == "__main__": + # Load data (example with results.csv) + df = pd.read_csv("/home/paul/ING3/MLOps/data/raw/results.csv") + # Select numeric columns for features + numeric_cols = ['result_1', 'result_2', 'starting_ct', 'ct_1', 't_2', 't_1', 'ct_2', 'rank_1', 'rank_2', 'map_wins_1', 'map_wins_2'] + X = df[numeric_cols] + y = df['match_winner'] - 1 # 0 or 1 + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + params = {"n_estimators": 100, "max_depth": 10} + model = train_model(X_train, y_train, X_test, y_test, params) + print("Training completed and logged to MLflow.") \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py deleted file mode 100644 index e69de29..0000000