"""Train a random-forest classifier on CS:GO match results and log the run to MLflow."""
import mlflow
|
|
import mlflow.sklearn
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
from sklearn.model_selection import train_test_split
|
|
import pandas as pd
|
|
|
|
# MLflow destination for every run started by this module: the tracking
# server first, then the experiment the runs are grouped under.
_TRACKING_URI = "https://mlflow.sortifal.dev"
_EXPERIMENT_NAME = "csgo-match-prediction"

mlflow.set_tracking_uri(_TRACKING_URI)
mlflow.set_experiment(_EXPERIMENT_NAME)
|
|
|
|
def train_model(
    X_train,
    y_train,
    X_test,
    y_test,
    params,
    *,
    run_name="rf-v1",
    data_version="v1.0.0",
    log_model=False,
):
    """Fit a random forest inside an MLflow run, logging its params and accuracy.

    Args:
        X_train: Training features, passed to ``RandomForestClassifier.fit``.
        y_train: Training labels.
        X_test: Held-out features used to score the fitted model.
        y_test: Held-out labels used to score the fitted model.
        params: Keyword arguments for ``RandomForestClassifier``; the same
            mapping is logged as the run's parameters.
        run_name: Name given to the MLflow run.
        data_version: Value logged under the ``data_version`` parameter.
        log_model: When true, also upload the fitted model as the ``model``
            artifact. Off by default because the upload was disabled in the
            original code due to a server permission issue.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    with mlflow.start_run(run_name=run_name):
        # Record the configuration before training so a failed fit still
        # leaves a run that shows what was attempted.
        mlflow.log_params(params)
        mlflow.log_param("data_version", data_version)

        model = RandomForestClassifier(**params)
        model.fit(X_train, y_train)

        # For classifiers, ``score`` is the mean accuracy on the given data.
        accuracy = model.score(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)

        if log_model:
            mlflow.sklearn.log_model(model, "model")

    return model
|
|
|
|
if __name__ == "__main__":
    import sys

    # Load data (example with results.csv). The default path is specific to
    # the original author's machine; pass another CSV path as the first
    # command-line argument to override it.
    default_csv = "/home/paul/ING3/MLOps/data/raw/results.csv"
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv
    df = pd.read_csv(csv_path)

    # Select numeric columns for features.
    # NOTE(review): 'result_1'/'result_2' look like final scores, which would
    # leak the label into the features -- confirm against the dataset schema.
    numeric_cols = [
        'result_1', 'result_2', 'starting_ct', 'ct_1', 't_2', 't_1', 'ct_2',
        'rank_1', 'rank_2', 'map_wins_1', 'map_wins_2',
    ]
    X = df[numeric_cols]
    # Shift the label down by one so it is 0 or 1
    # (presumably 'match_winner' is 1 or 2 in the CSV -- verify).
    y = df['match_winner'] - 1

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    params = {"n_estimators": 100, "max_depth": 10}
    model = train_model(X_train, y_train, X_test, y_test, params)
    print("Training completed and logged to MLflow.")