MLOps/.gitea/workflows/mlops-pipeline.yml
Alexis Bruneteau c9dbe70bdb Fix DVC pull to only fetch raw data
Changed dvc pull to specifically pull data/raw.dvc instead of all
outputs. The processed data and model files are generated by the
DVC pipeline (dvc repro), not pulled from remote storage.

This prevents errors about missing processed files that haven't
been generated yet.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-01 18:52:16 +02:00

127 lines
3.7 KiB
YAML

# Display name of the workflow in the Actions UI.
name: MLOps CI/CD Pipeline

# Triggers: every push to main or dev, and every pull request targeting main.
# `on` must stay unquoted for the Actions loader (generic YAML 1.1 linters may
# flag it as a truthy key).
on:
  push:
    branches:
      - main
      - dev
  pull_request:
    branches:
      - main
jobs:
# Job `test`: installs the project with Poetry, fetches the raw dataset through
# DVC, starts the API in the background and runs the pytest suite with coverage.
test:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: '3.12'

    # Cache is keyed on the lock file; restore-keys allows a partial hit when
    # poetry.lock has changed.
    - name: Cache Poetry dependencies
      uses: actions/cache@v3
      with:
        path: |
          ~/.cache/pypoetry
          ~/.cache/pip
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          ${{ runner.os }}-poetry-

    - name: Install Poetry
      run: pip install poetry

    # --no-root: install dependencies only, not the project package itself.
    - name: Install dependencies
      run: poetry install --no-interaction --no-root

    - name: Setup DVC
      run: |
        # --local stores the credentials in DVC's local config file rather than
        # the tracked .dvc/config. The expansions are quoted so a secret that
        # contains spaces or shell metacharacters is passed as one argument.
        poetry run dvc remote modify --local minio access_key_id "$AWS_ACCESS_KEY_ID"
        poetry run dvc remote modify --local minio secret_access_key "$AWS_SECRET_ACCESS_KEY"
        poetry run dvc remote list
        # Only the raw data is pulled. The pull stays best-effort, but a failure
        # is emitted as a warning workflow command instead of a plain log line
        # (runners that do not parse workflow commands just print the text).
        poetry run dvc pull data/raw.dvc || echo "::warning::DVC pull failed, continuing..."
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.DVC_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.DVC_PASSWORD }}

    - name: Start API server
      run: |
        # Launch uvicorn in the background, then wait a fixed 10 seconds.
        # NOTE(review): a fixed sleep does not verify the server actually came
        # up; consider polling a readiness endpoint if the app exposes one.
        poetry run uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
        sleep 10

    # Writes an XML coverage report for the src package.
    - name: Run unit tests
      run: poetry run pytest tests/ --cov=src --cov-report=xml
# Job `train`: runs after `test`, only for the main branch. Pulls the raw data,
# reproduces the DVC pipeline (with MLflow tracking credentials in the
# environment) and pushes the resulting DVC outputs to the `minio` remote.
train:
  needs: test
  runs-on: ubuntu-latest
  if: github.ref == 'refs/heads/main'
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: '3.12'

    # Cache is keyed on the lock file; restore-keys allows a partial hit when
    # poetry.lock has changed.
    - name: Cache Poetry dependencies
      uses: actions/cache@v3
      with:
        path: |
          ~/.cache/pypoetry
          ~/.cache/pip
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          ${{ runner.os }}-poetry-

    - name: Install Poetry
      run: pip install poetry

    # --no-root: install dependencies only, not the project package itself.
    - name: Install dependencies
      run: poetry install --no-interaction --no-root

    - name: Setup DVC
      run: |
        # --local stores the credentials in DVC's local config file rather than
        # the tracked .dvc/config. The expansions are quoted so a secret that
        # contains spaces or shell metacharacters is passed as one argument.
        poetry run dvc remote modify --local minio access_key_id "$AWS_ACCESS_KEY_ID"
        poetry run dvc remote modify --local minio secret_access_key "$AWS_SECRET_ACCESS_KEY"
        poetry run dvc remote list
        # Only the raw data is pulled. The pull stays best-effort, but a failure
        # is emitted as a warning workflow command instead of a plain log line.
        poetry run dvc pull data/raw.dvc || echo "::warning::DVC pull failed, continuing..."
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.DVC_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.DVC_PASSWORD }}

    # Re-runs the DVC pipeline stages; MLflow settings are supplied via secrets.
    - name: Run DVC pipeline
      run: poetry run dvc repro
      env:
        MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_TRACKING_URI }}
        MLFLOW_TRACKING_USERNAME: ${{ secrets.MLFLOW_TRACKING_USERNAME }}
        MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLFLOW_TRACKING_PASSWORD }}

    - name: Push DVC changes
      run: |
        # Credentials are re-applied here exactly as in "Setup DVC" (quoted,
        # written with --local) so this step does not depend on earlier state.
        poetry run dvc remote modify --local minio access_key_id "$AWS_ACCESS_KEY_ID"
        poetry run dvc remote modify --local minio secret_access_key "$AWS_SECRET_ACCESS_KEY"
        # Best-effort push: a failure does not fail the job, but it is emitted
        # as a warning workflow command so it is visible in the run output.
        # NOTE(review): `deploy` runs even when this push failed - confirm that
        # is intended.
        poetry run dvc push || echo "::warning::DVC push failed, continuing..."
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.DVC_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.DVC_PASSWORD }}
# Job `deploy`: runs after `train`, only for the main branch. Builds the Docker
# image from the repository root, tags it with the commit SHA and `latest`,
# pushes both tags and applies the Kubernetes deployment manifest.
deploy:
  runs-on: ubuntu-latest
  needs: train
  if: github.ref == 'refs/heads/main'
  steps:
    - uses: actions/checkout@v3

    - name: Build Docker image
      env:
        # Commit SHA of the run, used as the immutable image tag.
        IMAGE_TAG: ${{ github.sha }}
      run: |
        docker build -t "csgo-mlops:${IMAGE_TAG}" .
        docker tag "csgo-mlops:${IMAGE_TAG}" csgo-mlops:latest

    # NOTE(review): the image name carries no registry host and no login step is
    # visible in this workflow - confirm where these pushes are expected to go.
    - name: Push to registry
      env:
        IMAGE_TAG: ${{ github.sha }}
      run: |
        docker push "csgo-mlops:${IMAGE_TAG}"
        docker push csgo-mlops:latest

    # NOTE(review): the manifest is not visible here; cluster credentials are
    # presumably provided by the runner environment - verify.
    - name: Deploy to Kubernetes
      run: kubectl apply -f kubernetes/deployment.yml