Fix DVC pull to only fetch raw data

Changed dvc pull to fetch only data/raw.dvc instead of all tracked
outputs. The processed data and model files are generated by the
DVC pipeline (dvc repro), not pulled from remote storage.

This prevents errors about missing processed files that haven't
been generated yet.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Alexis Bruneteau 2025-10-01 18:52:16 +02:00
parent 662d1a3b8f
commit c9dbe70bdb

View File

@@ -38,7 +38,7 @@ jobs:
poetry run dvc remote modify minio access_key_id $AWS_ACCESS_KEY_ID
poetry run dvc remote modify minio secret_access_key $AWS_SECRET_ACCESS_KEY
poetry run dvc remote list
- poetry run dvc pull || echo "DVC pull failed, continuing..."
+ poetry run dvc pull data/raw.dvc || echo "DVC pull failed, continuing..."
env:
AWS_ACCESS_KEY_ID: ${{ secrets.DVC_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DVC_PASSWORD }}
@@ -84,7 +84,7 @@ jobs:
poetry run dvc remote modify minio access_key_id $AWS_ACCESS_KEY_ID
poetry run dvc remote modify minio secret_access_key $AWS_SECRET_ACCESS_KEY
poetry run dvc remote list
- poetry run dvc pull || echo "DVC pull failed, continuing..."
+ poetry run dvc pull data/raw.dvc || echo "DVC pull failed, continuing..."
env:
AWS_ACCESS_KEY_ID: ${{ secrets.DVC_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DVC_PASSWORD }}