From 455cefb93365f19c3f7f047849133f60047f2642 Mon Sep 17 00:00:00 2001
From: nathan
Date: Thu, 14 May 2026 09:47:49 -0700
Subject: [PATCH] Add repository storage guardrails

---
Review note: Scripts/audit_repo_storage.sh was revised after this patch was
generated, so the blob id on its "index" line no longer matches the content.

 .gitignore                      | 11 ++++
 AGRARIAN_DEVELOPMENT_ROADMAP.md |  4 ++
 Docs/RepositoryStoragePolicy.md | 55 +++++++++++++++++++
 Scripts/audit_repo_storage.sh   | 90 ++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+)
 create mode 100644 Docs/RepositoryStoragePolicy.md
 create mode 100755 Scripts/audit_repo_storage.sh

diff --git a/.gitignore b/.gitignore
index aab5b18..6038d99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,17 @@ Builds/
 *.tmp
 *.cache
 
+# Large terrain/source artifacts stay on DevBox or object storage.
+Data/Terrain/Sources/
+Data/Terrain/**/*.laz
+Data/Terrain/**/*.las
+Data/Terrain/**/*.tif
+Data/Terrain/**/*.tiff
+Data/Terrain/**/*.zip
+Data/Terrain/**/*.7z
+Data/Terrain/**/*.tar
+Data/Terrain/**/*.tar.gz
+
 # OS files
 .DS_Store
 Thumbs.db
diff --git a/AGRARIAN_DEVELOPMENT_ROADMAP.md b/AGRARIAN_DEVELOPMENT_ROADMAP.md
index ecf9130..3316f58 100644
--- a/AGRARIAN_DEVELOPMENT_ROADMAP.md
+++ b/AGRARIAN_DEVELOPMENT_ROADMAP.md
@@ -171,6 +171,7 @@ Remaining version 0.01 cleanup before moving deeper into new gameplay:
 - [!] Create protected `main` branch. Blocked while the repo remains private on the current GitHub plan; GitHub API reports this requires GitHub Pro or making the repository public.
 - [?] Decide whether to create/use a long-lived `dev` branch.
 - [~] Finish branch naming and commit message conventions.
+- [x] Define GitHub/LFS free-tier storage guardrails.
 - [ ] Define backup expectations for NAS and repository.
 - [ ] Create repeatable dedicated server build instructions.
 - [~] Finish required plugin documentation.
@@ -195,6 +196,7 @@ Goal: Prepare the project so all future development is controlled, recoverable,
 - [?] Create `dev` branch if we want staging before main.
 - [~] Define branch naming conventions.
 - [~] Define commit message conventions.
+- [x] Define GitHub/LFS free-tier storage guardrails.
 - [ ] Define backup expectations for NAS and repo.
 - [x] Confirm this roadmap file is committed or otherwise backed up.
 
@@ -1239,6 +1241,7 @@ These tracks run across all phases and must not be left as afterthoughts.
 
 - [x] Define dev editor build process.
 - [x] Define internal test build process.
+- [x] Add repository storage policy and local storage audit script.
 - [ ] Define closed alpha build process.
 - [ ] Define dedicated server packaging.
 - [ ] Define patch distribution.
@@ -1400,6 +1403,7 @@ Earliest incomplete foundation items:
 - [?] Decide whether to create/use a long-lived `dev` branch.
 - [~] Finish branch naming conventions.
 - [~] Finish commit message conventions.
+- [x] Define GitHub/LFS free-tier storage guardrails.
 - [ ] Define backup expectations for NAS and repository.
 - [ ] Create repeatable dedicated server build instructions.
 - [~] Finish required plugin documentation.
diff --git a/Docs/RepositoryStoragePolicy.md b/Docs/RepositoryStoragePolicy.md
new file mode 100644
index 0000000..601a298
--- /dev/null
+++ b/Docs/RepositoryStoragePolicy.md
@@ -0,0 +1,55 @@
+# Repository Storage Policy
+
+Agrarian should stay on free GitHub/LFS limits as long as practical. GitHub is
+the source-control system, not the long-term asset warehouse.
+
+## GitHub Is For
+
+- Source code.
+- Config files.
+- Scripts and build wrappers.
+- Design docs, roadmap docs, and small metadata files.
+- Curated Unreal assets needed to open and build the current project.
+- Small data assets and test fixtures.
+- LFS-tracked `.uasset`, `.umap`, small media, and other binary files that are
+  intentionally part of the working game project.
+
+## GitHub Is Not For
+
+- Packaged builds.
+- Raw DEM, lidar, bathymetry, satellite, or large GIS source datasets.
+- Generated terrain tile packages beyond curated MVP samples.
+- Derived Data Cache.
+- Unreal `Intermediate/`, `Saved/`, and generated build products.
+- Large marketplace/source-art libraries that are not actively used by the
+  current build.
+- Full-resolution marketing captures, raw videos, or archival exports.
+- Long-term Earth-scale tile cache data.
+
+## Large Data Homes
+
+- DevBox project storage is the first local home for large working data.
+- Future object storage should hold generated tile packages, source terrain
+  datasets, build artifacts, and public download assets.
+- Git should store manifests, checksums, provenance, import scripts, and small
+  curated samples that let us reproduce large artifacts from source storage.
+
+## Budget Targets
+
+- Keep Git repository history small enough that clone/fetch remains practical.
+- Treat Git LFS free storage and bandwidth as a scarce resource.
+- Investigate before adding any single LFS object over `100 MB`.
+- Avoid committing generated binary assets that change frequently.
+- Never store full packaged builds in Git or Git LFS.
+
+## Required Checks
+
+Run this before adding large content or after any major asset import:
+
+```bash
+Scripts/audit_repo_storage.sh
+```
+
+If the audit shows large or frequently changing LFS files, decide whether they
+belong in GitHub, DevBox artifact storage, or future object storage before
+pushing.
diff --git a/Scripts/audit_repo_storage.sh b/Scripts/audit_repo_storage.sh
new file mode 100755
index 0000000..30c4799
--- /dev/null
+++ b/Scripts/audit_repo_storage.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# Print a storage audit for this repository: working tree size, Git object
+# database size, the largest files tracked in HEAD, the largest Git LFS
+# objects, and the size of generated/local directories.
+#
+# Usage: Scripts/audit_repo_storage.sh (from anywhere inside the repository)
+# The audit is read-only; it only reports sizes.
+set -euo pipefail
+
+# Number of entries shown in each "largest" listing.
+readonly TOP_N=25
+
+if ! repo_root="$(git rev-parse --show-toplevel)"; then
+  echo "audit_repo_storage.sh: run this from inside the Git working tree." >&2
+  exit 1
+fi
+cd "$repo_root"
+
+echo "Agrarian repository storage audit"
+echo "Repository: $repo_root"
+echo
+
+echo "Working tree size:"
+# Best effort: a du failure (e.g. unreadable paths) must not abort the audit.
+du -sh . 2>/dev/null || true
+echo
+
+echo "Git object database:"
+git count-objects -vH
+echo
+
+echo "Largest tracked files in HEAD:"
+if git rev-parse --verify --quiet HEAD >/dev/null; then
+  # ls-tree -l prints "<mode> <type> <object> <size>TAB<path>"; splitting on the
+  # tab keeps paths that contain spaces intact. LFS-tracked files are listed at
+  # their pointer size here; the real payloads are ranked in the LFS section.
+  # The row limit lives in awk instead of head so that no upstream command is
+  # killed by SIGPIPE, which pipefail would report as a failure.
+  git ls-tree -r -l HEAD \
+    | sort -k4,4nr \
+    | awk -F'\t' -v top="$TOP_N" '
+        NR <= top {
+          n = split($1, meta, " ")
+          printf "%10s %s\n", meta[n], $2
+        }'
+else
+  echo "(HEAD has no commits yet)"
+fi
+echo
+
+if git lfs version >/dev/null 2>&1; then
+  echo "Largest Git LFS objects:"
+  # Each ls-files -s line ends in "(<number> <unit>)", e.g. "(1.2 MB)". sort -h
+  # cannot rank that form because the unit is detached from the number, so
+  # prefix every line with an approximate byte count, sort on that key, and
+  # strip the key again before printing.
+  git lfs ls-files -s \
+    | awk '
+        {
+          bytes = 0
+          if (match($0, /\([0-9.]+ [A-Za-z]+\)$/)) {
+            split(substr($0, RSTART + 1, RLENGTH - 2), size, " ")
+            rank = index("BKMGTP", toupper(substr(size[2], 1, 1)))
+            bytes = size[1] * 1000 ^ (rank > 0 ? rank - 1 : 0)
+          }
+          printf "%.0f\t%s\n", bytes, $0
+        }' \
+    | sort -t $'\t' -k1,1nr \
+    | awk -v top="$TOP_N" 'NR <= top { sub(/^[^\t]*\t/, ""); print }'
+else
+  echo "Git LFS is not installed."
+fi
+echo
+
+echo "Generated/local directories:"
+for dir in Binaries Intermediate Saved DerivedDataCache .vs Builds; do
+  if [[ -e "$dir" ]]; then
+    du -sh "$dir" 2>/dev/null || true
+  fi
+done
+echo
+
+cat <<'NOTE'
+Policy reminder:
+- Do not store packaged builds, raw DEM/lidar/GIS datasets, DerivedDataCache, or
+  generated terrain tile caches in GitHub.
+- Use Git for source, scripts, metadata, docs, and curated project assets.
+- Use DevBox or future object storage for large artifacts and source data.
+NOTE