Revert "Reuse the cache created for latest main on PRs/branches" (#25466)

Revert "Reuse the cache created for latest `main` on PRs/branches if `setup.py` is not modified (#25445)"

This reverts commit 1d75768695.
This commit is contained in:
Yih-Dar 2023-08-11 21:07:08 +02:00 committed by GitHub
parent 5e5fa0d88c
commit fe3c8ab1af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 75 deletions

View File

@ -34,9 +34,9 @@ jobs:
- run: pip install -U --upgrade-strategy eager GitPython
- run: pip install -U --upgrade-strategy eager .
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py | tee test_preparation/tests_fetched_summary.txt
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
- store_artifacts:
path: test_preparation/tests_fetched_summary.txt
path: ~/transformers/tests_fetched_summary.txt
- run: |
if [ -f test_list.txt ]; then
cp test_list.txt test_preparation/test_list.txt

View File

@ -18,14 +18,10 @@ import copy
import glob
import os
import random
import subprocess
import yaml
from base64 import b64encode
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
from git import Repo
import yaml
COMMON_ENV_VARIABLES = {
@ -68,8 +64,6 @@ class CircleCIJob:
working_directory: str = "~/transformers"
# This should be only used for doctest job!
command_timeout: Optional[int] = None
# The explicit checksum to use for cache load/save
checksum: Optional[str] = None
def __post_init__(self):
# Deal with defaults for mutable attributes.
@ -106,14 +100,6 @@ class CircleCIJob:
job["resource_class"] = self.resource_class
if self.parallelism is not None:
job["parallelism"] = self.parallelism
checksum = self.checksum if self.checksum is not None else '{{ checksum "setup.py" }}'
save_cache = True
if self.checksum is not None:
# `setup.py` is not modified and we are not on `main` branch
cache_branch_prefix = "main"
save_cache = False
steps = [
"checkout",
{"attach_workspace": {"at": "~/transformers/test_preparation"}},
@ -121,7 +107,7 @@ class CircleCIJob:
"restore_cache": {
"keys": [
# check the fully-matched cache first
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
# try the partially-matched cache from `main`
f"v{self.cache_version}-{self.cache_name}-main-pip-",
# try the general partially-matched cache
@ -132,7 +118,7 @@ class CircleCIJob:
{
"restore_cache": {
"keys": [
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
f"v{self.cache_version}-{self.cache_name}-main-site-packages-",
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-",
]
@ -140,11 +126,10 @@ class CircleCIJob:
},
]
steps.extend([{"run": l} for l in self.install_steps])
if save_cache:
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
"paths": ["~/.cache/pip"],
}
}
@ -152,7 +137,7 @@ class CircleCIJob:
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
"paths": ["~/.pyenv/versions/"],
}
}
@ -546,48 +531,11 @@ REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
def get_main_setup_checksum():
PATH_TO_REPO = Path(__file__).parent.parent.resolve()
repo = Repo(PATH_TO_REPO)
current_head = repo.head.ref
main_head = repo.refs.main
setup_file_path = os.path.join(PATH_TO_REPO, "setup.py")
main_head.checkout()
proc = subprocess.Popen(["sha256sum", f"{setup_file_path}"], stdout=subprocess.PIPE)
checksum = proc.stdout.read().decode().split(" ")[0]
checksum = b64encode(bytes.fromhex(checksum)).decode()
# go back to the original branch
current_head.checkout()
return checksum
def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder
checksum = None
# if already on `main`, don't try to use the latest commit on `main` to avoid (rare) race condition where multiple
# commits are merged into `main`.
if os.environ.get("CIRCLE_BRANCH", "pull") != "main":
# Check if `setup.py` is modified.
summary_file = os.path.join(folder, "tests_fetched_summary.txt")
if os.path.exists(summary_file):
with open(summary_file) as f:
tests_fetched_summary = f.read()
setup_file_modifiled = "### TEST TO RUN ###\n- tests\n" in tests_fetched_summary
if not setup_file_modifiled:
# If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for
# cache
checksum = get_main_setup_checksum()
jobs = []
all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file):
@ -670,10 +618,6 @@ def create_circleci_config(folder=None):
"nightly": {"type": "boolean", "default": False},
"tests_to_run": {"type": "string", "default": test_list},
}
for job in jobs:
job.checksum = checksum
config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f: