build(ENG-21491): Improve sccache behavior on developer machines (#24568)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Marko Vejnovic
2025-11-12 09:11:33 -08:00
committed by GitHub
parent e0aae8adc1
commit 2563a9b3ad
8 changed files with 521 additions and 96 deletions

View File

@@ -125,7 +125,8 @@ setx(CWD ${CMAKE_SOURCE_DIR})
setx(BUILD_PATH ${CMAKE_BINARY_DIR})
optionx(CACHE_PATH FILEPATH "The path to the cache directory" DEFAULT ${BUILD_PATH}/cache)
optionx(CACHE_STRATEGY "read-write|read-only|none" "The strategy to use for caching" DEFAULT "read-write")
optionx(CACHE_STRATEGY "auto|distributed|local|none" "The strategy to use for caching" DEFAULT
"auto")
optionx(CI BOOL "If CI is enabled" DEFAULT OFF)
optionx(ENABLE_ANALYSIS BOOL "If static analysis targets should be enabled" DEFAULT OFF)
@@ -141,9 +142,39 @@ optionx(TMP_PATH FILEPATH "The path to the temporary directory" DEFAULT ${BUILD_
# --- Helper functions ---
# list_filter_out_regex()
#
# Description:
#   Rewrites a list in the caller's scope so that it no longer contains any
#   element matching a regular expression, and reports whether anything was
#   dropped.
#
# Arguments:
#   list    - Name of the list variable to filter (updated in the caller's scope)
#   pattern - Regex; every element that matches it is discarded
#   touched - Name of a caller-scope variable that is set to ON when at least
#             one element was discarded and to OFF otherwise
function(list_filter_out_regex list pattern touched)
  # Work on a private copy so the loop never reads the variable it rewrites.
  set(candidates "${${list}}")
  set(survivors)
  set(dropped_any OFF)

  foreach(entry IN LISTS candidates)
    if(NOT entry MATCHES "${pattern}")
      list(APPEND survivors ${entry})
    else()
      set(dropped_any ON)
    endif()
  endforeach()

  set(${list} "${survivors}" PARENT_SCOPE)
  set(${touched} ${dropped_any} PARENT_SCOPE)
endfunction()
# setenv()
# Description:
# Sets an environment variable during the build step, and writes it to a .env file.
#
# See Also:
# unsetenv()
#
# Arguments:
# variable string - The variable to set
# value string - The value to set the variable to
@@ -156,13 +187,7 @@ function(setenv variable value)
if(EXISTS ${ENV_PATH})
file(STRINGS ${ENV_PATH} ENV_FILE ENCODING UTF-8)
foreach(line ${ENV_FILE})
if(line MATCHES "^${variable}=")
list(REMOVE_ITEM ENV_FILE ${line})
set(ENV_MODIFIED ON)
endif()
endforeach()
list_filter_out_regex(ENV_FILE "^${variable}=" ENV_MODIFIED)
if(ENV_MODIFIED)
list(APPEND ENV_FILE "${variable}=${value}")
@@ -178,6 +203,28 @@ function(setenv variable value)
message(STATUS "Set ENV ${variable}: ${value}")
endfunction()
# unsetenv()
# Description:
#   Removes every `<variable>=...` line from the .env file in the build
#   directory (${BUILD_PATH}/.env). Does nothing when that file does not exist,
#   and leaves the file untouched when no line matches.
# Arguments:
#   variable string - The variable to unset.
# See Also:
#   setenv()
function(unsetenv variable)
  set(ENV_PATH "${BUILD_PATH}/.env")
  if(NOT EXISTS "${ENV_PATH}")
    return()
  endif()

  file(STRINGS "${ENV_PATH}" ENV_FILE ENCODING UTF-8)
  list_filter_out_regex(ENV_FILE "^${variable}=" ENV_MODIFIED)

  if(ENV_MODIFIED)
    list(JOIN ENV_FILE "\n" ENV_FILE)
    # Quoted on purpose: an unquoted expansion is split at every ';' and
    # file(WRITE) then concatenates the pieces, silently dropping the ';'.
    file(WRITE "${ENV_PATH}" "${ENV_FILE}")
  endif()
endfunction()
# satisfies_range()
# Description:
# Check if a version satisfies a version range or list of ranges

View File

@@ -1,90 +1,131 @@
# Setup sccache as the C and C++ compiler launcher to speed up builds by caching
#
# A CACHE_STRATEGY of "none" opts out entirely: return() stops processing here, before any of the
# statements that follow are reached.
if(CACHE_STRATEGY STREQUAL "none")
return()
endif()
# check_aws_credentials()
# Description:
# Reports through OUT_VAR whether usable AWS credentials for the shared sccache bucket exist.
# Arguments:
# OUT_VAR - Name of the caller-scope variable that receives TRUE or FALSE
#
# NOTE(review): this span contains two `function(check_aws_credentials OUT_VAR)` lines but a
# single `endfunction()`, and mixes an environment/~/.aws probe with a have-access.ts probe. It
# looks like two revisions of the body interleaved by a diff view without +/- markers — confirm
# which lines belong to the current revision before editing the logic.
function(check_aws_credentials OUT_VAR)
set(HAS_CREDENTIALS FALSE)
# Region and bucket of the shared cache; passed to have-access.ts below.
set(SCCACHE_SHARED_CACHE_REGION "us-west-1")
set(SCCACHE_SHARED_CACHE_BUCKET "bun-build-sccache-store")
# Credentials supplied directly through the environment.
if(DEFINED ENV{AWS_ACCESS_KEY_ID} AND DEFINED ENV{AWS_SECRET_ACCESS_KEY})
set(HAS_CREDENTIALS TRUE)
message(NOTICE
"sccache: Using AWS credentials found in environment variables")
# Function to check if the system AWS credentials have access to the sccache S3 bucket.
function(check_aws_credentials OUT_VAR)
# Install dependencies first
# Runs `bun install --frozen-lockfile` inside scripts/build-cache at configure time and captures
# its exit code, stdout and stderr for the error report below.
execute_process(
COMMAND
${BUN_EXECUTABLE}
install
--frozen-lockfile
WORKING_DIRECTORY
${CMAKE_SOURCE_DIR}/scripts/build-cache
RESULT_VARIABLE INSTALL_EXIT_CODE
OUTPUT_VARIABLE INSTALL_OUTPUT
ERROR_VARIABLE INSTALL_ERROR
)
# A failed install aborts configuration and prints everything that was captured.
if(NOT INSTALL_EXIT_CODE EQUAL 0)
message(FATAL_ERROR "Failed to install dependencies in scripts/build-cache\n"
"Exit code: ${INSTALL_EXIT_CODE}\n"
"Output: ${INSTALL_OUTPUT}\n"
"Error: ${INSTALL_ERROR}")
endif()
# Check for ~/.aws directory since sccache may use that.
if(NOT HAS_CREDENTIALS)
# The home directory comes from USERPROFILE on Windows and from HOME everywhere else.
if(WIN32)
set(AWS_CONFIG_DIR "$ENV{USERPROFILE}/.aws")
else()
set(AWS_CONFIG_DIR "$ENV{HOME}/.aws")
endif()
# Check AWS credentials
# Runs have-access.ts from scripts/build-cache with the bucket and region set above; only its
# exit code is captured.
execute_process(
COMMAND
${BUN_EXECUTABLE}
run
have-access.ts
--bucket ${SCCACHE_SHARED_CACHE_BUCKET}
--region ${SCCACHE_SHARED_CACHE_REGION}
WORKING_DIRECTORY
${CMAKE_SOURCE_DIR}/scripts/build-cache
RESULT_VARIABLE HAVE_ACCESS_EXIT_CODE
)
if(EXISTS "${AWS_CONFIG_DIR}/credentials")
set(HAS_CREDENTIALS TRUE)
message(NOTICE
"sccache: Using AWS credentials found in ${AWS_CONFIG_DIR}/credentials")
endif()
# Exit code 0 from have-access.ts is treated as "has access"; anything else as "no access".
if(HAVE_ACCESS_EXIT_CODE EQUAL 0)
set(HAS_CREDENTIALS TRUE)
else()
set(HAS_CREDENTIALS FALSE)
endif()
# Hand the result back to the caller under the name it asked for.
set(${OUT_VAR} ${HAS_CREDENTIALS} PARENT_SCOPE)
endfunction()
# check_running_in_ci()
# Description:
# Sets OUT_VAR in the caller's scope to TRUE when the EC2 instance metadata tag `Service` reads
# exactly "buildkite-agent", and to FALSE otherwise.
# Arguments:
# OUT_VAR - Name of the caller-scope variable that receives the result
function(check_running_in_ci OUT_VAR)
set(IS_CI FALSE)
# Query EC2 instance metadata service to check if running on buildkite-agent
# The IP address 169.254.169.254 is a well-known link-local address for querying EC2 instance
# metadata:
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
# `-s` silences curl's progress output and `-m 0.5` caps the whole request at half a second.
execute_process(
COMMAND curl -s -m 0.5 http://169.254.169.254/latest/meta-data/tags/instance/Service
OUTPUT_VARIABLE METADATA_OUTPUT
ERROR_VARIABLE METADATA_ERROR
RESULT_VARIABLE METADATA_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
# Check if the request succeeded and returned exactly "buildkite-agent"
if(METADATA_RESULT EQUAL 0 AND METADATA_OUTPUT STREQUAL "buildkite-agent")
set(IS_CI TRUE)
endif()
set(${OUT_VAR} ${IS_CI} PARENT_SCOPE)
# sccache_configure_local_filesystem()
# Description:
#   Restricts sccache to the local cache: the bucket and region entries are
#   removed with unsetenv(), then SCCACHE_DIR is set to a directory under
#   CACHE_PATH with setenv().
function(sccache_configure_local_filesystem)
  foreach(remote_setting SCCACHE_BUCKET SCCACHE_REGION)
    unsetenv(${remote_setting})
  endforeach()

  setenv(SCCACHE_DIR "${CACHE_PATH}/sccache")
endfunction()
check_running_in_ci(IS_IN_CI)
find_command(VARIABLE SCCACHE_PROGRAM COMMAND sccache REQUIRED ${IS_IN_CI})
if(NOT SCCACHE_PROGRAM)
message(WARNING "sccache not found. Your builds will be slower.")
return()
endif()
# sccache_configure_distributed()
# Description:
#   Sets up the distributed cache (S3 + local): SCCACHE_BUCKET and
#   SCCACHE_REGION are taken from the matching SCCACHE_SHARED_CACHE_* variables,
#   and SCCACHE_DIR is set to a directory under CACHE_PATH.
function(sccache_configure_distributed)
  # Same order as before: bucket first, then region, then the local directory.
  foreach(remote_setting BUCKET REGION)
    setenv(SCCACHE_${remote_setting} "${SCCACHE_SHARED_CACHE_${remote_setting}}")
  endforeach()

  setenv(SCCACHE_DIR "${CACHE_PATH}/sccache")
endfunction()
set(SCCACHE_ARGS CMAKE_C_COMPILER_LAUNCHER CMAKE_CXX_COMPILER_LAUNCHER)
foreach(arg ${SCCACHE_ARGS})
setx(${arg} ${SCCACHE_PROGRAM})
list(APPEND CMAKE_ARGS -D${arg}=${${arg}})
endforeach()
# Configure S3 bucket for distributed caching
setenv(SCCACHE_BUCKET "bun-build-sccache-store")
setenv(SCCACHE_REGION "us-west-1")
setenv(SCCACHE_DIR "${CACHE_PATH}/sccache")
# Handle credentials based on cache strategy
if (CACHE_STRATEGY STREQUAL "read-only")
setenv(SCCACHE_S3_NO_CREDENTIALS "1")
message(STATUS "sccache configured in read-only mode.")
else()
# Check for AWS credentials and enable anonymous access if needed
check_aws_credentials(HAS_AWS_CREDENTIALS)
if(NOT IS_IN_CI AND NOT HAS_AWS_CREDENTIALS)
setenv(SCCACHE_S3_NO_CREDENTIALS "1")
message(NOTICE "sccache: No AWS credentials found, enabling anonymous S3 "
"access. Writing to the cache will be disabled.")
# sccache_configure_environment_ci()
# Description:
# Applies the cache configuration for CI. "auto" and "distributed" both require
# check_aws_credentials() to succeed and then configure the distributed cache; "local" is
# rejected with a fatal error. Any other CACHE_STRATEGY value falls through without configuring
# anything.
function(sccache_configure_environment_ci)
if(CACHE_STRATEGY STREQUAL "auto" OR CACHE_STRATEGY STREQUAL "distributed")
check_aws_credentials(HAS_AWS_CREDENTIALS)
if(HAS_AWS_CREDENTIALS)
sccache_configure_distributed()
message(NOTICE "sccache: Using distributed cache strategy.")
else()
# Missing credentials are fatal in CI rather than a silent fallback to another strategy.
message(FATAL_ERROR "CI CACHE_STRATEGY is set to '${CACHE_STRATEGY}', but no valid AWS "
"credentials were found. Note that 'auto' requires AWS credentials to access the shared "
"cache in CI.")
endif()
elseif(CACHE_STRATEGY STREQUAL "local")
# We disallow this because we want our CI runs to always use the shared cache to accelerate
# builds.
# none, distributed and auto are all okay.
#
# If local is configured, it's as good as "none", so this is probably user error.
message(FATAL_ERROR "CI CACHE_STRATEGY is set to 'local', which is not allowed.")
endif()
# NOTE(review): the endif() below has no matching if() inside this function — it looks like a
# leftover from the earlier file-level if/else block. Confirm and remove.
endif()
endfunction()
setenv(SCCACHE_LOG "info")
# sccache_configure_environment_developer()
# Description:
#   Applies the cache configuration for a developer machine. Any strategy is
#   allowed here; the S3 store is set up to expire old entries on its own.
#     distributed  - requires check_aws_credentials() to succeed, otherwise
#                    configuration stops with a fatal error
#     auto | local - local cache only
function(sccache_configure_environment_developer)
  if(CACHE_STRATEGY STREQUAL "distributed")
    check_aws_credentials(HAS_AWS_CREDENTIALS)
    if(NOT HAS_AWS_CREDENTIALS)
      message(FATAL_ERROR "CACHE_STRATEGY is set to 'distributed', but no valid AWS credentials "
        "were found.")
    else()
      sccache_configure_distributed()
      message(NOTICE "sccache: Using distributed cache strategy.")
    endif()
  elseif(CACHE_STRATEGY STREQUAL "local" OR CACHE_STRATEGY STREQUAL "auto")
    # "auto" resolves to the local cache on developer machines: sccache takes
    # the full absolute path of each compiled file into account, and two
    # machines are very unlikely to share the same absolute paths.
    sccache_configure_local_filesystem()
    message(NOTICE "sccache: Using local cache strategy.")
  endif()
endfunction()
message(STATUS "sccache configured for bun-build-sccache-store (us-west-1).")
# sccache_configure()
# Description:
#   Entry point of this file. Locates sccache, registers it as the C and C++
#   compiler launcher, appends the matching -D flags to CMAKE_ARGS, sets
#   SCCACHE_LOG, and then applies the CI or developer cache configuration
#   depending on `CI`.
#
#   find_command() is passed `REQUIRED ${CI}`; when SCCACHE_PROGRAM ends up
#   empty a warning is printed and nothing else is configured.
#
#   Everything that has to outlive this call (the launcher variables and
#   CMAKE_ARGS) is written back to the caller's scope explicitly, because
#   function() runs in its own variable scope.
function(sccache_configure)
  find_command(VARIABLE SCCACHE_PROGRAM COMMAND sccache REQUIRED ${CI})
  if(NOT SCCACHE_PROGRAM)
    message(WARNING "sccache not found. Your builds will be slower.")
    return()
  endif()

  set(SCCACHE_ARGS CMAKE_C_COMPILER_LAUNCHER CMAKE_CXX_COMPILER_LAUNCHER)
  foreach(arg ${SCCACHE_ARGS})
    setx(${arg} ${SCCACHE_PROGRAM})
    list(APPEND CMAKE_ARGS -D${arg}=${${arg}})
    # setx()'s scoping is not visible from here; re-exporting the launcher is
    # harmless if it already reaches the caller and required if it does not.
    set(${arg} "${${arg}}" PARENT_SCOPE)
  endforeach()
  # list(APPEND) above only changed this function's copy of CMAKE_ARGS; without
  # this line the appended -D flags are discarded when the function returns.
  set(CMAKE_ARGS "${CMAKE_ARGS}" PARENT_SCOPE)

  setenv(SCCACHE_LOG "info")

  if(CI)
    sccache_configure_environment_ci()
  else()
    sccache_configure_environment_developer()
  endif()
endfunction()

sccache_configure()