mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
[publish images] [BAPI-2103] use gdb and bun.report code to print stack traces upon crash in CI (#21143)
### What does this PR do? Closes #13012 On Linux, when any Bun process spawned by `runner.node.mjs` crashes, we run GDB in batch mode to print a backtrace from the core file. And on all platforms, we run a mini `bun.report` server which collects crashes reported by any Bun process executed during the tests, and after each test `runner.node.mjs` fetches and prints any new crashes from the server. <details> <summary>example 1</summary> ``` #0 crash_handler.crash () at crash_handler.zig:1513 #1 0x0000000002cf4020 in crash_handler.crashHandler (reason=..., error_return_trace=0x0, begin_addr=...) at crash_handler.zig:479 #2 0x0000000002cefe25 in crash_handler.handleSegfaultPosix (sig=<optimized out>, info=<optimized out>) at crash_handler.zig:800 #3 0x00000000045a1124 in WTF::jscSignalHandler (sig=11, info=0x7ffe044e30b0, ucontext=0x0) at vendor/WebKit/Source/WTF/wtf/threads/Signals.cpp:548 #4 <signal handler called> #5 JSC::JSCell::type (this=0x0) at vendor/WebKit/Source/JavaScriptCore/runtime/JSCellInlines.h:137 #6 JSC::JSObject::getOwnNonIndexPropertySlot (this=0x150bc914fe18, vm=..., structure=0x150a0102de50, propertyName=..., slot=...) at vendor/WebKit/Source/JavaScriptCore/runtime/JSObject.h:1348 #7 JSC::JSObject::getPropertySlot<false> (this=0x150bc914fe18, globalObject=0x150b864e0088, propertyName=..., slot=...) at vendor/WebKit/Source/JavaScriptCore/runtime/JSObject.h:1433 #8 JSC::JSValue::getPropertySlot (this=0x7ffe044e4880, globalObject=0x150b864e0088, propertyName=..., slot=...) at vendor/WebKit/Source/JavaScriptCore/runtime/JSCJSValueInlines.h:1108 #9 JSC::JSValue::get (this=0x7ffe044e4880, globalObject=0x150b864e0088, propertyName=..., slot=...) at vendor/WebKit/Source/JavaScriptCore/runtime/JSCJSValueInlines.h:1065 #10 JSC::LLInt::performLLIntGetByID (bytecodeIndex=..., codeBlock=0x150b861e7740, globalObject=0x150b864e0088, baseValue=..., ident=..., metadata=...) at vendor/WebKit/Source/JavaScriptCore/llint/LLIntSlowPaths.cpp:878 #11 0x0000000004d7b055 in llint_slow_path_get_by_id (callFrame=0x7ffe044e4ab0, pc=0x150bc92ea0e7) at vendor/WebKit/Source/JavaScriptCore/llint/LLIntSlowPaths.cpp:946 #12 0x0000000003dd6042 in llint_op_get_by_id () #13 0x0000000000000000 in ?? () ``` </details> <details> <summary>example 2</summary> ``` #0 crash_handler.crash () at crash_handler.zig:1513 #1 0x0000000002c5db80 in crash_handler.crashHandler (reason=..., error_return_trace=0x0, begin_addr=...) at crash_handler.zig:479 #2 0x0000000002c59f60 in crash_handler.handleSegfaultPosix (sig=<optimized out>, info=<optimized out>) at crash_handler.zig:800 #3 0x00000000042ecc88 in WTF::jscSignalHandler (sig=11, info=0xfffff60141b0, ucontext=0xfffff6014230) at vendor/WebKit/Source/WTF/wtf/threads/Signals.cpp:548 #4 <signal handler called> #5 bun.js.api.FFIObject.Reader.u8 (globalObject=0x4000554e0088) at /var/lib/buildkite-agent/builds/ip-172-31-75-92/bun/bun/src/bun.js/api/FFIObject.zig:65 #6 bun.js.jsc.host_fn.toJSHostCall__anon_1711576 (globalThis=0x4000554e0088, args=...) at /var/lib/buildkite-agent/builds/ip-172-31-75-92/bun/bun/src/bun.js/jsc/host_fn.zig:97 #7 bun.js.jsc.host_fn.DOMCall("Reader"[0..6],bun.js.api.FFIObject.Reader,"u8"[0..2],.{ .reads = .{ ... }, .writes = .{ ... } }).slowpath (globalObject=0x4000554e0088, thisValue=70370172175040, arguments_ptr=0xfffff6015460, arguments_len=1) at /var/lib/buildkite-agent/builds/ip-172-31-75-92/bun/bun/src/bun.js/jsc/host_fn.zig:490 #8 0x000040003419003c in ?? () #9 0x0000400055173440 in ?? () ``` </details> I used GDB instead of LLDB (as the branch name suggests) because it seems to produce more useful stack traces with musl libc. - [x] on linux, use gdb to print from core dump of main bun process crashed - [x] on linux, use gdb to print from all new core dumps (so including bun subprocesses spawned by the test that crashed) - [x] on all platforms, use a mini bun.report server to print a self-reported trace (depends on oven-sh/bun.report#15; for now our package.json points to a commit on the branch of that repo) - [x] fix trying to fetch stack traces too early on windows - [x] use output groups so the traces show up alongside the log for the specific test instead of having to find it in the logs from the entire run - [x] get oven-sh/bun.report#15 merged, and point to a bun.report commit on the main branch instead of the PR branch in package.json ### How did you verify your code works? Manually, and in CI with a crashing test. --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -30,6 +30,8 @@ import {
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { availableParallelism, userInfo } from "node:os";
|
||||
import { basename, dirname, extname, join, relative, sep } from "node:path";
|
||||
import { createInterface } from "node:readline";
|
||||
import { setTimeout as setTimeoutPromise } from "node:timers/promises";
|
||||
import { parseArgs } from "node:util";
|
||||
import pLimit from "./p-limit.mjs";
|
||||
import {
|
||||
@@ -159,6 +161,10 @@ const { values: options, positionals: filters } = parseArgs({
|
||||
type: "boolean",
|
||||
default: false,
|
||||
},
|
||||
["fail-on-coredump-or-report"]: {
|
||||
type: "boolean",
|
||||
default: false, // STAB-861
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -177,6 +183,25 @@ if (options["quiet"]) {
|
||||
isQuiet = true;
|
||||
}
|
||||
|
||||
let coresDir;
|
||||
|
||||
if (options["coredump-upload"]) {
|
||||
// this sysctl is set in bootstrap.sh to /var/bun-cores-$distro-$release-$arch
|
||||
const sysctl = await spawnSafe({ command: "sysctl", args: ["-n", "kernel.core_pattern"] });
|
||||
coresDir = sysctl.stdout;
|
||||
if (sysctl.ok) {
|
||||
if (coresDir.startsWith("|")) {
|
||||
throw new Error("cores are being piped not saved");
|
||||
}
|
||||
// change /foo/bar/%e-%p.core to /foo/bar
|
||||
coresDir = dirname(sysctl.stdout);
|
||||
} else {
|
||||
throw new Error(`Failed to check core_pattern: ${sysctl.error}`);
|
||||
}
|
||||
}
|
||||
|
||||
let remapPort = undefined;
|
||||
|
||||
/**
|
||||
* @typedef {Object} TestExpectation
|
||||
* @property {string} filename
|
||||
@@ -450,6 +475,40 @@ async function runTests() {
|
||||
}
|
||||
|
||||
if (!failedResults.length) {
|
||||
// bun install has succeeded
|
||||
const { promise: portPromise, resolve: portResolve } = Promise.withResolvers();
|
||||
const { promise: errorPromise, resolve: errorResolve } = Promise.withResolvers();
|
||||
console.log("run in", cwd);
|
||||
let exiting = false;
|
||||
|
||||
const server = spawn(execPath, ["run", "ci-remap-server", execPath, cwd, getCommit()], {
|
||||
stdio: ["ignore", "pipe", "inherit"],
|
||||
cwd, // run in main repo
|
||||
env: { ...process.env, BUN_DEBUG_QUIET_LOGS: "1", NO_COLOR: "1" },
|
||||
});
|
||||
server.unref();
|
||||
server.on("error", errorResolve);
|
||||
server.on("exit", (code, signal) => {
|
||||
if (!exiting && (code !== 0 || signal !== null)) errorResolve(signal ? signal : "code " + code);
|
||||
});
|
||||
process.on("exit", () => {
|
||||
exiting = true;
|
||||
server.kill();
|
||||
});
|
||||
const lines = createInterface(server.stdout);
|
||||
lines.on("line", line => {
|
||||
portResolve({ port: parseInt(line) });
|
||||
});
|
||||
|
||||
const result = await Promise.race([portPromise, errorPromise, setTimeoutPromise(5000, "timeout")]);
|
||||
if (typeof result.port != "number") {
|
||||
server.kill();
|
||||
console.warn("ci-remap server did not start:", result);
|
||||
} else {
|
||||
console.log("crash reports parsed on port", result.port);
|
||||
remapPort = result.port;
|
||||
}
|
||||
|
||||
await Promise.all(
|
||||
tests.map(testPath =>
|
||||
limit(() => {
|
||||
@@ -469,7 +528,7 @@ async function runTests() {
|
||||
env.BUN_JSC_validateExceptionChecks = "1";
|
||||
}
|
||||
return runTest(title, async () => {
|
||||
const { ok, error, stdout } = await spawnBun(execPath, {
|
||||
const { ok, error, stdout, crashes } = await spawnBun(execPath, {
|
||||
cwd: cwd,
|
||||
args: [
|
||||
subcommand,
|
||||
@@ -482,7 +541,8 @@ async function runTests() {
|
||||
stderr: parallelism > 1 ? () => {} : chunk => pipeTestStdout(process.stderr, chunk),
|
||||
});
|
||||
const mb = 1024 ** 3;
|
||||
const stdoutPreview = stdout.slice(0, mb).split("\n").slice(0, 50).join("\n");
|
||||
let stdoutPreview = stdout.slice(0, mb).split("\n").slice(0, 50).join("\n");
|
||||
if (crashes) stdoutPreview += crashes;
|
||||
return {
|
||||
testPath: title,
|
||||
ok: ok,
|
||||
@@ -642,19 +702,6 @@ async function runTests() {
|
||||
|
||||
if (options["coredump-upload"]) {
|
||||
try {
|
||||
// this sysctl is set in bootstrap.sh to /var/bun-cores-$distro-$release-$arch
|
||||
const sysctl = await spawnSafe({ command: "sysctl", args: ["-n", "kernel.core_pattern"] });
|
||||
let coresDir = sysctl.stdout;
|
||||
if (sysctl.ok) {
|
||||
if (coresDir.startsWith("|")) {
|
||||
throw new Error("cores are being piped not saved");
|
||||
}
|
||||
// change /foo/bar/%e-%p.core to /foo/bar
|
||||
coresDir = dirname(sysctl.stdout);
|
||||
} else {
|
||||
throw new Error(`Failed to check core_pattern: ${sysctl.error}`);
|
||||
}
|
||||
|
||||
const coresDirBase = dirname(coresDir);
|
||||
const coresDirName = basename(coresDir);
|
||||
const coreFileNames = readdirSync(coresDir);
|
||||
@@ -760,6 +807,7 @@ async function runTests() {
|
||||
* @property {number} timestamp
|
||||
* @property {number} duration
|
||||
* @property {string} stdout
|
||||
* @property {number} [pid]
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -921,6 +969,7 @@ async function spawnSafe(options) {
|
||||
stdout: buffer,
|
||||
timestamp: timestamp || Date.now(),
|
||||
duration: duration || 0,
|
||||
pid: subprocess?.pid,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -949,10 +998,16 @@ function getCombinedPath(execPath) {
|
||||
return _combinedPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {object} SpawnBunResult
|
||||
* @extends SpawnResult
|
||||
* @property {string} [crashes]
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {string} execPath Path to bun binary
|
||||
* @param {SpawnOptions} options
|
||||
* @returns {Promise<SpawnResult>}
|
||||
* @returns {Promise<SpawnBunResult>}
|
||||
*/
|
||||
async function spawnBun(execPath, { args, cwd, timeout, env, stdout, stderr }) {
|
||||
const path = getCombinedPath(execPath);
|
||||
@@ -971,11 +1026,13 @@ async function spawnBun(execPath, { args, cwd, timeout, env, stdout, stderr }) {
|
||||
BUN_DEBUG_QUIET_LOGS: "1",
|
||||
BUN_GARBAGE_COLLECTOR_LEVEL: "1",
|
||||
BUN_JSC_randomIntegrityAuditRate: "1.0",
|
||||
BUN_ENABLE_CRASH_REPORTING: "0", // change this to '1' if https://github.com/oven-sh/bun/issues/13012 is implemented
|
||||
BUN_RUNTIME_TRANSPILER_CACHE_PATH: "0",
|
||||
BUN_INSTALL_CACHE_DIR: tmpdirPath,
|
||||
SHELLOPTS: isWindows ? "igncr" : undefined, // ignore "\r" on Windows
|
||||
TEST_TMPDIR: tmpdirPath, // Used in Node.js tests.
|
||||
...(typeof remapPort == "number"
|
||||
? { BUN_CRASH_REPORT_URL: `http://localhost:${remapPort}` }
|
||||
: { BUN_ENABLE_CRASH_REPORTING: "0" }),
|
||||
};
|
||||
|
||||
if (basename(execPath).includes("asan")) {
|
||||
@@ -999,7 +1056,8 @@ async function spawnBun(execPath, { args, cwd, timeout, env, stdout, stderr }) {
|
||||
bunEnv["TEMP"] = tmpdirPath;
|
||||
}
|
||||
try {
|
||||
return await spawnSafe({
|
||||
const existingCores = options["coredump-upload"] ? readdirSync(coresDir) : [];
|
||||
const result = await spawnSafe({
|
||||
command: execPath,
|
||||
args,
|
||||
cwd,
|
||||
@@ -1008,6 +1066,85 @@ async function spawnBun(execPath, { args, cwd, timeout, env, stdout, stderr }) {
|
||||
stdout,
|
||||
stderr,
|
||||
});
|
||||
const newCores = options["coredump-upload"] ? readdirSync(coresDir).filter(c => !existingCores.includes(c)) : [];
|
||||
let crashes = "";
|
||||
if (options["coredump-upload"] && (result.signalCode !== null || newCores.length > 0)) {
|
||||
// warn if the main PID crashed and we don't have a core
|
||||
if (result.signalCode !== null && !newCores.some(c => c.endsWith(`${result.pid}.core`))) {
|
||||
crashes += `main process killed by ${result.signalCode} but no core file found\n`;
|
||||
}
|
||||
|
||||
if (options["fail-on-coredump-or-report"] && newCores.length > 0) {
|
||||
result.ok = false;
|
||||
if (!isAlwaysFailure(result.error)) result.error = "core dumped";
|
||||
}
|
||||
|
||||
for (const coreName of newCores) {
|
||||
const corePath = join(coresDir, coreName);
|
||||
let out = "";
|
||||
const gdb = await spawnSafe({
|
||||
command: "gdb",
|
||||
args: ["-batch", `--eval-command=bt`, "--core", corePath, execPath],
|
||||
timeout: 240_000,
|
||||
stderr: () => {},
|
||||
stdout(text) {
|
||||
out += text;
|
||||
},
|
||||
});
|
||||
if (!gdb.ok) {
|
||||
crashes += `failed to get backtrace from GDB: ${gdb.error}\n`;
|
||||
} else {
|
||||
crashes += `======== Stack trace from GDB for ${coreName}: ========\n`;
|
||||
for (const line of out.split("\n")) {
|
||||
// filter GDB output since it is pretty verbose
|
||||
if (
|
||||
line.startsWith("Program terminated") ||
|
||||
line.startsWith("#") || // gdb backtrace lines start with #0, #1, etc.
|
||||
line.startsWith("[Current thread is")
|
||||
) {
|
||||
crashes += line + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Skip this if the remap server didn't work or if Bun exited normally
|
||||
// (tests in which a subprocess crashed should at least set exit code 1)
|
||||
if (typeof remapPort == "number" && result.exitCode !== 0) {
|
||||
try {
|
||||
// When Bun crashes, it exits before the subcommand it runs to upload the crash report has necessarily finished.
|
||||
// So wait a little bit to make sure that the crash report has at least started uploading
|
||||
// (once the server sees the /ack request then /traces will wait for any crashes to finish processing)
|
||||
await setTimeoutPromise(500);
|
||||
const response = await fetch(`http://localhost:${remapPort}/traces`);
|
||||
if (!response.ok || response.status !== 200) throw new Error(`server responded with code ${response.status}`);
|
||||
const traces = await response.json();
|
||||
if (traces.length > 0) {
|
||||
if (options["fail-on-coredump-or-report"]) {
|
||||
result.ok = false;
|
||||
if (!isAlwaysFailure(result.error)) result.error = "crash reported";
|
||||
}
|
||||
crashes += `${traces.length} crashes reported during this test\n`;
|
||||
for (const t of traces) {
|
||||
if (t.failed_parse) {
|
||||
crashes += "Trace string failed to parse:\n";
|
||||
crashes += t.failed_parse + "\n";
|
||||
} else if (t.failed_remap) {
|
||||
crashes += "Parsed trace failed to remap:\n";
|
||||
crashes += JSON.stringify(t.failed_remap, null, 2) + "\n";
|
||||
} else {
|
||||
crashes += "================\n";
|
||||
crashes += t.remap + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
crashes += "failed to fetch traces: " + e.toString() + "\n";
|
||||
}
|
||||
}
|
||||
if (crashes.length > 0) result.crashes = crashes;
|
||||
return result;
|
||||
} finally {
|
||||
try {
|
||||
rmSync(tmpdirPath, { recursive: true, force: true });
|
||||
@@ -1093,7 +1230,7 @@ async function spawnBunTest(execPath, testPath, options = { cwd }) {
|
||||
env.BUN_JSC_validateExceptionChecks = "1";
|
||||
}
|
||||
|
||||
const { ok, error, stdout } = await spawnBun(execPath, {
|
||||
const { ok, error, stdout, crashes } = await spawnBun(execPath, {
|
||||
args: isReallyTest ? testArgs : [...args, absPath],
|
||||
cwd: options["cwd"],
|
||||
timeout: isReallyTest ? timeout : 30_000,
|
||||
@@ -1101,7 +1238,8 @@ async function spawnBunTest(execPath, testPath, options = { cwd }) {
|
||||
stdout: options.stdout,
|
||||
stderr: options.stderr,
|
||||
});
|
||||
const { tests, errors, stdout: stdoutPreview } = parseTestStdout(stdout, testPath);
|
||||
let { tests, errors, stdout: stdoutPreview } = parseTestStdout(stdout, testPath);
|
||||
if (crashes) stdoutPreview += crashes;
|
||||
|
||||
// If we generated a JUnit file and we're on BuildKite, upload it immediately
|
||||
if (junitFilePath && isReallyTest && isBuildkite && cliOptions["junit-upload"]) {
|
||||
@@ -1276,11 +1414,12 @@ function parseTestStdout(stdout, testPath) {
|
||||
* @returns {Promise<TestResult>}
|
||||
*/
|
||||
async function spawnBunInstall(execPath, options) {
|
||||
const { ok, error, stdout, duration } = await spawnBun(execPath, {
|
||||
let { ok, error, stdout, duration, crashes } = await spawnBun(execPath, {
|
||||
args: ["install"],
|
||||
timeout: testTimeout,
|
||||
...options,
|
||||
});
|
||||
if (crashes) stdout += crashes;
|
||||
const relativePath = relative(cwd, options.cwd);
|
||||
const testPath = join(relativePath, "package.json");
|
||||
const status = ok ? "pass" : "fail";
|
||||
@@ -2015,7 +2154,9 @@ function isAlwaysFailure(error) {
|
||||
error.includes("segmentation fault") ||
|
||||
error.includes("illegal instruction") ||
|
||||
error.includes("sigtrap") ||
|
||||
error.includes("error: addresssanitizer")
|
||||
error.includes("error: addresssanitizer") ||
|
||||
error.includes("core dumped") ||
|
||||
error.includes("crash reported")
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user