fix(http): preserve bytes >= 0x80 in HTTP header values (#27269)

## Summary
- Fix signed `char` comparison bug in uWS `HttpParser.h` that caused
bytes >= 0x80 to be stripped from HTTP header values on platforms where
`char` is signed (x86_64 Linux, x86_64 macOS, ARM macOS)
- Replace `< 33` comparison with explicit checks for SP and HTAB per RFC
9110 Section 5.5
- This also fixes a potential request smuggling vector where
`Transfer-Encoding: \xffchunked\xff` would be interpreted as
`Transfer-Encoding: chunked`

Closes #8893

## Root Cause
In `packages/bun-uws/src/HttpParser.h`, the `getHeaders` function
trimmed whitespace from header values using:
```cpp
while (headers->value.back() < 33) { ... }
```
`std::string_view::back()` returns `char`, which is **signed** on
x86_64 and on ARM macOS. There, bytes 0x80-0xFF are negative values
(-128 to -1), all less than 33, so they were incorrectly stripped as
whitespace.

## Fix
Replace the numeric comparison with explicit OWS character checks:
```cpp
while (headers->value.back() == ' ' || headers->value.back() == '\t') { ... }
```
This matches RFC 9110: Section 5.5 requires leading and trailing
whitespace to be excluded from a field value, and Section 5.6.3 defines
OWS (optional whitespace) as only SP and HTAB.

## Test plan
- [x] Added regression test `test/regression/issue/08893.test.ts` that
sends raw HTTP requests with 0xFF bytes in header values and verifies
they are preserved
- [x] Added test that SP/HTAB trimming still works correctly
- [x] `bun bd test test/regression/issue/08893.test.ts` passes (2/2
tests)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
This commit is contained in:
robobun
2026-03-01 00:08:02 -08:00
committed by GitHub
parent c5f0e4adf7
commit 668d960f1a
2 changed files with 86 additions and 4 deletions

View File

@@ -504,6 +504,11 @@ namespace uWS
return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) || c == '-';
}
/* Returns true when the octet is HTTP optional whitespace (OWS), i.e. SP or
 * HTAB (RFC 9110 Section 5.6.3). Section 5.5 requires such whitespace to be
 * excluded from both ends of a field value. The parameter is an unsigned
 * char, so octets >= 0x80 are never seen as negative values here. */
static inline bool isHTTPHeaderValueWhitespace(unsigned char c) {
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
static inline int isHTTPorHTTPSPrefixForProxies(char *data, char *end) {
// We can check 8 because:
// 1. If it's "http://" that's 7 bytes, and it's supposed to at least have a trailing slash.
@@ -775,13 +780,13 @@ namespace uWS
/* Store this header, it is valid */
headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
postPaddedBuffer += 2;
/* Trim trailing whitespace (SP, HTAB) */
while (headers->value.length() && headers->value.back() < 33) {
/* Trim trailing whitespace (SP, HTAB) per RFC 9110 Section 5.5 */
while (headers->value.length() && isHTTPHeaderValueWhitespace(headers->value.back())) {
headers->value.remove_suffix(1);
}
/* Trim initial whitespace (SP, HTAB) */
while (headers->value.length() && headers->value.front() < 33) {
/* Trim initial whitespace (SP, HTAB) per RFC 9110 Section 5.5 */
while (headers->value.length() && isHTTPHeaderValueWhitespace(headers->value.front())) {
headers->value.remove_prefix(1);
}

View File

@@ -0,0 +1,77 @@
import { expect, test } from "bun:test";
import net from "net";
// Regression test for https://github.com/oven-sh/bun/issues/8893
// Bytes >= 0x80 in HTTP header values were incorrectly stripped because
// the whitespace trimming in HttpParser.h compared signed chars against 33.
// On platforms where char is signed (x86_64), bytes 0x80-0xFF are negative
// and thus < 33, causing them to be trimmed as if they were whitespace.
test("header values preserve bytes >= 0x80", async () => {
  // Written by the fetch handler below; stays null if no request reaches it.
  let receivedValue: string | null = null;

  await using server = Bun.serve({
    port: 0,
    fetch(req) {
      receivedValue = req.headers.get("x-test");
      return new Response("OK");
    },
  });

  const client = net.connect(server.port, "127.0.0.1");

  // Send a raw HTTP request with 0xFF bytes surrounding the header value
  const request = Buffer.concat([
    Buffer.from("GET / HTTP/1.1\r\nHost: localhost\r\nX-Test: "),
    Buffer.from([0xff]),
    Buffer.from("value"),
    Buffer.from([0xff]),
    Buffer.from("\r\n\r\n"),
  ]);

  await new Promise<void>((resolve, reject) => {
    client.once("error", reject);
    // If the socket closes before any data arrives, fail instead of hanging.
    // After the promise has settled this call is a no-op.
    client.once("close", () => reject(new Error("socket closed before a response was received")));
    client.once("data", data => {
      // An exception thrown inside an event listener does not reject the
      // surrounding promise, so assertion failures are forwarded explicitly.
      try {
        const response = data.toString();
        expect(response).toContain("HTTP/1.1 200");
        // The header value should preserve the 0xFF bytes — not strip them.
        // 0xFF as a Latin-1 byte becomes U+00FF (ÿ) in the JS string.
        expect(receivedValue).not.toBeNull();
        expect(receivedValue!.length).toBe(7);
        expect(receivedValue!.charCodeAt(0)).toBe(0xff);
        expect(receivedValue!.charCodeAt(6)).toBe(0xff);
        resolve();
      } catch (err) {
        reject(err);
      } finally {
        // Always release the socket, whether the assertions passed or not.
        client.end();
      }
    });
    client.write(request);
  });
});
test("header values still trim actual whitespace (SP, HTAB)", async () => {
  // Written by the fetch handler below; stays null if no request reaches it.
  let receivedValue: string | null = null;

  await using server = Bun.serve({
    port: 0,
    fetch(req) {
      receivedValue = req.headers.get("x-test");
      return new Response("OK");
    },
  });

  const client = net.connect(server.port, "127.0.0.1");

  // Send a raw HTTP request with spaces and tabs surrounding the header value
  const request = Buffer.from("GET / HTTP/1.1\r\nHost: localhost\r\nX-Test: \t value \t \r\n\r\n");

  await new Promise<void>((resolve, reject) => {
    client.once("error", reject);
    // If the socket closes before any data arrives, fail instead of hanging.
    // After the promise has settled this call is a no-op.
    client.once("close", () => reject(new Error("socket closed before a response was received")));
    client.once("data", data => {
      // An exception thrown inside an event listener does not reject the
      // surrounding promise, so assertion failures are forwarded explicitly.
      try {
        const response = data.toString();
        expect(response).toContain("HTTP/1.1 200");
        expect(receivedValue).toBe("value");
        resolve();
      } catch (err) {
        reject(err);
      } finally {
        // Always release the socket, whether the assertions passed or not.
        client.end();
      }
    });
    client.write(request);
  });
});