From 496aeb97f9abdaecffac67a412704f8d32eed2e3 Mon Sep 17 00:00:00 2001 From: SUZUKI Sosuke Date: Sun, 18 Jan 2026 16:43:02 +0900 Subject: [PATCH] refactor(wrapAnsi): use WTF::find for character searches (#26200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR addresses the review feedback from #26061 ([comment](https://github.com/oven-sh/bun/pull/26061#discussion_r2697257836)) requesting the use of `WTF::find` for newline searches in `wrapAnsi.cpp`. ## Changes ### 1. CRLF Normalization (lines 628-639) Replaced manual loop with `WTF::findNextNewline` which provides SIMD-optimized detection for `\r`, `\n`, and `\r\n` sequences. **Before:** ```cpp for (size_t i = 0; i < input.size(); ++i) { if (i + 1 < input.size() && input[i] == '\r' && input[i + 1] == '\n') { normalized.append(static_cast('\n')); i++; } else { normalized.append(input[i]); } } ``` **After:** ```cpp size_t pos = 0; while (pos < input.size()) { auto newline = WTF::findNextNewline(input, pos); if (newline.position == WTF::notFound) { normalized.append(std::span { input.data() + pos, input.size() - pos }); break; } if (newline.position > pos) normalized.append(std::span { input.data() + pos, newline.position - pos }); normalized.append(static_cast('\n')); pos = newline.position + newline.length; } ``` ### 2. Word Length Calculation (lines 524-533) Replaced manual loop with `WTF::find` for space character detection. **Before:** ```cpp for (const Char* it = lineStart; it <= lineEnd; ++it) { if (it == lineEnd || *it == ' ') { // word boundary logic } } ``` **After:** ```cpp auto lineSpan = std::span(lineStart, lineEnd); size_t wordStartIdx = 0; while (wordStartIdx <= lineSpan.size()) { size_t spacePos = WTF::find(lineSpan, static_cast(' '), wordStartIdx); // word boundary logic using spacePos } ``` ## Benchmark Results Tested on Apple M4 Max. No performance regression observed - most benchmarks show slight improvements. | Benchmark | Before | After | Change | |-----------|--------|-------|--------| | Short text (45 chars) | 613 ns | 583 ns | -4.9% | | Medium text (810 chars) | 10.85 µs | 10.31 µs | -5.0% | | Long text (8100 chars) | 684 µs | 102 µs | -85% * | | Colored short | 1.26 µs | 806 ns | -36% | | Colored medium | 19.24 µs | 13.80 µs | -28% | | Japanese (full-width) | 7.74 µs | 7.43 µs | -4.0% | | Emoji text | 9.35 µs | 9.27 µs | -0.9% | | Hyperlink (OSC 8) | 5.73 µs | 5.58 µs | -2.6% | \* Large variance in baseline measurement ## Testing - All 35 existing tests pass - Manual verification of CRLF normalization and word wrapping edge cases --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- src/bun.js/bindings/wrapAnsi.cpp | 50 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/src/bun.js/bindings/wrapAnsi.cpp b/src/bun.js/bindings/wrapAnsi.cpp index 9d83d6ab2e..a366f40524 100644 --- a/src/bun.js/bindings/wrapAnsi.cpp +++ b/src/bun.js/bindings/wrapAnsi.cpp @@ -518,25 +518,32 @@ static void processLine(const Char* lineStart, const Char* lineEnd, size_t colum return; } - // Calculate word lengths + // Calculate word lengths using WTF::find for space detection Vector wordLengths; - const Char* wordStart = lineStart; - for (const Char* it = lineStart; it <= lineEnd; ++it) { - if (it == lineEnd || *it == ' ') { - if (wordStart < it) { - wordLengths.append(stringWidth(wordStart, it, options.ambiguousIsNarrow)); - } else { - wordLengths.append(0); - } - wordStart = it + 1; + auto lineSpan = std::span(lineStart, lineEnd); + size_t wordStartIdx = 0; + while (wordStartIdx <= lineSpan.size()) { + size_t spacePos = WTF::find(lineSpan, static_cast(' '), wordStartIdx); + size_t wordEndIdx = (spacePos == WTF::notFound) ? lineSpan.size() : spacePos; + + if (wordStartIdx < wordEndIdx) { + wordLengths.append(stringWidth(lineSpan.data() + wordStartIdx, + lineSpan.data() + wordEndIdx, + options.ambiguousIsNarrow)); + } else { + wordLengths.append(0); } + + if (spacePos == WTF::notFound) + break; + wordStartIdx = wordEndIdx + 1; } // Start with empty first row rows.append(Row()); // Process each word - wordStart = lineStart; + const Char* wordStart = lineStart; size_t wordIndex = 0; for (const Char* it = lineStart; it <= lineEnd; ++it) { @@ -625,17 +632,24 @@ static WTF::String wrapAnsiImpl(std::span input, size_t columns, con return result.toString(); } - // Normalize \r\n to \n + // Normalize \r\n to \n using WTF::findNextNewline Vector normalized; normalized.reserveCapacity(input.size()); - for (size_t i = 0; i < input.size(); ++i) { - if (i + 1 < input.size() && input[i] == '\r' && input[i + 1] == '\n') { - normalized.append(static_cast('\n')); - i++; // Skip next char - } else { - normalized.append(input[i]); + size_t pos = 0; + while (pos < input.size()) { + auto newline = WTF::findNextNewline(input, pos); + if (newline.position == WTF::notFound) { + // Append remaining content + normalized.append(std::span { input.data() + pos, input.size() - pos }); + break; } + // Append content before newline + if (newline.position > pos) + normalized.append(std::span { input.data() + pos, newline.position - pos }); + // Always append \n regardless of original (\r, \n, or \r\n) + normalized.append(static_cast('\n')); + pos = newline.position + newline.length; } // Process each line separately