From 5b8eadb2ea16206bac2bb1c0b06f35bd4d0aae15 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 29 Apr 2024 18:43:47 +0200 Subject: [PATCH] Make UTextFromTextBuffer newline aware (#17120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR achieves two things: * When encountering rows with newlines (`WasForceWrapped` = `false`) we'll now copy the contents out of the row and append a `\n`. To make `utext_clone` cheap, it adds a reference counted buffer. * Text extraction in `Terminal::GetHyperlinkAtBufferPosition` was fixed by using a higher level `TextBuffer::GetPlainText` instead of iterating through each cell. Closes #16676 Closes #17065 ## Validation Steps Performed * In pwsh execute the following: ``"`e[999C`e[22Dhttps://example.com/foo`nbar"`` * Hovering over the URL only underlines `.../foo` and not `bar` ✅ * The tooltip ends in `.../foo` and not `.../fo` ✅ --- src/buffer/out/UTextAdapter.cpp | 135 +++++++++++++++++++++++-- src/buffer/out/UTextAdapter.h | 3 +- src/buffer/out/textBuffer.cpp | 21 +--- src/buffer/out/textBuffer.hpp | 2 +- src/cascadia/TerminalCore/Terminal.cpp | 9 +- 5 files changed, 131 insertions(+), 39 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 7dec8d2dc04..84ba9583f0c 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -6,19 +6,87 @@ #include "textBuffer.hpp" +// All of these are somewhat annoying when trying to implement RefcountBuffer. +// You can't stuff a unique_ptr into ut->q (= void*) after all. +#pragma warning(disable : 26402) // Return a scoped object instead of a heap-allocated if it has a move constructor (r.3). +#pragma warning(disable : 26403) // Reset or explicitly delete an owner pointer '...' (r.3). +#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique instead (r.11). + struct RowRange { til::CoordType begin; til::CoordType end; }; +struct RefcountBuffer +{ + size_t references; + size_t capacity; + wchar_t data[1]; + + static RefcountBuffer* EnsureCapacityForOverwrite(RefcountBuffer* buffer, size_t capacity) + { + // We must not just ensure that `buffer` has at least `capacity`, but also that its reference count is <= 1, because otherwise we would resize a shared buffer. + if (buffer != nullptr && buffer->references <= 1 && buffer->capacity >= capacity) + { + return buffer; + } + + const auto oldCapacity = buffer ? buffer->capacity << 1 : 0; + const auto newCapacity = std::max(capacity + 128, oldCapacity); + const auto newBuffer = static_cast(::operator new(sizeof(RefcountBuffer) - sizeof(data) + newCapacity * sizeof(wchar_t))); + + if (!newBuffer) + { + return nullptr; + } + + if (buffer) + { + buffer->Release(); + } + + // Copying the old buffer's data is not necessary because utextAccess() will scribble right over it. + newBuffer->references = 1; + newBuffer->capacity = newCapacity; + return newBuffer; + } + + void AddRef() noexcept + { + // With our usage patterns, either of these two would indicate + // an unbalanced AddRef/Release or a memory corruption. + assert(references > 0 && references < 1000); + references++; + } + + void Release() noexcept + { + // With our usage patterns, either of these two would indicate + // an unbalanced AddRef/Release or a memory corruption. + assert(references > 0 && references < 1000); + if (--references == 0) + { + ::operator delete(this); + } + } +}; + constexpr size_t& accessLength(UText* ut) noexcept { + static_assert(sizeof(ut->p) == sizeof(size_t)); return *std::bit_cast(&ut->p); } +constexpr RefcountBuffer*& accessBuffer(UText* ut) noexcept +{ + static_assert(sizeof(ut->q) == sizeof(RefcountBuffer*)); + return *std::bit_cast(&ut->q); +} + constexpr RowRange& accessRowRange(UText* ut) noexcept { + static_assert(sizeof(ut->a) == sizeof(RowRange)); return *std::bit_cast(&ut->a); } @@ -56,11 +124,16 @@ static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, U } dest = utext_setup(dest, 0, status); - if (*status <= U_ZERO_ERROR) + if (*status > U_ZERO_ERROR) { - memcpy(dest, src, sizeof(UText)); + return dest; } + memcpy(dest, src, sizeof(UText)); + if (const auto buf = accessBuffer(dest)) + { + buf->AddRef(); + } return dest; } @@ -82,7 +155,9 @@ try for (til::CoordType y = range.begin; y < range.end; ++y) { - length += textBuffer.GetRowByOffset(y).GetText().size(); + const auto& row = textBuffer.GetRowByOffset(y); + // Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here. + length += row.GetText().size() + !row.WasWrapForced(); } accessLength(ut) = length; @@ -126,11 +201,13 @@ try const auto range = accessRowRange(ut); auto start = ut->chunkNativeStart; auto limit = ut->chunkNativeLimit; - auto y = accessCurrentRow(ut); - std::wstring_view text; if (neededIndex < start || neededIndex >= limit) { + auto y = accessCurrentRow(ut); + std::wstring_view text; + bool wasWrapForced = false; + if (neededIndex < start) { do @@ -138,12 +215,17 @@ try --y; if (y < range.begin) { + assert(false); return false; } - text = textBuffer.GetRowByOffset(y).GetText(); + const auto& row = textBuffer.GetRowByOffset(y); + text = row.GetText(); + wasWrapForced = row.WasWrapForced(); + limit = start; - start -= text.size(); + // Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here. + start -= text.size() + !wasWrapForced; } while (neededIndex < start); } else @@ -153,15 +235,32 @@ try ++y; if (y >= range.end) { + assert(false); return false; } - text = textBuffer.GetRowByOffset(y).GetText(); + const auto& row = textBuffer.GetRowByOffset(y); + text = row.GetText(); + wasWrapForced = row.WasWrapForced(); + start = limit; - limit += text.size(); + // Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here. + limit += text.size() + !wasWrapForced; } while (neededIndex >= limit); } + if (!wasWrapForced) + { + const auto newSize = text.size() + 1; + const auto buffer = RefcountBuffer::EnsureCapacityForOverwrite(accessBuffer(ut), newSize); + + memcpy(&buffer->data[0], text.data(), text.size() * sizeof(wchar_t)); + til::at(buffer->data, text.size()) = L'\n'; + + text = { &buffer->data[0], newSize }; + accessBuffer(ut) = buffer; + } + accessCurrentRow(ut) = y; ut->chunkNativeStart = start; ut->chunkNativeLimit = limit; @@ -256,18 +355,32 @@ catch (...) return 0; } +static void U_CALLCONV utextClose(UText* ut) noexcept +{ + if (const auto buffer = accessBuffer(ut)) + { + buffer->Release(); + } +} + static constexpr UTextFuncs utextFuncs{ .tableSize = sizeof(UTextFuncs), .clone = utextClone, .nativeLength = utextNativeLength, .access = utextAccess, + .close = utextClose, }; // Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd). -UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept +Microsoft::Console::ICU::unique_utext Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept { #pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). - UText ut = UTEXT_INITIALIZER; + unique_utext ut{ UTEXT_INITIALIZER }; + + UErrorCode status = U_ZERO_ERROR; + utext_setup(&ut, 0, &status); + FAIL_FAST_IF(status > U_ZERO_ERROR); + ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); ut.pFuncs = &utextFuncs; ut.context = &textBuffer; diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h index c8c325143ef..39903627b55 100644 --- a/src/buffer/out/UTextAdapter.h +++ b/src/buffer/out/UTextAdapter.h @@ -10,8 +10,9 @@ class TextBuffer; namespace Microsoft::Console::ICU { using unique_uregex = wistd::unique_ptr>; + using unique_utext = wil::unique_struct; - UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept; + unique_utext UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept; unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); } diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 1b5b28c621b..10c70646e9d 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -1995,25 +1995,10 @@ size_t TextBuffer::SpanLength(const til::point coordStart, const til::point coor // - end - where to end getting text // Return Value: // - Just the text. -std::wstring TextBuffer::GetPlainText(const til::point& start, const til::point& end) const +std::wstring TextBuffer::GetPlainText(const til::point start, const til::point end) const { - std::wstring text; - auto spanLength = SpanLength(start, end); - text.reserve(spanLength); - - auto it = GetCellDataAt(start); - - for (; it && spanLength > 0; ++it, --spanLength) - { - const auto& cell = *it; - if (cell.DbcsAttr() != DbcsAttribute::Trailing) - { - const auto chars = cell.Chars(); - text.append(chars); - } - } - - return text; + const auto req = CopyRequest::FromConfig(*this, start, end, true, false, false, false); + return GetPlainText(req); } // Routine Description: diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index f250749c577..5a8a5c16547 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -196,7 +196,7 @@ class TextBuffer final size_t SpanLength(const til::point coordStart, const til::point coordEnd) const; - std::wstring GetPlainText(const til::point& start, const til::point& end) const; + std::wstring GetPlainText(til::point start, til::point end) const; struct CopyRequest { diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 854d54394d0..80f89a06320 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -522,14 +522,7 @@ std::wstring Terminal::GetHyperlinkAtBufferPosition(const til::point bufferPos) // Case 2 - Step 2: get the auto-detected hyperlink if (result.has_value() && result->value == _hyperlinkPatternId) { - std::wstring uri; - const auto startIter = _activeBuffer().GetCellDataAt(result->start); - const auto endIter = _activeBuffer().GetCellDataAt(result->stop); - for (auto iter = startIter; iter != endIter; ++iter) - { - uri += iter->Chars(); - } - return uri; + return _activeBuffer().GetPlainText(result->start, result->stop); } return {}; }