Skip to content

Commit

Permalink
Make UTextFromTextBuffer newline aware (#17120)
Browse files Browse the repository at this point in the history
This PR achieves two things:
* When encountering rows with newlines (`WasForceWrapped` = `false`)
  we'll now copy the contents out of the row and append a `\n`.
  To make `utext_clone` cheap, it adds a reference counted buffer.
* Text extraction in `Terminal::GetHyperlinkAtBufferPosition`
  was fixed by using a higher level `TextBuffer::GetPlainText`
  instead of iterating through each cell.

Closes #16676
Closes #17065

## Validation Steps Performed
* In pwsh execute the following:
  ``"`e[999C`e[22Dhttps://example.com/foo`nbar"``
* Hovering over the URL only underlines `.../foo` and not `bar` ✅
* The tooltip ends in `.../foo` and not `.../fo` ✅
  • Loading branch information
lhecker committed Apr 29, 2024
1 parent be5a240 commit 5b8eadb
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 39 deletions.
135 changes: 124 additions & 11 deletions src/buffer/out/UTextAdapter.cpp
Expand Up @@ -6,19 +6,87 @@

#include "textBuffer.hpp"

// All of these are somewhat annoying when trying to implement RefcountBuffer.
// You can't stuff a unique_ptr into ut->q (= void*) after all.
#pragma warning(disable : 26402) // Return a scoped object instead of a heap-allocated if it has a move constructor (r.3).
#pragma warning(disable : 26403) // Reset or explicitly delete an owner<T> pointer '...' (r.3).
#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique<T> instead (r.11).

struct RowRange
{
til::CoordType begin;
til::CoordType end;
};

struct RefcountBuffer
{
size_t references;
size_t capacity;
wchar_t data[1];

static RefcountBuffer* EnsureCapacityForOverwrite(RefcountBuffer* buffer, size_t capacity)
{
// We must not just ensure that `buffer` has at least `capacity`, but also that its reference count is <= 1, because otherwise we would resize a shared buffer.
if (buffer != nullptr && buffer->references <= 1 && buffer->capacity >= capacity)
{
return buffer;
}

const auto oldCapacity = buffer ? buffer->capacity << 1 : 0;
const auto newCapacity = std::max(capacity + 128, oldCapacity);
const auto newBuffer = static_cast<RefcountBuffer*>(::operator new(sizeof(RefcountBuffer) - sizeof(data) + newCapacity * sizeof(wchar_t)));

if (!newBuffer)
{
return nullptr;
}

if (buffer)
{
buffer->Release();
}

// Copying the old buffer's data is not necessary because utextAccess() will scribble right over it.
newBuffer->references = 1;
newBuffer->capacity = newCapacity;
return newBuffer;
}

void AddRef() noexcept
{
// With our usage patterns, either of these two would indicate
// an unbalanced AddRef/Release or a memory corruption.
assert(references > 0 && references < 1000);
references++;
}

void Release() noexcept
{
// With our usage patterns, either of these two would indicate
// an unbalanced AddRef/Release or a memory corruption.
assert(references > 0 && references < 1000);
if (--references == 0)
{
::operator delete(this);
}
}
};

constexpr size_t& accessLength(UText* ut) noexcept
{
static_assert(sizeof(ut->p) == sizeof(size_t));
return *std::bit_cast<size_t*>(&ut->p);
}

constexpr RefcountBuffer*& accessBuffer(UText* ut) noexcept
{
static_assert(sizeof(ut->q) == sizeof(RefcountBuffer*));
return *std::bit_cast<RefcountBuffer**>(&ut->q);
}

constexpr RowRange& accessRowRange(UText* ut) noexcept
{
static_assert(sizeof(ut->a) == sizeof(RowRange));
return *std::bit_cast<RowRange*>(&ut->a);
}

Expand Down Expand Up @@ -56,11 +124,16 @@ static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, U
}

dest = utext_setup(dest, 0, status);
if (*status <= U_ZERO_ERROR)
if (*status > U_ZERO_ERROR)
{
memcpy(dest, src, sizeof(UText));
return dest;
}

memcpy(dest, src, sizeof(UText));
if (const auto buf = accessBuffer(dest))
{
buf->AddRef();
}
return dest;
}

Expand All @@ -82,7 +155,9 @@ try

for (til::CoordType y = range.begin; y < range.end; ++y)
{
length += textBuffer.GetRowByOffset(y).GetText().size();
const auto& row = textBuffer.GetRowByOffset(y);
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
length += row.GetText().size() + !row.WasWrapForced();
}

accessLength(ut) = length;
Expand Down Expand Up @@ -126,24 +201,31 @@ try
const auto range = accessRowRange(ut);
auto start = ut->chunkNativeStart;
auto limit = ut->chunkNativeLimit;
auto y = accessCurrentRow(ut);
std::wstring_view text;

if (neededIndex < start || neededIndex >= limit)
{
auto y = accessCurrentRow(ut);
std::wstring_view text;
bool wasWrapForced = false;

if (neededIndex < start)
{
do
{
--y;
if (y < range.begin)
{
assert(false);
return false;
}

text = textBuffer.GetRowByOffset(y).GetText();
const auto& row = textBuffer.GetRowByOffset(y);
text = row.GetText();
wasWrapForced = row.WasWrapForced();

limit = start;
start -= text.size();
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
start -= text.size() + !wasWrapForced;
} while (neededIndex < start);
}
else
Expand All @@ -153,15 +235,32 @@ try
++y;
if (y >= range.end)
{
assert(false);
return false;
}

text = textBuffer.GetRowByOffset(y).GetText();
const auto& row = textBuffer.GetRowByOffset(y);
text = row.GetText();
wasWrapForced = row.WasWrapForced();

start = limit;
limit += text.size();
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
limit += text.size() + !wasWrapForced;
} while (neededIndex >= limit);
}

if (!wasWrapForced)
{
const auto newSize = text.size() + 1;
const auto buffer = RefcountBuffer::EnsureCapacityForOverwrite(accessBuffer(ut), newSize);

memcpy(&buffer->data[0], text.data(), text.size() * sizeof(wchar_t));
til::at(buffer->data, text.size()) = L'\n';

text = { &buffer->data[0], newSize };
accessBuffer(ut) = buffer;
}

accessCurrentRow(ut) = y;
ut->chunkNativeStart = start;
ut->chunkNativeLimit = limit;
Expand Down Expand Up @@ -256,18 +355,32 @@ catch (...)
return 0;
}

static void U_CALLCONV utextClose(UText* ut) noexcept
{
if (const auto buffer = accessBuffer(ut))
{
buffer->Release();
}
}

static constexpr UTextFuncs utextFuncs{
.tableSize = sizeof(UTextFuncs),
.clone = utextClone,
.nativeLength = utextNativeLength,
.access = utextAccess,
.close = utextClose,
};

// Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd).
UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept
Microsoft::Console::ICU::unique_utext Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept
{
#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47).
UText ut = UTEXT_INITIALIZER;
unique_utext ut{ UTEXT_INITIALIZER };

UErrorCode status = U_ZERO_ERROR;
utext_setup(&ut, 0, &status);
FAIL_FAST_IF(status > U_ZERO_ERROR);

ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS);
ut.pFuncs = &utextFuncs;
ut.context = &textBuffer;
Expand Down
3 changes: 2 additions & 1 deletion src/buffer/out/UTextAdapter.h
Expand Up @@ -10,8 +10,9 @@ class TextBuffer;
namespace Microsoft::Console::ICU
{
using unique_uregex = wistd::unique_ptr<URegularExpression, wil::function_deleter<decltype(&uregex_close), &uregex_close>>;
using unique_utext = wil::unique_struct<UText, decltype(&utext_close), &utext_close>;

UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept;
unique_utext UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept;
unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept;
til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re);
}
21 changes: 3 additions & 18 deletions src/buffer/out/textBuffer.cpp
Expand Up @@ -1995,25 +1995,10 @@ size_t TextBuffer::SpanLength(const til::point coordStart, const til::point coor
// - end - where to end getting text
// Return Value:
// - Just the text.
std::wstring TextBuffer::GetPlainText(const til::point& start, const til::point& end) const
std::wstring TextBuffer::GetPlainText(const til::point start, const til::point end) const
{
std::wstring text;
auto spanLength = SpanLength(start, end);
text.reserve(spanLength);

auto it = GetCellDataAt(start);

for (; it && spanLength > 0; ++it, --spanLength)
{
const auto& cell = *it;
if (cell.DbcsAttr() != DbcsAttribute::Trailing)
{
const auto chars = cell.Chars();
text.append(chars);
}
}

return text;
const auto req = CopyRequest::FromConfig(*this, start, end, true, false, false, false);
return GetPlainText(req);
}

// Routine Description:
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/textBuffer.hpp
Expand Up @@ -196,7 +196,7 @@ class TextBuffer final

size_t SpanLength(const til::point coordStart, const til::point coordEnd) const;

std::wstring GetPlainText(const til::point& start, const til::point& end) const;
std::wstring GetPlainText(til::point start, til::point end) const;

struct CopyRequest
{
Expand Down
9 changes: 1 addition & 8 deletions src/cascadia/TerminalCore/Terminal.cpp
Expand Up @@ -522,14 +522,7 @@ std::wstring Terminal::GetHyperlinkAtBufferPosition(const til::point bufferPos)
// Case 2 - Step 2: get the auto-detected hyperlink
if (result.has_value() && result->value == _hyperlinkPatternId)
{
std::wstring uri;
const auto startIter = _activeBuffer().GetCellDataAt(result->start);
const auto endIter = _activeBuffer().GetCellDataAt(result->stop);
for (auto iter = startIter; iter != endIter; ++iter)
{
uri += iter->Chars();
}
return uri;
return _activeBuffer().GetPlainText(result->start, result->stop);
}
return {};
}
Expand Down

0 comments on commit 5b8eadb

Please sign in to comment.