Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make UTextFromTextBuffer newline aware #17120

Merged
merged 3 commits into from Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
118 changes: 109 additions & 9 deletions src/buffer/out/UTextAdapter.cpp
Expand Up @@ -12,13 +12,75 @@ struct RowRange
til::CoordType end;
};

struct RefcountBuffer
DHowett marked this conversation as resolved.
Show resolved Hide resolved
{
size_t references;
size_t capacity;
wchar_t data[1];

static RefcountBuffer* EnsureCapacityForOverwrite(RefcountBuffer* buffer, size_t capacity)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This might warrant a doc comment. This is the first thing in the review, and it's not immediately clear what it's trying to achieve

check if buffer exists, and is at least size capacity. If it isn't, make a new one at least the size of capacity.

{
// We must not just ensure that `buffer` has at least `capacity`, but also that its reference count is <= 1, because otherwise we would resize a shared buffer.
if (buffer != nullptr && buffer->references <= 1 && buffer->capacity >= capacity)
{
return buffer;
}

const auto oldCapacity = buffer ? buffer->capacity << 1 : 0;
const auto newCapacity = std::max(capacity + 128, oldCapacity);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we had to make a new buffer, make the new buffer's size the bigger of (2x the old buffer's size OR capacity+128). I guess to add some padding, in case the caller only needed 80 right now, but might want 200 with this same buffer

const auto newBuffer = static_cast<RefcountBuffer*>(malloc(sizeof(RefcountBuffer) - sizeof(data) + newCapacity * sizeof(wchar_t)));

if (newBuffer == nullptr)
{
return nullptr;
}

if (buffer)
{
buffer->Release();
}

// Copying the old buffer's data is not necessary because utextAccess() will scribble right over it.
newBuffer->references = 1;
newBuffer->capacity = newCapacity;
return newBuffer;
}

void AddRef()
{
// With our usage patterns, either of these two would indicate
// an unbalanced AddRef/Release or a memory corruption.
assert(references > 0 && references < 1000);
references++;
}

void Release()
{
// With our usage patterns, either of these two would indicate
// an unbalanced AddRef/Release or a memory corruption.
assert(references > 0 && references < 1000);
if (--references == 0)
{
free(this);
}
}
};

constexpr size_t& accessLength(UText* ut) noexcept
{
static_assert(sizeof(ut->p) == sizeof(size_t));
return *std::bit_cast<size_t*>(&ut->p);
}

constexpr RefcountBuffer*& accessBuffer(UText* ut) noexcept
{
static_assert(sizeof(ut->q) == sizeof(RefcountBuffer*));
return *std::bit_cast<RefcountBuffer**>(&ut->q);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh so this is some spooky bit casting magic, got it

}

constexpr RowRange& accessRowRange(UText* ut) noexcept
{
static_assert(sizeof(ut->a) == sizeof(RowRange));
return *std::bit_cast<RowRange*>(&ut->a);
}

Expand Down Expand Up @@ -56,11 +118,16 @@ static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, U
}

dest = utext_setup(dest, 0, status);
if (*status <= U_ZERO_ERROR)
if (*status > U_ZERO_ERROR)
{
memcpy(dest, src, sizeof(UText));
return dest;
}

memcpy(dest, src, sizeof(UText));
if (const auto buf = accessBuffer(dest))
{
buf->AddRef();
}
return dest;
}

Expand Down Expand Up @@ -126,11 +193,13 @@ try
const auto range = accessRowRange(ut);
auto start = ut->chunkNativeStart;
auto limit = ut->chunkNativeLimit;
auto y = accessCurrentRow(ut);
std::wstring_view text;

if (neededIndex < start || neededIndex >= limit)
{
auto y = accessCurrentRow(ut);
std::wstring_view text;
bool wasWrapForced = false;

if (neededIndex < start)
{
do
Expand All @@ -141,7 +210,10 @@ try
return false;
}

text = textBuffer.GetRowByOffset(y).GetText();
const auto& row = textBuffer.GetRowByOffset(y);
text = row.GetText();
wasWrapForced = row.WasWrapForced();

limit = start;
start -= text.size();
} while (neededIndex < start);
Expand All @@ -156,12 +228,27 @@ try
return false;
}

text = textBuffer.GetRowByOffset(y).GetText();
const auto& row = textBuffer.GetRowByOffset(y);
text = row.GetText();
wasWrapForced = row.WasWrapForced();

start = limit;
limit += text.size();
} while (neededIndex >= limit);
}

if (!wasWrapForced)
{
const auto requiredSpace = text.size() + 1;
const auto buffer = RefcountBuffer::EnsureCapacityForOverwrite(accessBuffer(ut), requiredSpace);

memcpy(&buffer->data[0], text.data(), text.size() * sizeof(wchar_t));
buffer->data[text.size()] = L'\n';

text = { &buffer->data[0], requiredSpace };
accessBuffer(ut) = buffer;
}

accessCurrentRow(ut) = y;
ut->chunkNativeStart = start;
ut->chunkNativeLimit = limit;
Expand Down Expand Up @@ -256,18 +343,31 @@ catch (...)
return 0;
}

static void U_CALLCONV utextClose(UText* ut) noexcept
{
if (const auto buffer = accessBuffer(ut))
{
buffer->Release();
}
}

static constexpr UTextFuncs utextFuncs{
.tableSize = sizeof(UTextFuncs),
.clone = utextClone,
.nativeLength = utextNativeLength,
.access = utextAccess,
.close = utextClose,
};

// Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd).
UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept
Microsoft::Console::ICU::unique_utext Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept
{
#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47).
UText ut = UTEXT_INITIALIZER;
unique_utext ut{ UTEXT_INITIALIZER };

UErrorCode status = U_ZERO_ERROR;
utext_setup(&ut, 0, &status);
THROW_HR_IF(E_UNEXPECTED, status > U_ZERO_ERROR);

ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS);
ut.pFuncs = &utextFuncs;
ut.context = &textBuffer;
Expand Down
3 changes: 2 additions & 1 deletion src/buffer/out/UTextAdapter.h
Expand Up @@ -10,8 +10,9 @@ class TextBuffer;
namespace Microsoft::Console::ICU
{
using unique_uregex = wistd::unique_ptr<URegularExpression, wil::function_deleter<decltype(&uregex_close), &uregex_close>>;
using unique_utext = wil::unique_struct<UText, decltype(&utext_close), &utext_close>;

UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept;
unique_utext UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept;
unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept;
til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re);
}
21 changes: 3 additions & 18 deletions src/buffer/out/textBuffer.cpp
Expand Up @@ -1995,25 +1995,10 @@ size_t TextBuffer::SpanLength(const til::point coordStart, const til::point coor
// - end - where to end getting text
// Return Value:
// - Just the text.
std::wstring TextBuffer::GetPlainText(const til::point& start, const til::point& end) const
std::wstring TextBuffer::GetPlainText(const til::point start, const til::point end) const
{
std::wstring text;
auto spanLength = SpanLength(start, end);
text.reserve(spanLength);

auto it = GetCellDataAt(start);

for (; it && spanLength > 0; ++it, --spanLength)
{
const auto& cell = *it;
if (cell.DbcsAttr() != DbcsAttribute::Trailing)
{
const auto chars = cell.Chars();
text.append(chars);
}
}

return text;
const auto req = CopyRequest::FromConfig(*this, start, end, true, false, false, false);
Copy link
Member Author

@lhecker lhecker Apr 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I believe we should remove the constructor from CopyRequest and instead sanitize it in each individual function that accepts it. This would allow the callers to use designated initializers like so:

CopyRequest req{
    .beg = ...,
    .end = ...,
    .singleLine = true,
};

This would turn CopyRequest into a true request struct IMO. It would be similar to sanitizing a web request on the server to protect against malicious clients.

return GetPlainText(req);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we sure this is functionally equivalent? can you test the known multi-cell-character URL bugs?

Copy link
Member Author

@lhecker lhecker Apr 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetPlainText is only used when pressing Ctrl+Enter on a URL (in mark mode) if it's detected via pattern, and the pattern only supports ASCII characters.

}

// Routine Description:
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/textBuffer.hpp
Expand Up @@ -196,7 +196,7 @@ class TextBuffer final

size_t SpanLength(const til::point coordStart, const til::point coordEnd) const;

std::wstring GetPlainText(const til::point& start, const til::point& end) const;
std::wstring GetPlainText(til::point start, til::point end) const;

struct CopyRequest
{
Expand Down
9 changes: 1 addition & 8 deletions src/cascadia/TerminalCore/Terminal.cpp
Expand Up @@ -522,14 +522,7 @@ std::wstring Terminal::GetHyperlinkAtBufferPosition(const til::point bufferPos)
// Case 2 - Step 2: get the auto-detected hyperlink
if (result.has_value() && result->value == _hyperlinkPatternId)
{
std::wstring uri;
const auto startIter = _activeBuffer().GetCellDataAt(result->start);
const auto endIter = _activeBuffer().GetCellDataAt(result->stop);
for (auto iter = startIter; iter != endIter; ++iter)
{
uri += iter->Chars();
}
return uri;
return _activeBuffer().GetPlainText(result->start, result->stop);
}
return {};
}
Expand Down