Skip to content

Commit

Permalink
Fixed unicode (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
Macoron committed Jun 17, 2023
1 parent 5a53be7 commit 7484284
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
32 changes: 32 additions & 0 deletions Packages/com.whisper.unity/Runtime/Utils/TextUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace Whisper.Utils
{
public static class TextUtils
{
    /// <summary>
    /// Decodes a zero-terminated UTF-8 byte sequence stored in native memory
    /// into a managed <see cref="string"/>.
    /// </summary>
    /// <param name="nativeUtf8">Address of the first byte of the native string.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="nativeUtf8"/> is <see cref="IntPtr.Zero"/>;
    /// an empty string when the very first byte is the terminator;
    /// otherwise the text decoded from every byte that precedes the terminator.
    /// </returns>
    public static string StringFromNativeUtf8(IntPtr nativeUtf8)
    {
        // a zero pointer carries no string at all
        if (nativeUtf8 == IntPtr.Zero)
        {
            return null;
        }

        // walk forward one byte at a time until the zero terminator is reached;
        // the final offset equals the payload size in bytes
        int byteCount;
        for (byteCount = 0; Marshal.ReadByte(nativeUtf8, byteCount) != 0; byteCount++)
        {
        }

        // terminator at offset zero: nothing to copy or decode
        if (byteCount == 0)
        {
            return "";
        }

        // pull the payload (terminator excluded) into a managed array and decode it
        var utf8Bytes = new byte[byteCount];
        Marshal.Copy(nativeUtf8, utf8Bytes, 0, byteCount);
        return Encoding.UTF8.GetString(utf8Bytes);
    }
}
}
3 changes: 3 additions & 0 deletions Packages/com.whisper.unity/Runtime/Utils/TextUtils.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Packages/com.whisper.unity/Runtime/WhisperWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ private WhisperSegment GetSegment(int i, WhisperParams param)
{
// get segment text and timestamps
var textPtr = WhisperNative.whisper_full_get_segment_text(_whisperCtx, i);
var text = Marshal.PtrToStringAnsi(textPtr);
var text = TextUtils.StringFromNativeUtf8(textPtr);
var start = WhisperNative.whisper_full_get_segment_t0(_whisperCtx, i);
var end = WhisperNative.whisper_full_get_segment_t1(_whisperCtx, i);
var segment = new WhisperSegment(i, text, start, end);
Expand All @@ -182,7 +182,7 @@ private WhisperSegment GetSegment(int i, WhisperParams param)
{
var nativeToken = WhisperNative.whisper_full_get_token_data(_whisperCtx, i, j);
var textTokenPtr = WhisperNative.whisper_full_get_token_text(_whisperCtx, i, j);
var textToken = Marshal.PtrToStringAnsi(textTokenPtr);
var textToken = TextUtils.StringFromNativeUtf8(textTokenPtr);
var isSpecial = nativeToken.id >= WhisperNative.whisper_token_eot(_whisperCtx);
var token = new WhisperTokenData(nativeToken, textToken, param.TokenTimestamps, isSpecial);
segment.Tokens[j] = token;
Expand Down

0 comments on commit 7484284

Please sign in to comment.