Skip to content

Commit

Permalink
Add progress callback (#35)
Browse files Browse the repository at this point in the history
* Progress callback

* Fixed error in parameters

* Updated samples

* Fixed scrolling

* Remove test files
  • Loading branch information
Macoron committed Jun 21, 2023
1 parent 7484284 commit 2ff649c
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 30 deletions.
5 changes: 3 additions & 2 deletions Assets/Samples/1 - Audio Clip/1 - Audio Clip.unity
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ MonoBehaviour:
m_HorizontalOverflow: 0
m_VerticalOverflow: 1
m_LineSpacing: 1
m_Text: 'Time: '
m_Text:
--- !u!222 &167558893
CanvasRenderer:
m_ObjectHideFlags: 0
Expand Down Expand Up @@ -1767,7 +1767,7 @@ RectTransform:
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0.000030517578, y: 0}
m_AnchoredPosition: {x: 0.000061035156, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1321875564
Expand Down Expand Up @@ -1884,6 +1884,7 @@ MonoBehaviour:
timeText: {fileID: 167558892}
initialPromptDropdown: {fileID: 1902663392}
selectedInitialPromptInput: {fileID: 247128853}
scroll: {fileID: 145324201}
--- !u!1 &1370179832
GameObject:
m_ObjectHideFlags: 0
Expand Down
12 changes: 11 additions & 1 deletion Assets/Samples/1 - Audio Clip/AudioClipDemo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
using System.Text;
using UnityEngine;
using UnityEngine.UI;
using Whisper.Utils;

// ReSharper disable ArrangeObjectCreationWhenTypeEvident - for Unity 2019/2020 support:

namespace Whisper.Samples
Expand Down Expand Up @@ -57,6 +59,7 @@ public class InitialPrompt
public Text timeText;
public Dropdown initialPromptDropdown;
public InputField selectedInitialPromptInput;
public ScrollRect scroll;

private string _buffer;

Expand All @@ -65,6 +68,7 @@ private void Awake()
button.onClick.AddListener(ButtonPressed);
if (streamSegments)
manager.OnNewSegment += OnNewSegmentHandler;
manager.OnProgress += OnProgressHandler;

initialPromptDropdown.options = initialPrompts
.Select(x => new Dropdown.OptionData(x.name))
Expand Down Expand Up @@ -121,6 +125,8 @@ private void OnNewSegmentHandler(WhisperSegment segment)
_buffer += $"<b>{segment.TimestampToString()}</b>{segment.Text}\n";
outputText.text = _buffer;
}

UiUtils.ScrollDown(scroll);
}

private string GetFinalText(WhisperResult output)
Expand All @@ -137,7 +143,11 @@ private string GetFinalText(WhisperResult output)

return sb.ToString();
}


/// <summary>
/// Writes the most recent progress value into the time label.
/// </summary>
/// <param name="progress">Progress value; shown followed by a percent sign.</param>
private void OnProgressHandler(int progress) => timeText.text = $"Progress: {progress}%";
}
}

Expand Down
8 changes: 6 additions & 2 deletions Assets/Samples/2 - Microphone/2 - Microphone.unity
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ RenderSettings:
m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44657898, g: 0.4964133, b: 0.5748178, a: 1}
m_IndirectSpecularColor: {r: 0.44657815, g: 0.49641192, b: 0.57481617, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:
Expand Down Expand Up @@ -395,7 +395,7 @@ MonoBehaviour:
m_HorizontalOverflow: 0
m_VerticalOverflow: 1
m_LineSpacing: 1
m_Text: 'Time: '
m_Text:
--- !u!222 &167558893
CanvasRenderer:
m_ObjectHideFlags: 0
Expand Down Expand Up @@ -1019,6 +1019,9 @@ MonoBehaviour:
strategy: 0
noContext: 1
singleSegment: 1
enableTokens: 0
initialPrompt:
tokensTimestamps: 0
speedUp: 0
audioCtx: 0
--- !u!1 &637716610
Expand Down Expand Up @@ -2753,6 +2756,7 @@ MonoBehaviour:
timeText: {fileID: 167558892}
languageDropdown: {fileID: 1400360880}
translateToggle: {fileID: 1425433665}
scroll: {fileID: 942089564}
--- !u!114 &1337591123
MonoBehaviour:
m_ObjectHideFlags: 0
Expand Down
12 changes: 11 additions & 1 deletion Assets/Samples/2 - Microphone/MicrophoneDemo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
using UnityEngine;
using UnityEngine.UI;
using Whisper.Utils;
using Button = UnityEngine.UI.Button;
using Toggle = UnityEngine.UI.Toggle;

namespace Whisper.Samples
{
Expand All @@ -19,7 +21,8 @@ public class MicrophoneDemo : MonoBehaviour
public Text timeText;
public Dropdown languageDropdown;
public Toggle translateToggle;

public ScrollRect scroll;

private string _buffer;

private void Awake()
Expand All @@ -37,6 +40,7 @@ private void Awake()

if (streamSegments)
whisper.OnNewSegment += WhisperOnOnNewSegment;
whisper.OnProgress += OnProgressHandler;
}

private void OnButtonPressed()
Expand Down Expand Up @@ -86,6 +90,12 @@ private void WhisperOnOnNewSegment(WhisperSegment segment)
{
_buffer += segment.Text;
outputText.text = _buffer + "...";
UiUtils.ScrollDown(scroll);
}

/// <summary>
/// Updates the time label with the latest progress value.
/// </summary>
/// <param name="progress">Progress value; displayed with a trailing percent sign.</param>
private void OnProgressHandler(int progress)
{
    var label = $"Progress: {progress}%";
    timeText.text = label;
}
}
}
4 changes: 3 additions & 1 deletion Assets/Samples/4 - Subtitles/4 - Subtitles.unity
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ MonoBehaviour:
m_HorizontalOverflow: 0
m_VerticalOverflow: 1
m_LineSpacing: 1
m_Text: 'Time: '
m_Text:
--- !u!222 &167558893
CanvasRenderer:
m_ObjectHideFlags: 0
Expand Down Expand Up @@ -812,6 +812,7 @@ MonoBehaviour:
noContext: 1
singleSegment: 0
enableTokens: 1
initialPrompt:
tokensTimestamps: 1
speedUp: 0
audioCtx: 0
Expand Down Expand Up @@ -1908,6 +1909,7 @@ MonoBehaviour:
timeText: {fileID: 167558892}
languageDropdown: {fileID: 1400360880}
translateToggle: {fileID: 1425433665}
scroll: {fileID: 942089564}
--- !u!1 &1345287211
GameObject:
m_ObjectHideFlags: 0
Expand Down
9 changes: 9 additions & 0 deletions Assets/Samples/4 - Subtitles/SubtitlesDemo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.UI;
using Whisper.Utils;

namespace Whisper.Samples
{
Expand All @@ -18,12 +19,14 @@ public class SubtitlesDemo : MonoBehaviour
public Text timeText;
public Dropdown languageDropdown;
public Toggle translateToggle;
public ScrollRect scroll;

private void Awake()
{
// we need to force these settings for whisper
whisper.enableTokens = true;
whisper.tokensTimestamps = true;
whisper.OnProgress += OnProgressHandler;

languageDropdown.value = languageDropdown.options
.FindIndex(op => op.text == whisper.language);
Expand Down Expand Up @@ -71,6 +74,7 @@ private async void OnButtonPressed()
{
var text = GetSubtitles(res, source.time);
outputText.text = text;
UiUtils.ScrollDown(scroll);
await Task.Yield();

// check that audio source still here and wasn't destroyed
Expand Down Expand Up @@ -153,5 +157,10 @@ private static string ProbabilityToColor(float p)
else
return "green";
}

/// <summary>
/// Displays the reported progress value in the time label.
/// </summary>
/// <param name="progress">Progress value; rendered as "Progress: N%".</param>
private void OnProgressHandler(int progress)
{
    timeText.text = string.Format("Progress: {0}%", progress);
}
}
}
33 changes: 13 additions & 20 deletions Packages/com.whisper.unity/Runtime/Native/WhisperNativeParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ public enum WhisperSamplingStrategy
public delegate void whisper_new_segment_callback(whisper_context_ptr ctx, whisper_state_ptr state,
int n_new, System.IntPtr user_data);

/// <summary>
/// Managed signature of the native callback that is called on each progress update.
/// </summary>
/// <param name="ctx">Whisper context pointer passed by the native side.</param>
/// <param name="state">Whisper state pointer passed by the native side.</param>
/// <param name="progress">Progress value reported by the native side (the samples display it as a percentage).</param>
/// <param name="user_data">User data pointer supplied together with the callback.</param>
// NOTE(review): declared as StdCall - confirm this matches the calling convention
// the native library uses for its callbacks on 32-bit x86 targets.
[UnmanagedFunctionPointer(CallingConvention.StdCall)]
public delegate void whisper_progress_callback(whisper_context_ptr ctx, whisper_state_ptr state,
int progress, System.IntPtr user_data);

/// <summary>
/// This is direct copy of C++ struct.
/// Do not change or add any fields without changing it in whisper.cpp.
Expand Down Expand Up @@ -89,23 +93,15 @@ public unsafe struct WhisperNativeParams

// for auto-detection, set to nullptr, "" or "auto"
public byte* language;
[MarshalAs(UnmanagedType.U1)] bool detect_language;

// common decoding parameters:
[MarshalAs(UnmanagedType.U1)]
bool
suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89

[MarshalAs(UnmanagedType.U1)]
bool
suppress_non_speech_tokens; // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
[MarshalAs(UnmanagedType.U1)] bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
[MarshalAs(UnmanagedType.U1)] bool suppress_non_speech_tokens; // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253

float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478

float
max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97

float
length_penalty; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L267
float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
float length_penalty; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L267

// fallback parameters
// ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278
Expand All @@ -117,18 +113,15 @@ public unsafe struct WhisperNativeParams
[StructLayout(LayoutKind.Sequential)]
struct greedy_struct
{
int
best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
}

greedy_struct greedy;

[StructLayout(LayoutKind.Sequential)]
struct beam_search_struct
{
int
beam_size; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265

int beam_size; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265
float patience; // TODO: not implemented, ref: https://arxiv.org/pdf/2204.05424.pdf
}

Expand All @@ -139,8 +132,8 @@ struct beam_search_struct
public System.IntPtr new_segment_callback_user_data;

// called on each progress update
void* progress_callback;
void* progress_callback_user_data;
public whisper_progress_callback progress_callback;
public System.IntPtr progress_callback_user_data;

// called each time before the encoder starts
void* encoder_begin_callback;
Expand Down
20 changes: 20 additions & 0 deletions Packages/com.whisper.unity/Runtime/Utils/UiUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.UI;

namespace Whisper.Utils
{
/// <summary>
/// Small UI helper methods.
/// </summary>
public static class UiUtils
{
    /// <summary>
    /// Scroll <see cref="ScrollRect"/> down to the bottom.
    /// </summary>
    /// <remarks>
    /// This is a fire-and-forget call: the scrolling happens after an awaited yield,
    /// not before the method returns to the caller.
    /// Does nothing when <paramref name="scroll"/> is unassigned or has been destroyed,
    /// so an exception is never thrown from the un-awaitable continuation.
    /// </remarks>
    /// <param name="scroll">Scroll view to move to the bottom; may be null.</param>
    public static async void ScrollDown(ScrollRect scroll)
    {
        // Nothing to scroll (e.g. the field was never assigned in the inspector).
        if (scroll == null)
        {
            return;
        }

        // Let the caller finish its own UI changes before the layout is rebuilt.
        // NOTE(review): assumes the continuation resumes on Unity's main thread - confirm.
        await Task.Yield();

        // The scroll view may have been destroyed while we were waiting;
        // Unity's overloaded == reports destroyed objects as null.
        if (scroll == null)
        {
            return;
        }

        Canvas.ForceUpdateCanvases();
        scroll.normalizedPosition = Vector2.zero;
        Canvas.ForceUpdateCanvases();
    }
}
}
3 changes: 3 additions & 0 deletions Packages/com.whisper.unity/Runtime/Utils/UiUtils.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion Packages/com.whisper.unity/Runtime/WhisperManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public class WhisperManager : MonoBehaviour
public int audioCtx;

public event OnNewSegmentDelegate OnNewSegment;
public event OnProgressDelegate OnProgress;

private WhisperWrapper _whisper;
private WhisperParams _params;
Expand Down Expand Up @@ -100,14 +101,15 @@ public async Task InitModel()
_whisper = await WhisperWrapper.InitFromFileAsync(path);
_params = WhisperParams.GetDefaultParams(strategy);
_whisper.OnNewSegment += OnNewSegmentHandler;
_whisper.OnProgress += OnProgressHandler;
}
catch (Exception e)
{
Debug.LogException(e);
}
IsLoading = false;
}

public bool IsMultilingual()
{
if (!IsLoaded)
Expand Down Expand Up @@ -185,6 +187,14 @@ private void OnNewSegmentHandler(WhisperSegment segment)
OnNewSegment?.Invoke(segment);
});
}

/// <summary>
/// Relays a progress value to the subscribers of <see cref="OnProgress"/>
/// by handing the invocation to the dispatcher.
/// </summary>
/// <param name="progress">Progress value, passed through unchanged.</param>
private void OnProgressHandler(int progress)
{
    // NOTE(review): presumably the dispatcher runs this on the Unity main thread - confirm.
    void RaiseOnProgress()
    {
        OnProgress?.Invoke(progress);
    }

    _dispatcher.Execute(RaiseOnProgress);
}
}
}

26 changes: 24 additions & 2 deletions Packages/com.whisper.unity/Runtime/WhisperParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,32 @@ public whisper_new_segment_callback NewSegmentCallback
}

/// <summary>
/// Pointer to data that you want to pass as a parameter for callback.
/// Pointer to data that you want to pass as a parameter for <see cref="NewSegmentCallback"/>.
/// It will be relayed from whisper code without any changes.
/// </summary>
public IntPtr NewSegmentCallbackUseData
public IntPtr NewSegmentCallbackUserData
{
get => _param.new_segment_callback_user_data;
set => _param.new_segment_callback_user_data = value;
}

/// <summary>
/// Native callback that is called on each progress update.
/// It should be a static function because of IL2CPP.
/// <see cref="WhisperWrapper"/> and <see cref="WhisperManager"/> use it
/// to raise their own custom event.
/// </summary>
public whisper_progress_callback ProgressCallback
{
    get { return _param.progress_callback; }
    set { _param.progress_callback = value; }
}

/// <summary>
/// Pointer to data that you want to pass as a parameter for <see cref="ProgressCallback"/>.
/// It will be relayed from whisper code without any changes.
/// </summary>
/// <remarks>
/// Stored in the native <c>progress_callback_user_data</c> field, so it does not
/// share storage with the user data of the new-segment callback.
/// </remarks>
public IntPtr ProgressCallbackUserData
{
    get => _param.progress_callback_user_data;
    set => _param.progress_callback_user_data = value;
Expand Down
Loading

0 comments on commit 2ff649c

Please sign in to comment.