Skip to content

Commit

Permalink
Add support for trnascoding compressed input with ffmpeg libs (Linux)
Browse files Browse the repository at this point in the history
- search for ffmpeg at cmake time
- added ffmpeg_transcode.cpp into libcommon if ffmpeg on
- hooked ffmpeg trancoding in common read_wav(...)
- passed test:
./main -m ggml-base.en.bin -f samples/jfk.mp3
  • Loading branch information
WilliamTambellini committed May 15, 2024
1 parent 4ef8d9f commit 95a657e
Show file tree
Hide file tree
Showing 9 changed files with 576 additions and 2 deletions.
26 changes: 26 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDA

option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs" OFF)
endif()

option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
option(WHISPER_NO_AVX512 "whisper: disable AVX512" ON)
Expand Down Expand Up @@ -125,6 +129,28 @@ else()
set(CMAKE_CXX_STANDARD 11)
endif()

if (WHISPER_FFMPEG)
# As today (cmake 3.27), there is no official cmake support for FindFFmpeg.
# Consequnelty we added a FindFFmpeg.cmake script the cmake subfolder:
#unset(FFmpeg_FIND_COMPONENTS CACHE)
#set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL) # AVFILTER
find_package(FFmpeg REQUIRED)
if (NOT ${FFMPEG_FOUND})
message(FATAL_ERROR "Cannot find ffmpeg libs/headers")
endif()
message(STATUS "Found ffmpeg libs: ${FFMPEG_LIBRARIES}")
message(STATUS "Found ffmpeg headers in: ${FFMPEG_INCLUDE_DIRS}")
message(STATUS "ffmpeg definitions: ${FFMPEG_DEFINITIONS}")
message(STATUS "Found avformat ${AVFORMAT_VERSION}")
include_directories(${FFMPEG_INCLUDE_DIRS})
add_compile_definitions(WHISPER_FFMPEG)
# not swscale is not needed
# libswresample performs highly optimized audio resampling, rematrixing and sample format conversion operations
# libavcodec provides a generic encoding/decoding framework and contains multiple decoders and encoders for audio, video and subtitle streams, and several bitstream filters.
# libavformat provides a generic framework for multiplexing and demultiplexing (muxing and demuxing) audio, video and subtitle streams.
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
endif()

# on APPLE
if (APPLE)
# include Accelerate framework
Expand Down
163 changes: 163 additions & 0 deletions cmake/FindFFmpeg.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# From
# https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake
#
# vim: ts=2 sw=2
# - Try to find the required ffmpeg components(default: AVFORMAT, AVUTIL, AVCODEC)
#
# Once done this will define
# FFMPEG_FOUND - System has the all required components.
# FFMPEG_INCLUDE_DIRS - Include directory necessary for using the required components headers.
# FFMPEG_LIBRARIES - Link these to use the required ffmpeg components.
# FFMPEG_DEFINITIONS - Compiler switches required for using the required ffmpeg components.
#
# For each of the components it will additionally set.
# - AVCODEC
# - AVDEVICE
# - AVFORMAT
# - AVFILTER
# - AVUTIL
# - POSTPROC
# - SWSCALE
# the following variables will be defined
# <component>_FOUND - System has <component>
# <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers
# <component>_LIBRARIES - Link these to use <component>
# <component>_DEFINITIONS - Compiler switches required for using <component>
# <component>_VERSION - The components version
#
# Copyright (c) 2006, Matthias Kretz, <[email protected]>
# Copyright (c) 2008, Alexander Neundorf, <[email protected]>
# Copyright (c) 2011, Michael Jansen, <[email protected]>
#
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

include(FindPackageHandleStandardArgs)

# The default components were taken from a survey over other FindFFMPEG.cmake files
if (NOT FFmpeg_FIND_COMPONENTS)
set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE)
endif ()

#
### Macro: set_component_found
#
# Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS is present.
#
macro(set_component_found _component )
if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS)
message(DEBUG " - ${_component} found.")
set(${_component}_FOUND TRUE)
else ()
message(DEBUG " - ${_component} not found.")
endif ()
endmacro()

#
### Macro: find_component
#
# Checks for the given component by invoking pkgconfig and then looking up the libraries and
# include directories.
#
macro(find_component _component _pkgconfig _library _header)

if (NOT WIN32)
# use pkg-config to get the directories and then use these values
# in the FIND_PATH() and FIND_LIBRARY() calls
find_package(PkgConfig)
if (PKG_CONFIG_FOUND)
pkg_check_modules(PC_${_component} ${_pkgconfig})
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}")
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}")
message(STATUS "${PC_${_component}_CFLAGS}")
endif ()
endif (NOT WIN32)


find_path(${_component}_INCLUDE_DIRS ${_header}
HINTS
${PC_${_component}_INCLUDEDIR}
${PC_${_component}_INCLUDE_DIRS}
PATH_SUFFIXES
ffmpeg
)

# CMake's default is to search first for shared libraries and then for static libraries.
# Todo later: add option to prefer static libs over dynamic:
find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a
HINTS
${PC_${_component}_LIBDIR}
${PC_${_component}_LIBRARY_DIRS}
)

set(${_component}_DEFINITIONS ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.")
set(${_component}_VERSION ${PC_${_component}_VERSION} CACHE STRING "The ${_component} version number.")

set_component_found(${_component})

mark_as_advanced(
${_component}_INCLUDE_DIRS
${_component}_LIBRARIES
${_component}_DEFINITIONS
${_component}_VERSION)

endmacro()


# Check for cached results. If there are skip the costly part.
if (NOT FFMPEG_LIBRARIES)

# Check for all possible component.
find_component(AVCODEC libavcodec avcodec libavcodec/avcodec.h)
find_component(AVFORMAT libavformat avformat libavformat/avformat.h)
find_component(AVDEVICE libavdevice avdevice libavdevice/avdevice.h)
#find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample
find_component(AVUTIL libavutil avutil libavutil/avutil.h)
find_component(AVFILTER libavfilter avfilter libavfilter/avfilter.h)
find_component(SWSCALE libswscale swscale libswscale/swscale.h)
find_component(POSTPROC libpostproc postproc libpostproc/postprocess.h)
find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h)

# Check if the required components were found and add their stuff to the FFMPEG_* vars.
foreach (_component ${FFmpeg_FIND_COMPONENTS})
if (${_component}_FOUND)
# message(STATUS "Required component ${_component} present.")
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} ${${_component}_LIBRARIES})
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS})
list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS})
else ()
# message(STATUS "Required component ${_component} missing.")
endif ()
endforeach ()

# Build the include path with duplicates removed.
if (FFMPEG_INCLUDE_DIRS)
list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS)
endif ()

# cache the vars.
set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE)
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} CACHE STRING "The FFmpeg libraries." FORCE)
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} CACHE STRING "The FFmpeg cflags." FORCE)

mark_as_advanced(FFMPEG_INCLUDE_DIRS
FFMPEG_LIBRARIES
FFMPEG_DEFINITIONS)

endif ()

# Now set the noncached _FOUND vars for the components.
# whisper.cpp does not need SWSCALE
foreach (_component AVCODEC AVDEVICE AVFORMAT AVRESAMPLE AVUTIL POSTPROCESS)
set_component_found(${_component})
endforeach ()

# Compile the list of required vars
set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS)
foreach (_component ${FFmpeg_FIND_COMPONENTS})
list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS)
endforeach ()

# Give a nice error message if some of the required vars are missing.
find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS})

5 changes: 5 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,18 @@ endif()

set(TARGET common)

if (WHISPER_FFMPEG)
set(FFMPEG_SRC ffmpeg_transcode.cpp)
endif()

add_library(${TARGET} STATIC
common.h
common.cpp
common-ggml.h
common-ggml.cpp
grammar-parser.h
grammar-parser.cpp
${FFMPEG_SRC}
)

include(DefaultTargetOptions)
Expand Down
18 changes: 17 additions & 1 deletion examples/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
#include <io.h>
#endif

#ifdef WHISPER_FFMPEG
// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
extern bool ffmpeg_decode_audio(const std::string& ifname, std::vector<uint8_t>& wav_data);
#endif

// Function to check if the next argument exists
std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
if (i + 1 < argc && argv[i + 1][0] != '-') {
Expand Down Expand Up @@ -637,7 +642,7 @@ bool is_wav_buffer(const std::string buf) {

bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
drwav wav;
std::vector<uint8_t> wav_data; // used for pipe input from stdin
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output

if (fname == "-") {
{
Expand Down Expand Up @@ -670,8 +675,19 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
}
}
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
return false;
}
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
fprintf(stderr, "error: failed to read wav data as wav \n");
return false;
}
#else
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
return false;
#endif
}

if (wav.channels != 1 && wav.channels != 2) {
Expand Down

0 comments on commit 95a657e

Please sign in to comment.