// Copyright (C) 2023 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
/*
PRIVATE IMPLEMENTATION DETAILS
USERS, YOU DO NOT NEED TO READ THIS
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
/*
Notes for developers:
The file structure for all things ffmpeg is as follows:
- ffmpeg_details.h : contains implementation details only; everything must be in the
dlib::ffmpeg::details namespace
- ffmpeg_utils.h : contains common public API. Definitions go at the bottom of the file
underneath a block comment saying "DEFINITIONS"
Also contains implementation details that depend on the public API.
This must still go in the dlib::ffmpeg::details namespace
- ffmpeg_demuxer.h : contains public API for all things decoding. Similarly, definitions go
at the bottom of the file underneath a block comment saying "DEFINITIONS".
- ffmpeg_muxer.h : contains public API for all things encoding. Similarly, definitions go
at the bottom of the file underneath a block comment saying "DEFINITIONS".
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef DLIB_FFMPEG_DETAILS
#define DLIB_FFMPEG_DETAILS
#include "../test_for_odr_violations.h"
#ifndef DLIB_USE_FFMPEG
static_assert(false, "This version of dlib isn't built with the FFMPEG wrappers");
#endif
extern "C" {
#include <libavutil/dict.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/frame.h>
#include <libavutil/channel_layout.h>
#include <libavutil/audio_fifo.h>
#include <libavutil/imgutils.h>
#include <libavutil/log.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavformat/avformat.h>
#include <libavdevice/avdevice.h>
#include <libavcodec/avcodec.h>
}
#include <algorithm>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include "../logger.h"
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100)
#define FFMPEG_HAS_CH_LAYOUT 1
#endif
namespace dlib { namespace ffmpeg { namespace details
{
// ---------------------------------------------------------------------------------------------------
// Returns the shared dlib logger named "ffmpeg.internal".
inline dlib::logger& logger_ffmpeg_private()
{
    // Function-local static: constructed on the first call, then reused by every caller.
    static dlib::logger ffmpeg_logger("ffmpeg.internal");
    return ffmpeg_logger;
}
// ---------------------------------------------------------------------------------------------------
// Performs dlib's one-time FFmpeg setup:
//   - registers all devices (and, on old FFmpeg versions, all codecs and formats),
//   - installs a log callback that forwards FFmpeg messages to logger_ffmpeg_private().
// The work happens in the initializer of a function-local static, so it is executed only
// once regardless of how many times this function is called.
inline void register_ffmpeg()
{
    static const bool REGISTERED = []
    {
        avdevice_register_all();
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
        // See https://github.com/FFmpeg/FFmpeg/blob/70d25268c21cbee5f08304da95be1f647c630c15/doc/APIchanges#L91
        avcodec_register_all();
#endif
#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
        // See https://github.com/FFmpeg/FFmpeg/blob/70d25268c21cbee5f08304da95be1f647c630c15/doc/APIchanges#L86
        av_register_all();
#endif
        av_log_set_callback([](void* ptr, int level, const char* fmt, va_list vl)
        {
            auto& logger = logger_ffmpeg_private();
            char line[256] = {0};
            static int print_prefix = 1;

            // Not sure if copying to vl2 is required, but internal ffmpeg functions do this...
            va_list vl2;
            va_copy(vl2, vl);
            int size = av_log_format_line2(ptr, level, fmt, vl2, &line[0], sizeof(line), &print_prefix);
            va_end(vl2);

            // av_log_format_line2() returns a negative value on error. Using that as an index
            // would write before the start of the buffer, so drop the message instead.
            if (size < 0)
                return;

            // On success the return value is the length the message would have had in a large
            // enough buffer, so it can exceed what was actually written. Clamp it.
            const int max_size = static_cast<int>(sizeof(line)) - 1;
            if (size > max_size)
                size = max_size;
            line[size] = '\0';

            // Remove all '\n' since dlib's logger already adds one
            for (int i = 0; i < size; ++i)
                if (line[i] == '\n')
                    line[i] = ' ';

            // Map FFmpeg log levels onto dlib log levels. Unknown levels are ignored.
            switch (level)
            {
                case AV_LOG_PANIC:
                case AV_LOG_FATAL:   logger << LFATAL << line; break;
                case AV_LOG_ERROR:   logger << LERROR << line; break;
                case AV_LOG_WARNING: logger << LWARN  << line; break;
                case AV_LOG_INFO:
                case AV_LOG_VERBOSE: logger << LINFO  << line; break;
                case AV_LOG_DEBUG:   logger << LDEBUG << line; break;
                case AV_LOG_TRACE:   logger << LTRACE << line; break;
                default: break;
            }
        });
        return true;
    }();
    (void)REGISTERED;
}
// ---------------------------------------------------------------------------------------------------
// Converts an FFmpeg error code into a message using av_strerror().
// Returns the fixed text "couldn't set error" when av_strerror() does not return 0.
inline std::string get_av_error(int ret)
{
    char message[128] = {0};
    if (av_strerror(ret, message, sizeof(message)) != 0)
        return "couldn't set error";
    return message;
}
// ---------------------------------------------------------------------------------------------------
///////////////////////////
// Channel layout stuff
///////////////////////////
// Picks a channel layout mask for a given channel count.
// Only mono (1) and stereo (2) are supported; any other count trips DLIB_CASSERT.
// Note: several layouts share a channel count, so this mapping is a convenience,
// not an exact inverse of "number of channels in a layout".
inline uint64_t get_layout_from_channels(const std::size_t nchannels)
{
    if (nchannels == 1)
        return AV_CH_LAYOUT_MONO;
    if (nchannels == 2)
        return AV_CH_LAYOUT_STEREO;

    DLIB_CASSERT(false, "Don't support " << nchannels << " yet");
    return 0;
}
#if FFMPEG_HAS_CH_LAYOUT
// Builds a native-order AVChannelLayout from a legacy channel layout bitmask.
//
// channel_layout : bitmask with one bit per channel present in the layout.
// Returns        : a layout with order == AV_CHANNEL_ORDER_NATIVE, u.mask == channel_layout
//                  and nb_channels == number of bits set in the mask (0 for an empty mask).
inline AVChannelLayout convert_layout(const uint64_t channel_layout)
{
    // Value-initialise so that no field (e.g. opaque) is left indeterminate when the
    // struct is returned/copied by value.
    AVChannelLayout ch_layout{};
    ch_layout.order  = AV_CHANNEL_ORDER_NATIVE;
    ch_layout.u.mask = channel_layout;

    // Each set bit in a native mask is one channel, so the channel count is the
    // population count of the mask. This gives the same result as a table of the known
    // AV_CH_LAYOUT_* constants, but also works for masks that are not in such a table.
    int nchannels = 0;
    for (uint64_t bits = channel_layout; bits != 0; bits &= bits - 1)
        ++nchannels;
    ch_layout.nb_channels = nchannels;

    return ch_layout;
}
// Returns a textual description of a channel layout, as produced by
// av_channel_layout_describe(). Returns an empty string if the description fails.
inline std::string get_channel_layout_str(const AVChannelLayout& ch_layout)
{
    std::string str(32, '\0');
    int ret = av_channel_layout_describe(&ch_layout, &str[0], str.size());

    // The return value is the buffer size needed for the full description. If it does not
    // fit in the initial buffer, the text was truncated: retry once with a big enough buffer.
    if (ret > 0 && static_cast<std::size_t>(ret) >= str.size())
    {
        str.assign(static_cast<std::size_t>(ret) + 1, '\0');
        ret = av_channel_layout_describe(&ch_layout, &str[0], str.size());
    }

    if (ret <= 0)
        return std::string();

    // Cut the string at the terminating NUL written by FFmpeg so that the returned
    // std::string contains only the description and no embedded '\0' characters.
    const std::size_t end = str.find('\0');
    if (end != std::string::npos)
        str.resize(end);
    return str;
}
inline std::string get_channel_layout_str(uint64_t channel_layout)
{
return get_channel_layout_str(convert_layout(channel_layout));
}
inline std::string get_channel_layout_str(const AVCodecContext* pCodecCtx)
{
return get_channel_layout_str(pCodecCtx->ch_layout);
}
// True when av_channel_layout_check() returns 0 for the codec context's layout.
// pCodecCtx must not be null.
inline bool channel_layout_empty(const AVCodecContext* pCodecCtx)
{
    const int check = av_channel_layout_check(&pCodecCtx->ch_layout);
    return check == 0;
}

// True when frame is non-null and av_channel_layout_check() returns 0 for its layout.
// A null frame yields false.
inline bool channel_layout_empty(const AVFrame* frame)
{
    if (!frame)
        return false;
    return av_channel_layout_check(&frame->ch_layout) == 0;
}
// Returns the layout mask of a codec context, or 0 when pCodecCtx is null.
inline uint64_t get_layout(const AVCodecContext* pCodecCtx)
{
    if (pCodecCtx == nullptr)
        return 0;
    return pCodecCtx->ch_layout.u.mask;
}

// Returns the layout mask of a frame, or 0 when frame is null.
inline uint64_t get_layout(const AVFrame* frame)
{
    if (frame == nullptr)
        return 0;
    return frame->ch_layout.u.mask;
}
// Stores the layout built by convert_layout(channel_layout) in the codec context.
// pCodecCtx must not be null.
inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
{
    const AVChannelLayout converted = convert_layout(channel_layout);
    pCodecCtx->ch_layout = converted;
}

// Stores the layout built by convert_layout(channel_layout) in the frame.
// frame must not be null.
inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
{
    const AVChannelLayout converted = convert_layout(channel_layout);
    frame->ch_layout = converted;
}
inline int get_nchannels(const AVCodecContext* pCodecCtx)
{
return pCodecCtx ? pCodecCtx->ch_layout.nb_channels : 0;
}
inline int get_nchannels(const AVFrame* frame)
{
return frame ? frame->ch_layout.nb_channels : 0;
}
inline int get_nchannels(const uint64_t channel_layout)
{
return convert_layout(channel_layout).nb_channels;
}
// If the codec context has a channel count but a zero layout mask, replaces its layout
// with the one av_channel_layout_default() produces for that channel count.
inline void check_layout(AVCodecContext* pCodecCtx)
{
    // Nothing to do when a layout mask is already present.
    if (get_layout(pCodecCtx) != 0)
        return;

    const int nchannels = pCodecCtx->ch_layout.nb_channels;
    if (nchannels > 0)
        av_channel_layout_default(&pCodecCtx->ch_layout, nchannels);
}
#else
// Returns the text av_get_channel_layout_string() writes for a layout mask.
// The description is written into a 32-byte buffer and cut at its first NUL.
inline std::string get_channel_layout_str(uint64_t channel_layout)
{
    std::string description(32, '\0');
    av_get_channel_layout_string(&description[0], description.size(), 0, channel_layout);

    const std::size_t length = strlen(description.c_str());
    description.resize(length);
    return description;
}

// Describes the channel layout mask stored in a codec context. pCodecCtx must not be null.
inline std::string get_channel_layout_str(const AVCodecContext* pCodecCtx)
{
    const uint64_t mask = pCodecCtx->channel_layout;
    return get_channel_layout_str(mask);
}
// True when the codec context's layout mask is zero. pCodecCtx must not be null.
inline bool channel_layout_empty(const AVCodecContext* pCodecCtx)
{
    const uint64_t mask = pCodecCtx->channel_layout;
    return mask == 0;
}

// True when the frame's layout mask is zero. frame must not be null.
inline bool channel_layout_empty(const AVFrame* frame)
{
    const uint64_t mask = frame->channel_layout;
    return mask == 0;
}
// Returns the layout mask of a codec context, or 0 when pCodecCtx is null.
inline uint64_t get_layout(const AVCodecContext* pCodecCtx)
{
    if (pCodecCtx == nullptr)
        return 0;
    return pCodecCtx->channel_layout;
}

// Returns the layout mask of a frame, or 0 when frame is null.
inline uint64_t get_layout(const AVFrame* frame)
{
    if (frame == nullptr)
        return 0;
    return frame->channel_layout;
}
// Writes the given layout mask into the codec context. pCodecCtx must not be null.
inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
{
    auto& target = pCodecCtx->channel_layout;
    target = channel_layout;
}

// Writes the given layout mask into the frame. frame must not be null.
inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
{
    auto& target = frame->channel_layout;
    target = channel_layout;
}
// Returns the channel count av_get_channel_layout_nb_channels() reports for a layout mask.
inline int get_nchannels(const uint64_t channel_layout)
{
    const int count = av_get_channel_layout_nb_channels(channel_layout);
    return count;
}

// Channel count derived from a codec context's layout mask, or 0 when pCodecCtx is null.
inline int get_nchannels(const AVCodecContext* pCodecCtx)
{
    if (pCodecCtx == nullptr)
        return 0;
    return get_nchannels(pCodecCtx->channel_layout);
}

// Channel count derived from a frame's layout mask, or 0 when frame is null.
inline int get_nchannels(const AVFrame* frame)
{
    if (frame == nullptr)
        return 0;
    return get_nchannels(frame->channel_layout);
}
// If the codec context has a positive channel count but a zero layout mask, fills in the
// mask returned by av_get_default_channel_layout() for that channel count.
// pCodecCtx must not be null.
inline void check_layout(AVCodecContext* pCodecCtx)
{
    // Leave the context alone when a mask is already set or there are no channels.
    if (pCodecCtx->channel_layout != 0 || pCodecCtx->channels <= 0)
        return;

    pCodecCtx->channel_layout = av_get_default_channel_layout(pCodecCtx->channels);
}
#endif
// ---------------------------------------------------------------------------------------------------
// Deleter for the FFmpeg object types used by dlib. Each overload releases its argument
// with the matching FFmpeg free/close function and does nothing for a null pointer.
// Intended for use with std::unique_ptr (see av_ptr below).
struct av_deleter
{
    void operator()(AVFrame* ptr) const;
    void operator()(AVPacket* ptr) const;
    void operator()(AVAudioFifo* ptr) const;
    void operator()(SwsContext* ptr) const;
    void operator()(SwrContext* ptr) const;
    void operator()(AVCodecContext* ptr) const;
    void operator()(AVCodecParserContext* ptr) const;
    void operator()(AVFormatContext* ptr) const;
    void operator()(AVDeviceInfoList* ptr) const;
};

inline void av_deleter::operator()(AVFrame* ptr) const              { if (ptr) av_frame_free(&ptr); }
inline void av_deleter::operator()(AVPacket* ptr) const             { if (ptr) av_packet_free(&ptr); }
inline void av_deleter::operator()(AVAudioFifo* ptr) const          { if (ptr) av_audio_fifo_free(ptr); }
inline void av_deleter::operator()(SwsContext* ptr) const           { if (ptr) sws_freeContext(ptr); }
inline void av_deleter::operator()(SwrContext* ptr) const           { if (ptr) swr_free(&ptr); }
inline void av_deleter::operator()(AVCodecContext* ptr) const       { if (ptr) avcodec_free_context(&ptr); }
inline void av_deleter::operator()(AVCodecParserContext* ptr) const { if (ptr) av_parser_close(ptr); }
inline void av_deleter::operator()(AVDeviceInfoList* ptr) const     { if (ptr) avdevice_free_list_devices(&ptr); }

// Format contexts with an input format are closed with avformat_close_input().
// Every other context (output contexts, and contexts that were allocated but never
// opened and so have neither iformat nor oformat set) is released with
// avformat_free_context() so that it is not leaked.
inline void av_deleter::operator()(AVFormatContext* ptr) const
{
    if (!ptr)
        return;

    if (ptr->iformat)
        avformat_close_input(&ptr);
    else
        avformat_free_context(ptr);
}

// Owning smart pointer for FFmpeg objects; destruction is dispatched through av_deleter.
template<class AVObject>
using av_ptr = std::unique_ptr<AVObject, details::av_deleter>;
// ---------------------------------------------------------------------------------------------------
// Allocates an AVFrame with av_frame_alloc() and returns it wrapped in an av_ptr.
// Throws std::runtime_error if the allocation returns null.
inline av_ptr<AVFrame> make_avframe()
{
    AVFrame* raw = av_frame_alloc();
    if (raw == nullptr)
        throw std::runtime_error("Failed to allocate AVframe");
    return av_ptr<AVFrame>(raw);
}
// Allocates an AVPacket with av_packet_alloc() and returns it wrapped in an av_ptr.
// Throws std::runtime_error if the allocation returns null.
inline av_ptr<AVPacket> make_avpacket()
{
    AVPacket* raw = av_packet_alloc();
    if (raw == nullptr)
        throw std::runtime_error("Failed to allocate AVPacket");
    return av_ptr<AVPacket>(raw);
}
// ---------------------------------------------------------------------------------------------------
// Owning wrapper around an AVDictionary*.
// Copying duplicates the dictionary with av_dict_copy(); moving transfers ownership and
// leaves the source empty; the destructor frees the dictionary with av_dict_free().
struct av_dict
{
    av_dict() = default;
    // Builds a dictionary from key/value pairs. Stops at the first av_dict_set() failure
    // (after printing the error), keeping the entries added so far.
    av_dict(const std::unordered_map<std::string, std::string> &options);
    av_dict(const av_dict &ori);
    av_dict &operator=(const av_dict &ori);
    av_dict(av_dict &&ori) noexcept;
    av_dict &operator=(av_dict &&ori) noexcept;
    ~av_dict();

    // Number of entries, 0 when no dictionary is held.
    size_t size() const;
    // Prints every "key : value" pair to stdout.
    void print() const;
    // Address of the owned pointer, or nullptr when no dictionary is held.
    AVDictionary** get();

    // The owned dictionary; nullptr means "empty".
    AVDictionary *avdic = nullptr;
};

inline av_dict::av_dict(const std::unordered_map<std::string, std::string>& options)
{
    for (const auto& opt : options)
    {
        const int ret = av_dict_set(&avdic, opt.first.c_str(), opt.second.c_str(), 0);
        if (ret < 0)
        {
            printf("av_dict_set() failed : %s\n", get_av_error(ret).c_str());
            break;
        }
    }
}

inline av_dict::av_dict(const av_dict& ori)
{
    av_dict_copy(&avdic, ori.avdic, 0);
}

inline av_dict& av_dict::operator=(const av_dict& ori)
{
    // Copy first, then hand the copy over through move assignment, which releases
    // whatever this object owned before.
    if (this != &ori)
        *this = av_dict{ori};
    return *this;
}

inline av_dict::av_dict(av_dict &&ori) noexcept
:   avdic{std::exchange(ori.avdic, nullptr)}
{
}

inline av_dict &av_dict::operator=(av_dict &&ori) noexcept
{
    if (this != &ori)
    {
        // Free the dictionary currently owned before taking over ori's, otherwise the
        // old dictionary would be leaked.
        if (avdic)
            av_dict_free(&avdic);
        avdic = std::exchange(ori.avdic, nullptr);
    }
    return *this;
}

inline av_dict::~av_dict()
{
    if (avdic)
        av_dict_free(&avdic);
}

inline AVDictionary** av_dict::get()
{
    return avdic ? &avdic : nullptr;
}

inline std::size_t av_dict::size() const
{
    return avdic ? av_dict_count(avdic) : 0;
}

inline void av_dict::print() const
{
    if (!avdic)
        return;

    // An empty key combined with AV_DICT_IGNORE_SUFFIX matches every entry, so this
    // walks the whole dictionary.
    AVDictionaryEntry *tag = nullptr;
    while ((tag = av_dict_get(avdic, "", tag, AV_DICT_IGNORE_SUFFIX)))
        printf("%s : %s\n", tag->key, tag->value);
}
// ---------------------------------------------------------------------------------------------------
// Guesses a codec from a file name's extension (the text after the last '.').
// The comparison is case-insensitive for ASCII letters, so "a.JPG", "a.Jpg" and "a.jpg"
// all map to the same codec.
// Returns AV_CODEC_ID_NONE when there is no extension or it is not recognised.
inline AVCodecID pick_codec_from_filename(const std::string& filename)
{
    const auto ext_pos = filename.find_last_of('.');
    if (ext_pos == std::string::npos)
        return AV_CODEC_ID_NONE;

    // Lower-case the extension by hand (ASCII only) so the result does not depend on the
    // current locale.
    std::string ext = filename.substr(ext_pos + 1);
    for (char& c : ext)
        if (c >= 'A' && c <= 'Z')
            c = static_cast<char>(c - 'A' + 'a');

    if (ext == "png")
        return AV_CODEC_ID_PNG;
    if (ext == "jpeg" || ext == "jpg")
        return AV_CODEC_ID_MJPEG;
    if (ext == "tiff")
        return AV_CODEC_ID_TIFF;
    if (ext == "webp")
        return AV_CODEC_ID_WEBP;
    if (ext == "bmp")
        return AV_CODEC_ID_BMP;
    if (ext == "h264")
        return AV_CODEC_ID_H264;
    if (ext == "h265" || ext == "hevc")
        return AV_CODEC_ID_H265;
    if (ext == "aac")
        return AV_CODEC_ID_AAC;
    if (ext == "ac3")
        return AV_CODEC_ID_AC3;
    if (ext == "jls")
        return AV_CODEC_ID_JPEGLS;
    if (ext == "jp2")
        return AV_CODEC_ID_JPEG2000;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 37, 100)
    if (ext == "jxl")
        return AV_CODEC_ID_JPEGXL;
#endif
    return AV_CODEC_ID_NONE;
}
// ---------------------------------------------------------------------------------------------------
}}}
#endif //DLIB_FFMPEG_DETAILS