35 AudioWaveformer::AudioWaveformer(
ReaderBase* new_reader) :
37 detached_reader(nullptr),
38 resolved_reader(nullptr),
39 source_initialized(false)
60 Fraction source_fps = ResolveSourceFPS(source);
62 AudioWaveformData base = ExtractSamplesFromReader(source, channel, num_per_second,
false);
65 if (
auto clip =
dynamic_cast<Clip*
>(reader)) {
67 Fraction project_fps = timeline ? timeline->
info.
fps : clip->Reader()->info.fps;
68 return ApplyKeyframes(base, &clip->time, &clip->volume, project_fps, source_fps, source->
info.
channels, num_per_second, channel, normalize);
73 float max_sample = 0.0f;
75 max_sample = std::max(max_sample, std::abs(v));
77 if (max_sample > 0.0f) {
90 AudioWaveformData data = ExtractSamplesFromReader(&temp_reader, channel, num_per_second, normalize);
108 AudioWaveformData base = ExtractSamplesFromReader(&temp_reader, channel, num_per_second,
false);
111 return ApplyKeyframes(base, time_keyframe, volume_keyframe, project_fps, source_fps, temp_reader.
info.
channels, num_per_second, channel, normalize);
124 if (num_per_second <= 0) {
128 double project_fps_value = project_fps.
ToDouble();
129 double source_fps_value = source_fps.
ToDouble();
130 if (project_fps_value <= 0.0 || source_fps_value <= 0.0) {
134 if (channel != -1 && (channel < 0 || channel >= source_channels)) {
139 if (base_total == 0) {
144 int64_t output_frames = 0;
145 if (time_keyframe && time_keyframe->
GetCount() > 0) {
146 output_frames = time_keyframe->
GetLength();
148 if (output_frames <= 0) {
150 double source_duration =
static_cast<double>(base_total) /
static_cast<double>(num_per_second);
151 output_frames =
static_cast<int64_t
>(std::llround(source_duration * project_fps_value));
153 double output_duration_seconds =
static_cast<double>(output_frames) / project_fps_value;
154 int total_samples =
static_cast<int>(std::ceil(output_duration_seconds * num_per_second));
156 if (total_samples <= 0) {
160 data.
resize(total_samples);
161 data.
zero(total_samples);
163 for (
int i = 0; i < total_samples; ++i) {
164 double out_time =
static_cast<double>(i) /
static_cast<double>(num_per_second);
166 double project_frame = out_time * project_fps_value;
167 double mapped_project_frame = time_keyframe ? time_keyframe->
GetValue(project_frame) : project_frame;
169 double source_time = mapped_project_frame / project_fps_value;
170 double source_index = source_time *
static_cast<double>(num_per_second);
173 int idx0 =
static_cast<int>(std::floor(source_index));
175 double frac = source_index -
static_cast<double>(idx0);
177 float max_sample = 0.0f;
178 float rms_sample = 0.0f;
179 if (idx0 >= 0 && idx0 <
static_cast<int>(base_total)) {
183 if (idx1 >= 0 && idx1 <
static_cast<int>(base_total)) {
184 max_sample =
static_cast<float>((1.0 - frac) * max_sample + frac * base.
max_samples[idx1]);
185 rms_sample =
static_cast<float>((1.0 - frac) * rms_sample + frac * base.
rms_samples[idx1]);
189 if (volume_keyframe) {
190 double project_frame = out_time * project_fps_value;
191 gain = volume_keyframe->
GetValue(project_frame);
193 max_sample =
static_cast<float>(max_sample * gain);
194 rms_sample =
static_cast<float>(rms_sample * gain);
201 float samples_max = 0.0f;
203 samples_max = std::max(samples_max, std::abs(v));
205 if (samples_max > 0.0f) {
206 data.
scale(total_samples, 1.0f / samples_max);
213 AudioWaveformData AudioWaveformer::ExtractSamplesFromReader(
ReaderBase* source_reader,
int channel,
int num_per_second,
bool normalize) {
216 if (!source_reader || num_per_second <= 0) {
221 if (!source_reader->
IsOpen()) {
222 source_reader->
Open();
225 const auto retry_delay = std::chrono::milliseconds(100);
226 const auto max_wait_for_open = std::chrono::milliseconds(3000);
228 auto get_frame_with_retry = [&](int64_t frame_number) -> std::shared_ptr<openshot::Frame> {
229 std::chrono::steady_clock::time_point wait_start;
230 bool waiting_for_open =
false;
233 return source_reader->
GetFrame(frame_number);
235 auto now = std::chrono::steady_clock::now();
236 if (!waiting_for_open) {
237 waiting_for_open =
true;
239 }
else if (now - wait_start >= max_wait_for_open) {
243 std::this_thread::sleep_for(retry_delay);
249 if (sample_rate <= 0) {
250 sample_rate = num_per_second;
252 int sample_divisor = sample_rate / num_per_second;
253 if (sample_divisor <= 0) {
259 if (reader_video_length < 0) {
260 reader_video_length = 0;
264 float frames_duration = 0.0f;
265 if (reader_video_length > 0 && fps_value > 0.0) {
266 frames_duration =
static_cast<float>(reader_video_length / fps_value);
268 if (reader_duration <= 0.0f) {
269 reader_duration = frames_duration;
271 if (reader_duration < 0.0f) {
272 reader_duration = 0.0f;
279 int total_samples =
static_cast<int>(std::ceil(reader_duration * num_per_second));
280 if (total_samples <= 0 || source_reader->info.channels == 0) {
284 if (channel != -1 && (channel < 0 || channel >= source_reader->
info.
channels)) {
289 data.
resize(total_samples);
290 data.
zero(total_samples);
292 int extracted_index = 0;
293 int sample_index = 0;
294 float samples_max = 0.0f;
295 float chunk_max = 0.0f;
296 double chunk_squared_sum = 0.0;
298 int channel_count = (channel == -1) ? source_reader->
info.
channels : 1;
299 std::vector<float*> channels(source_reader->
info.
channels,
nullptr);
302 for (int64_t f = 1; f <= reader_video_length && extracted_index < total_samples; f++) {
303 std::shared_ptr<openshot::Frame> frame = get_frame_with_retry(f);
305 for (
int channel_index = 0; channel_index < source_reader->
info.
channels; channel_index++) {
306 if (channel == channel_index || channel == -1) {
307 channels[channel_index] = frame->GetAudioSamples(channel_index);
311 int sample_count = frame->GetAudioSamplesCount();
312 for (
int s = 0; s < sample_count; s++) {
313 for (
int channel_index = 0; channel_index < source_reader->
info.
channels; channel_index++) {
314 if (channel == channel_index || channel == -1) {
315 float *samples = channels[channel_index];
319 float abs_sample = std::abs(samples[s]);
320 chunk_squared_sum +=
static_cast<double>(samples[s]) *
static_cast<double>(samples[s]);
321 chunk_max = std::max(chunk_max, abs_sample);
327 if (sample_index % sample_divisor == 0) {
328 float avg_squared_sum = 0.0f;
329 if (channel_count > 0) {
330 avg_squared_sum =
static_cast<float>(chunk_squared_sum /
static_cast<double>(sample_divisor * channel_count));
333 if (extracted_index < total_samples) {
335 data.
rms_samples[extracted_index] = std::sqrt(avg_squared_sum);
336 samples_max = std::max(samples_max, chunk_max);
342 chunk_squared_sum = 0.0;
344 if (extracted_index >= total_samples) {
354 if (sample_index > 0 && extracted_index < total_samples) {
355 float avg_squared_sum = 0.0f;
356 if (channel_count > 0) {
357 avg_squared_sum =
static_cast<float>(chunk_squared_sum /
static_cast<double>(sample_index * channel_count));
361 data.
rms_samples[extracted_index] = std::sqrt(avg_squared_sum);
362 samples_max = std::max(samples_max, chunk_max);
366 if (normalize && samples_max > 0.0f) {
367 float scale = 1.0f / samples_max;
368 data.
scale(total_samples, scale);
375 if (!source_reader) {
381 if (
auto clip =
dynamic_cast<Clip*
>(current)) {
382 current = clip->Reader();
385 if (
auto mapper =
dynamic_cast<FrameMapper*
>(current)) {
386 current = mapper->Reader();
395 if (!source_reader) {
398 return source_reader->
info.
fps;
402 ReaderBase* AudioWaveformer::ResolveWaveformReader() {
403 if (source_initialized) {
404 return resolved_reader ? resolved_reader : reader;
406 source_initialized =
true;
408 resolved_reader = ResolveSourceReader(reader);
411 if (
auto ff_reader =
dynamic_cast<FFmpegReader*
>(resolved_reader)) {
412 const Json::Value ff_json = ff_reader->JsonValue();
413 const std::string
path = ff_json.get(
"path",
"").asString();
416 auto clone = std::make_unique<FFmpegReader>(
path,
false);
417 clone->SetJsonValue(ff_json);
418 clone->info.has_video =
false;
419 detached_reader = std::move(clone);
420 resolved_reader = detached_reader.get();
423 detached_reader.reset();
424 resolved_reader = ResolveSourceReader(reader);
429 return resolved_reader ? resolved_reader : reader;