| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- import Accelerate
- import AVFoundation
- import Foundation
/// BPM detection using spectral flux onset detection and autocorrelation.
///
/// The pipeline is:
/// 1. Decode the start of the file and mix it down to mono.
/// 2. Slide a Hann-windowed FFT over the samples and measure the spectral flux
///    (sum of positive magnitude increases) per hop to get an onset envelope.
/// 3. Autocorrelate the onset envelope over lags corresponding to the supported
///    tempo range and pick the strongest periodicity.
struct BPMDetector {
    /// Number of seconds analyzed from the start of the file.
    private static let analysisDuration: Double = 60
    /// FFT frame length in samples (must be a power of two).
    private static let fftSize = 2048
    /// Distance in samples between consecutive analysis frames.
    private static let hopSize = 512
    /// Lowest tempo reported.
    private static let minBPM: Double = 60
    /// Highest tempo reported.
    private static let maxBPM: Double = 200

    /// Detect BPM for a track. Analyzes the first 60 seconds.
    ///
    /// - Parameter track: The track whose `fileURL` is opened for reading.
    /// - Returns: The estimated tempo rounded to one decimal place, or `0` when
    ///   no tempo can be determined (empty/too-short audio, silence, or a
    ///   failure to set up the analysis buffers).
    /// - Throws: Any error raised by `AVAudioFile` while opening or reading.
    static func detectBPM(for track: Track) async throws -> Double {
        guard let audio = try loadMonoSamples(from: track.fileURL) else { return 0 }
        let onsets = spectralFlux(of: audio.samples)
        let onsetRate = audio.sampleRate / Double(hopSize)
        return estimateBPM(fromOnsets: onsets, onsetRate: onsetRate)
    }

    /// Reads up to `analysisDuration` seconds from `url` and averages all channels into one.
    ///
    /// Returns `nil` when the file is empty or no float sample data is available.
    private static func loadMonoSamples(from url: URL) throws -> (samples: [Float], sampleRate: Double)? {
        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let sampleRate = format.sampleRate
        let maxFrames = AVAudioFrameCount(min(Double(file.length), sampleRate * analysisDuration))
        guard maxFrames > 0,
              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
            return nil
        }
        try file.read(into: buffer, frameCount: maxFrames)

        guard let channelData = buffer.floatChannelData else { return nil }
        let frameCount = Int(buffer.frameLength)
        let channelCount = Int(format.channelCount)
        guard frameCount > 0, channelCount > 0 else { return nil }

        var mono = [Float](repeating: 0, count: frameCount)
        // `channelData` points into storage owned by `buffer`; keep the buffer
        // alive until the last read through those pointers has finished.
        withExtendedLifetime(buffer) {
            mono.withUnsafeMutableBufferPointer { monoPtr in
                guard let base = monoPtr.baseAddress else { return }
                let length = vDSP_Length(frameCount)
                for channel in 0..<channelCount {
                    vDSP_vadd(base, 1, channelData[channel], 1, base, 1, length)
                }
                if channelCount > 1 {
                    var divisor = Float(channelCount)
                    vDSP_vsdiv(base, 1, &divisor, base, 1, length)
                }
            }
        }
        return (mono, sampleRate)
    }

    /// Computes the spectral-flux onset envelope of `samples`, one value per hop.
    ///
    /// Returns an empty array when there are fewer than `fftSize` samples or the
    /// FFT setup cannot be created.
    private static func spectralFlux(of samples: [Float]) -> [Float] {
        guard samples.count >= fftSize else { return [] }
        let log2n = vDSP_Length(log2(Double(fftSize)))
        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else { return [] }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        let halfN = fftSize / 2
        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        // Scratch buffers are allocated once and reused for every frame.
        var frame = [Float](repeating: 0, count: fftSize)
        var real = [Float](repeating: 0, count: halfN)
        var imag = [Float](repeating: 0, count: halfN)
        var magnitudes = [Float](repeating: 0, count: halfN)
        var previous = [Float](repeating: 0, count: halfN)

        var onsets: [Float] = []
        onsets.reserveCapacity((samples.count - fftSize) / hopSize + 1)

        var hasPrevious = false
        var position = 0
        while position + fftSize <= samples.count {
            // Apply the analysis window to the current frame.
            samples.withUnsafeBufferPointer { samplePtr in
                guard let base = samplePtr.baseAddress else { return }
                vDSP_vmul(base + position, 1, window, 1, &frame, 1, vDSP_Length(fftSize))
            }

            // Real-to-complex FFT followed by per-bin magnitudes.
            frame.withUnsafeBufferPointer { framePtr in
                real.withUnsafeMutableBufferPointer { realPtr in
                    imag.withUnsafeMutableBufferPointer { imagPtr in
                        guard let frameBase = framePtr.baseAddress,
                              let realBase = realPtr.baseAddress,
                              let imagBase = imagPtr.baseAddress else { return }
                        var split = DSPSplitComplex(realp: realBase, imagp: imagBase)
                        frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfN) { complexPtr in
                            vDSP_ctoz(complexPtr, 2, &split, 1, vDSP_Length(halfN))
                        }
                        vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(kFFTDirection_Forward))
                        vDSP_zvabs(&split, 1, &magnitudes, 1, vDSP_Length(halfN))
                        // The packed real FFT stores the Nyquist term in imag[0], so
                        // bin 0 must use the DC term alone rather than mixing both.
                        magnitudes[0] = abs(realBase[0])
                    }
                }
            }

            if hasPrevious {
                // Spectral flux (positive differences only).
                var flux: Float = 0
                for (current, prior) in zip(magnitudes, previous) {
                    let diff = current - prior
                    if diff > 0 { flux += diff }
                }
                onsets.append(flux)
            } else {
                // There is no earlier spectrum to compare against; treating it as
                // silence would register the whole first frame as one huge onset.
                onsets.append(0)
                hasPrevious = true
            }

            // `magnitudes` is fully overwritten on the next iteration, so swapping
            // avoids copying the spectrum.
            swap(&previous, &magnitudes)
            position += hopSize
        }
        return onsets
    }

    /// Estimates the tempo from an onset envelope sampled at `onsetRate` values per second.
    ///
    /// Returns `0` when the envelope is too short for the slowest supported tempo
    /// or contains no positive periodicity (for example silence).
    private static func estimateBPM(fromOnsets onsets: [Float], onsetRate: Double) -> Double {
        guard onsets.count > 2, onsetRate.isFinite, onsetRate > 0 else { return 0 }

        // A lag of zero would mean an infinite tempo, so never search below 1.
        let minLag = max(1, Int(onsetRate * 60.0 / maxBPM))
        let maxLag = Int(onsetRate * 60.0 / minBPM)
        guard minLag <= maxLag, maxLag < onsets.count else { return 0 }

        // The flux values are non-negative; removing the mean keeps the constant
        // offset from dominating every lag's correlation.
        let mean = onsets.reduce(0, +) / Float(onsets.count)
        let centered = onsets.map { $0 - mean }

        var bestLag = 0
        var bestScore: Float = 0
        centered.withUnsafeBufferPointer { ptr in
            guard let base = ptr.baseAddress else { return }
            for lag in minLag...maxLag {
                let overlap = ptr.count - lag
                var sum: Float = 0
                vDSP_dotpr(base, 1, base + lag, 1, &sum, vDSP_Length(overlap))
                // Normalize by the number of terms so short lags, which overlap
                // more samples, are not favored merely for summing more products.
                let score = sum / Float(overlap)
                if score > bestScore {
                    bestScore = score
                    bestLag = lag
                }
            }
        }
        // No lag produced a positive correlation: nothing periodic to report.
        guard bestLag > 0 else { return 0 }

        // Fold into the supported range by octave (double/half time).
        var bpm = (onsetRate * 60.0) / Double(bestLag)
        while bpm > maxBPM { bpm /= 2 }
        while bpm < minBPM { bpm *= 2 }
        return (bpm * 10).rounded() / 10
    }
}
|