| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- import Accelerate
- import AVFoundation
- import Foundation
/// Detects BPM from audio files using spectral-flux onset detection followed by autocorrelation.
///
/// Pipeline: decode up to 60 seconds of mono samples, compute a half-wave-rectified
/// spectral flux curve, min-max normalize it, then pick the autocorrelation lag with the
/// highest average correlation inside the configured BPM range.
struct BPMDetector {

    // MARK: - Configuration

    /// Analysis window size (samples). Larger = more frequency resolution, less time resolution.
    private static let fftSize = 1024

    /// Hop size (samples) between the starts of consecutive analysis windows.
    private static let hopSize = 512

    /// Minimum BPM to consider.
    private static let minBPM: Double = 60

    /// Maximum BPM to consider.
    private static let maxBPM: Double = 200

    /// Value returned when no tempo can be estimated from the onset function.
    private static let fallbackBPM: Double = 120

    // MARK: - Public API

    /// Analyze a track's BPM by forwarding its `fileURL` to `detectBPM(fileURL:)`.
    ///
    /// - Parameter track: The track whose file should be analysed.
    /// - Returns: The estimated tempo in beats per minute.
    /// - Throws: Whatever `detectBPM(fileURL:)` throws.
    static func detectBPM(for track: Track) async throws -> Double {
        let url = track.fileURL
        return try await detectBPM(fileURL: url)
    }

    /// Analyze BPM from a file URL. The work runs in a detached task at `.userInitiated` priority.
    ///
    /// - Parameter fileURL: Location of the audio file to analyse.
    /// - Returns: The estimated tempo in beats per minute.
    /// - Throws: `BPMError.insufficientAudio` when no more than `fftSize * 2` samples were
    ///   decoded, plus any error raised while opening or decoding the file.
    static func detectBPM(fileURL: URL) async throws -> Double {
        try await Task.detached(priority: .userInitiated) {
            let sampleRate: Double
            let samples: [Float]

            if OGGDecoder.isOGGFile(fileURL) {
                let result = try OGGDecoder.readMonoSamples(url: fileURL, maxSeconds: 60)
                sampleRate = result.sampleRate
                samples = result.samples
            } else {
                let audioFile = try AVAudioFile(forReading: fileURL)
                sampleRate = audioFile.processingFormat.sampleRate
                samples = try readMonoSamples(from: audioFile, maxSeconds: 60)
            }

            guard samples.count > fftSize * 2 else {
                throw BPMError.insufficientAudio
            }

            // Step 1: Compute spectral flux (onset detection function)
            let flux = computeSpectralFlux(samples: samples)

            // Step 2: Normalize flux
            let normalizedFlux = normalize(flux)

            // Step 3: Autocorrelation to find periodicity
            let bpm = findBPMFromAutocorrelation(
                onsetFunction: normalizedFlux,
                hopRate: sampleRate / Double(hopSize)
            )
            return bpm
        }.value
    }

    // MARK: - Audio Reading

    /// Reads up to `maxSeconds` of audio from the start of `audioFile` and downmixes it to mono.
    ///
    /// The buffer is allocated in the file's own processing format, because
    /// `AVAudioFile.read(into:frameCount:)` requires the buffer format to match it. All
    /// channels are then averaged into one, so stereo and multichannel files work.
    ///
    /// - Parameters:
    ///   - audioFile: The file to read; its frame position is reset to 0.
    ///   - maxSeconds: Upper bound on the amount of audio to decode.
    /// - Returns: Mono samples, at most `sampleRate * maxSeconds` of them.
    /// - Throws: `BPMError.noAudioData` when there are no frames to read or no float channel
    ///   data is available, `BPMError.formatError` when the buffer cannot be allocated, plus
    ///   any error thrown by the read itself.
    private static func readMonoSamples(from audioFile: AVAudioFile, maxSeconds: Double) throws -> [Float] {
        let format = audioFile.processingFormat

        // Validate before converting: AVAudioFrameCount(_:) traps on negative, NaN or
        // out-of-range doubles, and an empty file has nothing to analyse.
        let frameLimit = min(Double(audioFile.length), format.sampleRate * maxSeconds)
        guard frameLimit >= 1 else {
            throw BPMError.noAudioData
        }
        let maxFrames = AVAudioFrameCount(min(frameLimit, Double(AVAudioFrameCount.max)))

        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
            throw BPMError.formatError
        }

        audioFile.framePosition = 0
        try audioFile.read(into: buffer, frameCount: maxFrames)

        guard let channelData = buffer.floatChannelData else {
            throw BPMError.noAudioData
        }

        let frameCount = Int(buffer.frameLength)
        let channelCount = Int(format.channelCount)
        let channelStride = buffer.stride
        guard frameCount > 0, channelCount > 0 else { return [] }

        // Fast path: contiguous mono data can be copied out as-is.
        if channelCount == 1 && channelStride == 1 {
            return Array(UnsafeBufferPointer(start: channelData[0], count: frameCount))
        }

        // Downmix: sum every channel into the accumulator, then divide by the channel count.
        var mono = [Float](repeating: 0, count: frameCount)
        let length = vDSP_Length(frameCount)
        mono.withUnsafeMutableBufferPointer { monoPtr in
            guard let out = monoPtr.baseAddress else { return }
            for channel in 0..<channelCount {
                vDSP_vadd(out, 1, channelData[channel], channelStride, out, 1, length)
            }
            var scale = 1 / Float(channelCount)
            vDSP_vsmul(out, 1, &scale, out, 1, length)
        }
        return mono
    }

    // MARK: - Spectral Flux

    /// Computes the onset detection function: for each Hann-windowed frame, the sum of the
    /// positive bin-wise increases in squared spectral magnitude relative to the previous frame.
    ///
    /// - Parameter samples: Mono samples.
    /// - Returns: One flux value per analysis frame, or an empty array when fewer than two
    ///   frames fit in `samples` or the FFT setup cannot be created.
    private static func computeSpectralFlux(samples: [Float]) -> [Float] {
        let halfFFT = fftSize / 2
        let frameLength = vDSP_Length(fftSize)
        let binCount = vDSP_Length(halfFFT)
        let log2n = vDSP_Length(log2(Double(fftSize)))

        let numFrames = (samples.count - fftSize) / hopSize + 1
        guard numFrames > 1 else { return [] }

        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else { return [] }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, frameLength, Int32(vDSP_HANN_NORM))

        // Scratch buffers are allocated once and reused for every frame.
        var frame = [Float](repeating: 0, count: fftSize)
        var real = [Float](repeating: 0, count: halfFFT)
        var imag = [Float](repeating: 0, count: halfFFT)
        var magnitudes = [Float](repeating: 0, count: halfFFT)
        var previousMagnitudes = [Float](repeating: 0, count: halfFFT)
        var diff = [Float](repeating: 0, count: halfFFT)
        var rectified = [Float](repeating: 0, count: halfFFT)

        var flux = [Float]()
        flux.reserveCapacity(numFrames)

        for frameIndex in 0..<numFrames {
            let offset = frameIndex * hopSize
            guard offset + fftSize <= samples.count else { break }

            // Window the frame straight out of the sample buffer.
            samples.withUnsafeBufferPointer { source in
                guard let base = source.baseAddress else { return }
                vDSP_vmul(base + offset, 1, window, 1, &frame, 1, frameLength)
            }

            // The split-complex view must only be used while the pointers it was built from
            // are valid, so packing, FFT and magnitude extraction all happen inside this scope.
            real.withUnsafeMutableBufferPointer { realPtr in
                imag.withUnsafeMutableBufferPointer { imagPtr in
                    guard let realBase = realPtr.baseAddress,
                          let imagBase = imagPtr.baseAddress else { return }
                    var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                    // Pack the real frame as interleaved pairs for the in-place real FFT.
                    frame.withUnsafeBufferPointer { framePtr in
                        guard let frameBase = framePtr.baseAddress else { return }
                        frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfFFT) { interleaved in
                            vDSP_ctoz(interleaved, 2, &split, 1, binCount)
                        }
                    }

                    vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(kFFTDirection_Forward))

                    // Squared magnitude per bin.
                    vDSP_zvmags(&split, 1, &magnitudes, 1, binCount)
                }
            }

            // vDSP_vsub computes B - A, i.e. magnitudes - previousMagnitudes.
            vDSP_vsub(previousMagnitudes, 1, magnitudes, 1, &diff, 1, binCount)

            // Half-wave rectify (keep only positive changes).
            var threshold: Float = 0
            vDSP_vthres(diff, 1, &threshold, &rectified, 1, binCount)

            var sum: Float = 0
            vDSP_sve(rectified, 1, &sum, binCount)
            flux.append(sum)

            // The current spectrum becomes the reference for the next frame; the other buffer
            // is fully overwritten on the next iteration.
            swap(&previousMagnitudes, &magnitudes)
        }

        return flux
    }

    // MARK: - Autocorrelation

    /// Estimates tempo from the lag that maximises the length-normalised autocorrelation of
    /// `onsetFunction`, searching only lags that correspond to `minBPM...maxBPM`.
    ///
    /// - Parameters:
    ///   - onsetFunction: Onset strength per analysis frame.
    ///   - hopRate: Onset frames per second (sample rate divided by hop size).
    /// - Returns: BPM rounded to one decimal. Estimates below 80 are doubled and estimates
    ///   above 180 are halved. `fallbackBPM` is returned when the input is empty, `hopRate`
    ///   is not a positive finite number, the lag range is empty, or no lag has a positive
    ///   correlation.
    private static func findBPMFromAutocorrelation(onsetFunction: [Float], hopRate: Double) -> Double {
        let n = onsetFunction.count
        guard n > 0, hopRate.isFinite, hopRate > 0 else { return fallbackBPM }

        // Lag range in frames corresponding to the BPM range. Clamp in floating point first
        // so the Int conversions cannot trap on very large values.
        let minLag = max(1, Int(min(Double(n), hopRate * 60.0 / maxBPM)))
        let maxLag = Int(min(Double(n - 1), hopRate * 60.0 / minBPM))
        guard minLag < maxLag else { return fallbackBPM }

        var bestLag = minLag
        var bestCorrelation: Float = -.greatestFiniteMagnitude

        onsetFunction.withUnsafeBufferPointer { buffer in
            guard let base = buffer.baseAddress else { return }
            for lag in minLag...maxLag {
                let overlap = n - lag
                var correlation: Float = 0
                vDSP_dotpr(base, 1, base + lag, 1, &correlation, vDSP_Length(overlap))

                // Normalize by overlap length so longer lags are not penalised.
                correlation /= Float(overlap)

                if correlation > bestCorrelation {
                    bestCorrelation = correlation
                    bestLag = lag
                }
            }
        }

        // No lag correlates at all (e.g. silence): there is no periodicity to report.
        guard bestCorrelation > 0 else { return fallbackBPM }

        // Convert lag to BPM
        var bpm = hopRate * 60.0 / Double(bestLag)

        if bpm < 80 {
            // Very low: probably locked onto half-time, so double it.
            bpm *= 2
        } else if bpm > 180 {
            // Very high: probably locked onto double-time, so halve it.
            bpm /= 2
        }

        // Round to 1 decimal on every path.
        return (bpm * 10).rounded() / 10
    }

    // MARK: - Normalize

    /// Min-max normalizes `data` into the range 0...1.
    ///
    /// - Parameter data: Values to rescale.
    /// - Returns: An array of the same length; empty for empty input, all zeros when the
    ///   values have no positive spread.
    private static func normalize(_ data: [Float]) -> [Float] {
        guard !data.isEmpty else { return [] }
        let count = vDSP_Length(data.count)

        var minVal: Float = 0
        var maxVal: Float = 0
        vDSP_minv(data, 1, &minVal, count)
        vDSP_maxv(data, 1, &maxVal, count)

        let range = maxVal - minVal
        guard range > 0 else { return [Float](repeating: 0, count: data.count) }

        // Shift so the minimum sits at zero...
        var shifted = [Float](repeating: 0, count: data.count)
        var negMin = -minVal
        vDSP_vsadd(data, 1, &negMin, &shifted, 1, count)

        // ...then scale so the maximum sits at one.
        var result = [Float](repeating: 0, count: data.count)
        var scale = 1.0 / range
        vDSP_vsmul(shifted, 1, &scale, &result, 1, count)
        return result
    }
}
// MARK: - Errors

/// Errors reported by BPM analysis, each with a human-readable description.
enum BPMError: Error, LocalizedError {
    /// The audio is too short to analyse.
    case insufficientAudio
    /// The audio format could not be read.
    case formatError
    /// The file contained no audio data.
    case noAudioData

    /// Message describing the failure.
    var errorDescription: String? {
        let message: String
        switch self {
        case .insufficientAudio:
            message = "Audio file is too short for BPM analysis"
        case .formatError:
            message = "Unable to read audio format"
        case .noAudioData:
            message = "No audio data found in file"
        }
        return message
    }
}
|