import Accelerate
import AVFoundation
import Foundation

/// Detects BPM from audio files using energy-based onset detection with autocorrelation.
///
/// Pipeline: decode up to `maxAnalysisSeconds` of mono audio, compute an onset-strength
/// curve (spectral flux), scale it to 0...1, then pick the autocorrelation lag with the
/// strongest periodicity inside the `minBPM`...`maxBPM` range.
struct BPMDetector {

    // MARK: - Configuration

    /// Analysis window size (samples). Larger = more frequency resolution, less time resolution.
    private static let fftSize = 1024

    /// Hop size between analysis windows.
    private static let hopSize = 512

    /// Minimum BPM to consider.
    private static let minBPM: Double = 60

    /// Maximum BPM to consider.
    private static let maxBPM: Double = 200

    /// Only the first this-many seconds of a file are analyzed.
    private static let maxAnalysisSeconds: Double = 60

    /// Tempo returned when the onset curve is too short to search for a period.
    private static let fallbackBPM: Double = 120

    // MARK: - Public API

    /// Analyze a track's BPM. Runs on a background thread.
    ///
    /// - Parameter track: The track whose `fileURL` is analyzed.
    /// - Returns: Estimated tempo in beats per minute, rounded to one decimal.
    /// - Throws: `BPMError`, or any error raised while opening or decoding the file.
    static func detectBPM(for track: Track) async throws -> Double {
        try await detectBPM(fileURL: track.fileURL)
    }

    /// Analyze BPM from a file URL.
    ///
    /// The work runs in a detached `.userInitiated` task so it does not inherit the
    /// caller's actor.
    ///
    /// - Parameter fileURL: Location of the audio file.
    /// - Returns: Estimated tempo in beats per minute, rounded to one decimal.
    /// - Throws: `BPMError.insufficientAudio` when fewer than `2 * fftSize + 1` samples
    ///   could be decoded; otherwise whatever the decoders throw.
    static func detectBPM(fileURL: URL) async throws -> Double {
        try await Task.detached(priority: .userInitiated) {
            let sampleRate: Double
            let samples: [Float]

            if OGGDecoder.isOGGFile(fileURL) {
                let decoded = try OGGDecoder.readMonoSamples(url: fileURL, maxSeconds: maxAnalysisSeconds)
                sampleRate = decoded.sampleRate
                samples = decoded.samples
            } else {
                let audioFile = try AVAudioFile(forReading: fileURL)
                sampleRate = audioFile.processingFormat.sampleRate
                samples = try readMonoSamples(from: audioFile, maxSeconds: maxAnalysisSeconds)
            }

            guard samples.count > fftSize * 2 else {
                throw BPMError.insufficientAudio
            }

            // Step 1: Compute spectral flux (onset detection function)
            let flux = computeSpectralFlux(samples: samples)

            // Step 2: Normalize flux
            let normalizedFlux = normalize(flux)

            // Step 3: Autocorrelation to find periodicity
            return findBPMFromAutocorrelation(
                onsetFunction: normalizedFlux,
                hopRate: sampleRate / Double(hopSize)
            )
        }.value
    }

    // MARK: - Audio Reading

    /// Reads up to `maxSeconds` of audio from the start of `audioFile` as mono samples.
    ///
    /// `AVAudioFile.read(into:frameCount:)` requires the destination buffer to use the
    /// file's processing format, so the audio is read with its native channel count and
    /// then averaged down to one channel. Mono files are returned unchanged.
    ///
    /// - Returns: Mono samples; empty when the file contains no frames.
    /// - Throws: `BPMError.formatError` if the buffer cannot be allocated or the
    ///   processing format is interleaved, `BPMError.noAudioData` if the buffer exposes
    ///   no float channel data, or any error thrown by the read itself.
    private static func readMonoSamples(from audioFile: AVAudioFile, maxSeconds: Double) throws -> [Float] {
        let format = audioFile.processingFormat
        let frameLimit = min(Double(audioFile.length), format.sampleRate * maxSeconds)
        guard frameLimit >= 1 else { return [] }
        let maxFrames = AVAudioFrameCount(frameLimit)

        // The per-channel pointers below are only valid for deinterleaved data.
        guard !format.isInterleaved,
              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
            throw BPMError.formatError
        }

        audioFile.framePosition = 0
        try audioFile.read(into: buffer, frameCount: maxFrames)

        guard let channelData = buffer.floatChannelData else {
            throw BPMError.noAudioData
        }

        let frameCount = Int(buffer.frameLength)
        let channelCount = Int(format.channelCount)
        guard frameCount > 0, channelCount > 0 else { return [] }

        if channelCount == 1 {
            return Array(UnsafeBufferPointer(start: channelData[0], count: frameCount))
        }

        // Downmix: sum every channel, then divide by the channel count.
        var mono = [Float](repeating: 0, count: frameCount)
        mono.withUnsafeMutableBufferPointer { out in
            guard let base = out.baseAddress else { return }
            let length = vDSP_Length(frameCount)
            for channel in 0..<channelCount {
                vDSP_vadd(base, 1, channelData[channel], 1, base, 1, length)
            }
            var gain = 1 / Float(channelCount)
            vDSP_vsmul(base, 1, &gain, base, 1, length)
        }
        return mono
    }

    // MARK: - Spectral Flux

    /// Computes one onset-strength value per analysis frame.
    ///
    /// Frames are `fftSize` samples long and advance by `hopSize`. Returns an empty
    /// array when the FFT setup cannot be created or fewer than two frames fit.
    ///
    /// NOTE(review): the body of the per-frame loop was missing from the source under
    /// review (text was lost after `for frameIndex in 0..`). The loop below is a
    /// reconstruction of a conventional half-wave-rectified spectral flux built from the
    /// buffers the original declared — confirm against version control before merging.
    private static func computeSpectralFlux(samples: [Float]) -> [Float] {
        let halfFFT = fftSize / 2
        let log2n = vDSP_Length(log2(Double(fftSize)))

        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else {
            return []
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        let numFrames = (samples.count - fftSize) / hopSize + 1
        guard numFrames > 1 else { return [] }

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        var previousMagnitudes = [Float](repeating: 0, count: halfFFT)
        var flux = [Float]()
        flux.reserveCapacity(numFrames)

        var real = [Float](repeating: 0, count: halfFFT)
        var imag = [Float](repeating: 0, count: halfFFT)
        var windowed = [Float](repeating: 0, count: fftSize)
        var magnitudes = [Float](repeating: 0, count: halfFFT)

        for frameIndex in 0..<numFrames {
            let start = frameIndex * hopSize

            // Taper the frame so the FFT does not see hard edges.
            samples.withUnsafeBufferPointer { source in
                guard let base = source.baseAddress else { return }
                vDSP_vmul(base + start, 1, window, 1, &windowed, 1, vDSP_Length(fftSize))
            }

            real.withUnsafeMutableBufferPointer { realBuffer in
                imag.withUnsafeMutableBufferPointer { imagBuffer in
                    guard let realBase = realBuffer.baseAddress,
                          let imagBase = imagBuffer.baseAddress else { return }
                    var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                    // Pack the real frame as even/odd pairs, as the in-place real FFT expects.
                    windowed.withUnsafeBufferPointer { frame in
                        guard let frameBase = frame.baseAddress else { return }
                        frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfFFT) { pairs in
                            vDSP_ctoz(pairs, 2, &split, 1, vDSP_Length(halfFFT))
                        }
                    }

                    vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(FFT_FORWARD))

                    // Bin 0 carries DC in `realp` and Nyquist in `imagp`; it is treated
                    // like any other bin here.
                    vDSP_zvabs(&split, 1, &magnitudes, 1, vDSP_Length(halfFFT))
                }
            }

            // Only increases in magnitude count toward an onset.
            var frameFlux: Float = 0
            for bin in 0..<halfFFT {
                let rise = magnitudes[bin] - previousMagnitudes[bin]
                if rise > 0 {
                    frameFlux += rise
                }
            }
            flux.append(frameFlux)

            // Current magnitudes become the reference; the old buffer is overwritten next frame.
            swap(&previousMagnitudes, &magnitudes)
        }

        return flux
    }

    // MARK: - Autocorrelation

    /// Estimates tempo from the strongest autocorrelation lag of an onset curve.
    ///
    /// NOTE(review): this declaration line was missing from the source under review; the
    /// signature is reconstructed from its single call site and from the names used in
    /// the body.
    ///
    /// - Parameters:
    ///   - onsetFunction: Onset strength per analysis frame.
    ///   - hopRate: Analysis frames per second.
    /// - Returns: BPM rounded to one decimal, or `fallbackBPM` when the curve is empty,
    ///   too short for the lag range, or `hopRate` is not a positive finite number.
    private static func findBPMFromAutocorrelation(onsetFunction: [Float], hopRate: Double) -> Double {
        let n = onsetFunction.count
        guard n > 0, hopRate.isFinite, hopRate > 0 else { return fallbackBPM }

        // Lag range in frames corresponding to BPM range
        let minLag = max(1, Int(hopRate * 60.0 / maxBPM))
        let maxLag = min(n - 1, Int(hopRate * 60.0 / minBPM))
        guard minLag < maxLag else { return fallbackBPM }

        // Compute autocorrelation for relevant lags
        var bestLag = minLag
        var bestCorrelation: Float = -.greatestFiniteMagnitude

        onsetFunction.withUnsafeBufferPointer { buffer in
            guard let base = buffer.baseAddress else { return }
            for lag in minLag...maxLag {
                let overlap = n - lag
                var correlation: Float = 0
                vDSP_dotpr(base, 1, base + lag, 1, &correlation, vDSP_Length(overlap))

                // Normalize by overlap length so long lags are not penalized.
                correlation /= Float(overlap)

                if correlation > bestCorrelation {
                    bestCorrelation = correlation
                    bestLag = lag
                }
            }
        }

        // Convert lag to BPM
        var bpm = hopRate * 60.0 / Double(bestLag)

        if bpm < 80 {
            // If BPM is very low, it might be detecting half-time — double it
            bpm *= 2
        } else if bpm > 180 {
            // If very high, might be double-time — halve it
            bpm /= 2
        }

        // Round after octave correction so every path returns one decimal.
        return (bpm * 10).rounded() / 10
    }

    // MARK: - Normalize

    /// Linearly rescales `data` so its minimum maps to 0 and its maximum to 1.
    ///
    /// Returns an empty array for empty input and all zeros when every value is equal.
    private static func normalize(_ data: [Float]) -> [Float] {
        guard !data.isEmpty else { return [] }
        let length = vDSP_Length(data.count)

        var minVal: Float = 0
        var maxVal: Float = 0
        vDSP_minv(data, 1, &minVal, length)
        vDSP_maxv(data, 1, &maxVal, length)

        let range = maxVal - minVal
        guard range > 0 else {
            return [Float](repeating: 0, count: data.count)
        }

        var result = [Float](repeating: 0, count: data.count)
        var negMin = -minVal
        var scale = 1.0 / range
        result.withUnsafeMutableBufferPointer { out in
            guard let base = out.baseAddress else { return }
            vDSP_vsadd(data, 1, &negMin, base, 1, length)
            vDSP_vsmul(base, 1, &scale, base, 1, length)
        }
        return result
    }
}

// MARK: - Errors

/// Failures reported by `BPMDetector`.
enum BPMError: Error, LocalizedError {
    /// Too few samples were decoded to analyze.
    case insufficientAudio
    /// A buffer for the file's audio format could not be created.
    case formatError
    /// The decoded buffer exposed no float channel data.
    case noAudioData

    var errorDescription: String? {
        switch self {
        case .insufficientAudio:
            return "Audio file is too short for BPM analysis"
        case .formatError:
            return "Unable to read audio format"
        case .noAudioData:
            return "No audio data found in file"
        }
    }
}