BPMDetector.swift 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import Accelerate
  2. import AVFoundation
  3. import Foundation
  /// BPM detection using spectral flux onset detection and autocorrelation.
  struct BPMDetector {
      /// Number of samples in each analysis window passed to the FFT.
      private static let fftSize = 2048
      /// Number of samples the analysis window advances between frames.
      private static let hopSize = 512
      /// Only this many seconds from the start of the file are analysed.
      private static let analysisSeconds = 60.0
      /// Slowest tempo considered by the lag search and the final normalization.
      private static let minBPM = 60.0
      /// Fastest tempo considered by the lag search and the final normalization.
      private static let maxBPM = 200.0

      /// Detect BPM for a track. Analyzes the first 60 seconds.
      ///
      /// - Parameter track: The track whose `fileURL` is opened for reading.
      /// - Returns: The estimated tempo rounded to one decimal place, or `0` when the
      ///   audio cannot be analysed (empty file, no float channel data, FFT setup
      ///   failure, or too little audio to cover the slowest tempo).
      /// - Throws: Any error raised while opening or reading the audio file, and
      ///   `CancellationError` if the surrounding task is cancelled mid-analysis.
      static func detectBPM(for track: Track) async throws -> Double {
          guard let audio = try loadMonoSamples(from: track.fileURL) else { return 0 }

          let onsetSignal = try spectralFlux(of: audio.samples)
          guard onsetSignal.count > 2 else { return 0 }

          // One onset value is produced per hop, so this is the onset signal's sample rate.
          let onsetRate = audio.sampleRate / Double(hopSize)
          return estimateTempo(from: onsetSignal, onsetRate: onsetRate)
      }

      /// Reads up to `analysisSeconds` of audio and averages all channels into one.
      ///
      /// Returns `nil` when there is nothing to analyse.
      private static func loadMonoSamples(from url: URL) throws -> (samples: [Float], sampleRate: Double)? {
          let file = try AVAudioFile(forReading: url)
          let format = file.processingFormat
          let sampleRate = format.sampleRate

          let maxFrames = AVAudioFrameCount(min(Double(file.length), sampleRate * analysisSeconds))
          // An empty file (or a zero sample rate) leaves nothing to read.
          guard maxFrames > 0 else { return nil }
          guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
              return nil
          }
          try file.read(into: buffer, frameCount: maxFrames)

          guard let channelData = buffer.floatChannelData else { return nil }
          let frameCount = Int(buffer.frameLength)
          let channelCount = Int(format.channelCount)
          guard frameCount > 0, channelCount > 0 else { return nil }

          var mono = [Float](repeating: 0, count: frameCount)
          mono.withUnsafeMutableBufferPointer { destination in
              guard let out = destination.baseAddress else { return }
              let length = vDSP_Length(frameCount)
              // Accumulate every channel in place, then divide by the channel count.
              for channel in 0..<channelCount {
                  vDSP_vadd(out, 1, channelData[channel], 1, out, 1, length)
              }
              if channelCount > 1 {
                  var divisor = Float(channelCount)
                  vDSP_vsdiv(out, 1, &divisor, out, 1, length)
              }
          }
          return (samples: mono, sampleRate: sampleRate)
      }

      /// Computes one spectral-flux value per hop: the sum of the positive
      /// bin-by-bin magnitude increases relative to the previous frame.
      ///
      /// Returns an empty array when the FFT setup cannot be created or when
      /// `samples` is shorter than one analysis window.
      private static func spectralFlux(of samples: [Float]) throws -> [Float] {
          let log2n = vDSP_Length(log2(Double(fftSize)))
          guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else { return [] }
          defer { vDSP_destroy_fftsetup(fftSetup) }

          guard samples.count >= fftSize else { return [] }

          let halfN = fftSize / 2
          var window = [Float](repeating: 0, count: fftSize)
          vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

          // Scratch buffers are allocated once and reused for every frame.
          var frame = [Float](repeating: 0, count: fftSize)
          var real = [Float](repeating: 0, count: halfN)
          var imag = [Float](repeating: 0, count: halfN)
          var magnitudes = [Float](repeating: 0, count: halfN)
          var previousMagnitudes = [Float](repeating: 0, count: halfN)

          var onsetSignal: [Float] = []
          onsetSignal.reserveCapacity((samples.count - fftSize) / hopSize + 1)

          var position = 0
          while position + fftSize <= samples.count {
              // Long files mean thousands of frames; let a cancelled task stop promptly.
              try Task.checkCancellation()

              // Apply the window while copying the frame out of the sample buffer.
              samples.withUnsafeBufferPointer { source in
                  guard let base = source.baseAddress else { return }
                  vDSP_vmul(base + position, 1, window, 1, &frame, 1, vDSP_Length(fftSize))
              }

              real.withUnsafeMutableBufferPointer { realBuffer in
                  imag.withUnsafeMutableBufferPointer { imagBuffer in
                      guard let realp = realBuffer.baseAddress,
                            let imagp = imagBuffer.baseAddress else { return }
                      var split = DSPSplitComplex(realp: realp, imagp: imagp)

                      // Reinterpret the real frame as interleaved pairs and pack it
                      // into split-complex form, as the in-place real FFT expects.
                      frame.withUnsafeBufferPointer { frameBuffer in
                          guard let frameBase = frameBuffer.baseAddress else { return }
                          frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfN) { complex in
                              vDSP_ctoz(complex, 2, &split, 1, vDSP_Length(halfN))
                          }
                      }
                      vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(kFFTDirection_Forward))
                      vDSP_zvabs(&split, 1, &magnitudes, 1, vDSP_Length(halfN))
                  }
              }

              // Spectral flux (positive differences only)
              var flux: Float = 0
              for bin in 0..<halfN {
                  let rise = magnitudes[bin] - previousMagnitudes[bin]
                  if rise > 0 { flux += rise }
              }
              onsetSignal.append(flux)

              // `magnitudes` is fully overwritten next frame, so swapping avoids a copy.
              swap(&previousMagnitudes, &magnitudes)
              position += hopSize
          }
          return onsetSignal
      }

      /// Picks the lag with the strongest autocorrelation inside the
      /// `minBPM...maxBPM` range and converts it to beats per minute.
      ///
      /// Returns `0` when the onset signal is too short to cover the longest lag.
      private static func estimateTempo(from onsetSignal: [Float], onsetRate: Double) -> Double {
          // A lag of zero would divide by zero below, so never search below 1.
          let minLag = max(1, Int(onsetRate * 60.0 / maxBPM))
          let maxLag = Int(onsetRate * 60.0 / minBPM)
          guard minLag <= maxLag, maxLag < onsetSignal.count else { return 0 }

          var bestLag = minLag
          var bestCorrelation = -Float.greatestFiniteMagnitude
          onsetSignal.withUnsafeBufferPointer { signal in
              guard let base = signal.baseAddress else { return }
              for lag in minLag...maxLag {
                  let overlap = signal.count - lag
                  var dot: Float = 0
                  vDSP_dotpr(base, 1, base + lag, 1, &dot, vDSP_Length(overlap))
                  // The flux signal is non-negative, so a raw sum grows with the number
                  // of overlapping terms and would favour short lags (fast tempi).
                  // Comparing the mean product per term removes that bias.
                  let correlation = dot / Float(overlap)
                  if correlation > bestCorrelation {
                      bestCorrelation = correlation
                      bestLag = lag
                  }
              }
          }

          // Normalize to reasonable range. Truncating the lag bounds to integers can
          // leave the raw value slightly outside it.
          var bpm = (onsetRate * 60.0) / Double(bestLag)
          while bpm > maxBPM { bpm /= 2 }
          while bpm < minBPM { bpm *= 2 }
          return (bpm * 10).rounded() / 10
      }
  }