KeyDetector.swift 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. import Accelerate
  2. import AVFoundation
  3. import Foundation
  /// Musical key detection using chromagram analysis and key profile matching.
  ///
  /// The detector mixes at most the first 30 seconds of a track down to mono,
  /// accumulates FFT magnitudes into a 12-bin chromagram (one bin per pitch
  /// class, index 0 = C), and picks the major or minor key whose profile
  /// correlates best with that chromagram.
  struct KeyDetector {
      /// The outcome of a key-detection pass.
      struct KeyResult {
          let key: String // e.g. "C Major", "A Minor"
          let shortKey: String // e.g. "C", "Am"
          let camelotCode: String // e.g. "8B", "1A"
          let confidence: Double // 0.0 - 1.0
      }

      // MARK: - Lookup tables (all indexed by pitch class, 0 = C ... 11 = B)

      private static let noteNames = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

      // Krumhansl-Kessler major/minor profiles
      private static let majorProfile: [Double] = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
      private static let minorProfile: [Double] = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

      private static let camelotMajor = ["8B", "3B", "10B", "5B", "12B", "7B", "2B", "9B", "4B", "11B", "6B", "1B"]
      private static let camelotMinor = ["5A", "12A", "7A", "2A", "9A", "4A", "11A", "6A", "1A", "8A", "3A", "10A"]

      // MARK: - Analysis parameters

      /// Number of samples per FFT frame; must be a power of two for the radix-2 FFT.
      private static let fftSize = 8192
      /// Distance between consecutive frame starts (50% overlap).
      private static let hopSize = 4096
      /// Only this many seconds from the start of the file are analysed.
      private static let maxAnalysisSeconds: Double = 30
      /// FFT bins at or below this frequency (Hz) are ignored.
      private static let minFrequency: Double = 60
      /// FFT bins at or above this frequency (Hz) are ignored.
      private static let maxFrequency: Double = 5000

      /// Result returned whenever no key can be determined.
      private static let unknownResult = KeyResult(key: "Unknown", shortKey: "?", camelotCode: "?", confidence: 0)

      // MARK: - Entry point

      /// Detect the musical key of a track.
      ///
      /// - Parameter track: The track whose `fileURL` is opened for reading.
      /// - Returns: The best-matching key. The "Unknown" result (confidence 0) is
      ///   returned when the audio cannot be analysed: the file is empty, no float
      ///   sample data is available, the audio is shorter than one FFT frame, or
      ///   the chromagram is flat (for example digital silence).
      /// - Throws: Any error raised while opening or reading the audio file.
      static func detectKey(for track: Track) async throws -> KeyResult {
          let file = try AVAudioFile(forReading: track.fileURL)
          let format = file.processingFormat
          let sampleRate = format.sampleRate

          // Never ask AVFoundation for a zero-capacity buffer; an empty file has no key.
          let maxFrames = AVAudioFrameCount(min(Double(file.length), sampleRate * maxAnalysisSeconds))
          guard maxFrames > 0,
                let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
              return unknownResult
          }
          try file.read(into: buffer, frameCount: maxFrames)

          // A chromagram without any contrast makes every correlation undefined.
          // Without this check the first candidate ("C Major") would win by default
          // and be reported with a misleading confidence of 0.5.
          guard let mono = monoSamples(from: buffer),
                let chromagram = averageChromagram(of: mono, sampleRate: sampleRate),
                let weakest = chromagram.min(),
                let strongest = chromagram.max(),
                strongest > weakest else {
              return unknownResult
          }

          // Match against key profiles using Pearson correlation. Rotating by `root`
          // lines the candidate tonic up with index 0 of the profile. Major is tested
          // before minor and only a strictly better score replaces the current best.
          var best = (correlation: -Double.greatestFiniteMagnitude, root: 0, isMajor: true)
          for root in 0..<12 {
              let rotated = (0..<12).map { chromagram[($0 + root) % 12] }

              let majorCorrelation = pearsonCorrelation(rotated, majorProfile)
              if majorCorrelation > best.correlation {
                  best = (correlation: majorCorrelation, root: root, isMajor: true)
              }

              let minorCorrelation = pearsonCorrelation(rotated, minorProfile)
              if minorCorrelation > best.correlation {
                  best = (correlation: minorCorrelation, root: root, isMajor: false)
              }
          }

          // Map the correlation range [-1, 1] onto a confidence in [0, 1].
          let confidence = max(0, min(1, (best.correlation + 1) / 2))
          let noteName = noteNames[best.root]

          if best.isMajor {
              return KeyResult(
                  key: "\(noteName) Major",
                  shortKey: noteName,
                  camelotCode: camelotMajor[best.root],
                  confidence: confidence)
          }
          return KeyResult(
              key: "\(noteName) Minor",
              shortKey: "\(noteName)m",
              camelotCode: camelotMinor[best.root],
              confidence: confidence)
      }

      // MARK: - Helpers

      /// Averages all channels of `buffer` into a single mono signal.
      ///
      /// - Returns: One sample per valid frame, or `nil` when the buffer exposes no
      ///   float channel data.
      private static func monoSamples(from buffer: AVAudioPCMBuffer) -> [Float]? {
          guard let channelData = buffer.floatChannelData else { return nil }

          let frameCount = Int(buffer.frameLength)
          let channelCount = Int(buffer.format.channelCount)
          var mono = [Float](repeating: 0, count: frameCount)
          guard frameCount > 0, channelCount > 0 else { return mono }

          mono.withUnsafeMutableBufferPointer { destination in
              guard let out = destination.baseAddress else { return }
              let length = vDSP_Length(frameCount)

              // Sum the channels in order, then divide once by the channel count.
              for channel in 0..<channelCount {
                  vDSP_vadd(out, 1, channelData[channel], 1, out, 1, length)
              }
              if channelCount > 1 {
                  var divisor = Float(channelCount)
                  vDSP_vsdiv(out, 1, &divisor, out, 1, length)
              }
          }
          return mono
      }

      /// Computes the chromagram of `samples`, averaged over all analysed frames.
      ///
      /// Frames of `fftSize` samples are taken every `hopSize` samples, Hann
      /// windowed, transformed with a real FFT, and each in-band bin's magnitude is
      /// added to the pitch class nearest to that bin's centre frequency.
      ///
      /// - Returns: 12 values indexed by pitch class, or `nil` when there are fewer
      ///   than `fftSize` samples or the FFT setup cannot be created.
      private static func averageChromagram(of samples: [Float], sampleRate: Double) -> [Double]? {
          guard samples.count >= fftSize else { return nil }

          let log2n = vDSP_Length(log2(Double(fftSize)))
          guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else { return nil }
          defer { vDSP_destroy_fftsetup(fftSetup) }

          let halfN = fftSize / 2
          let length = vDSP_Length(fftSize)
          let halfLength = vDSP_Length(halfN)

          // Everything below is identical for every frame, so build it once.
          let binPitchClasses = pitchClassTable(binCount: halfN, sampleRate: sampleRate)
          var window = [Float](repeating: 0, count: fftSize)
          vDSP_hann_window(&window, length, Int32(vDSP_HANN_NORM))

          // Scratch buffers reused across frames; each is fully overwritten per frame.
          var frame = [Float](repeating: 0, count: fftSize)
          var real = [Float](repeating: 0, count: halfN)
          var imag = [Float](repeating: 0, count: halfN)
          var magnitudes = [Float](repeating: 0, count: halfN)

          var chroma = [Double](repeating: 0, count: 12)
          var analyzedFrames = 0

          for start in stride(from: 0, through: samples.count - fftSize, by: hopSize) {
              // Window the current slice straight into the scratch frame.
              samples.withUnsafeBufferPointer { source in
                  guard let base = source.baseAddress else { return }
                  vDSP_vmul(base + start, 1, window, 1, &frame, 1, length)
              }

              real.withUnsafeMutableBufferPointer { realPtr in
                  imag.withUnsafeMutableBufferPointer { imagPtr in
                      guard let realBase = realPtr.baseAddress,
                            let imagBase = imagPtr.baseAddress else { return }
                      var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                      // Pack the real frame as interleaved pairs into split-complex
                      // form, which is the input layout vDSP_fft_zrip operates on.
                      frame.withUnsafeBufferPointer { framePtr in
                          guard let frameBase = framePtr.baseAddress else { return }
                          frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfN) { interleaved in
                              vDSP_ctoz(interleaved, 2, &split, 1, halfLength)
                          }
                      }
                      vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(kFFTDirection_Forward))
                      vDSP_zvabs(&split, 1, &magnitudes, 1, halfLength)
                  }
              }

              // Map frequency bins to pitch classes
              for (bin, pitchClass) in binPitchClasses {
                  chroma[pitchClass] += Double(magnitudes[bin])
              }
              analyzedFrames += 1
          }

          guard analyzedFrames > 0 else { return nil }
          let frameTotal = Double(analyzedFrames)
          return chroma.map { $0 / frameTotal }
      }

      /// Lists the FFT bins that contribute to the chromagram with their pitch class.
      ///
      /// Bin 0 is skipped, as are bins whose centre frequency lies outside the open
      /// interval (`minFrequency`, `maxFrequency`). The pitch class is the nearest
      /// MIDI note number modulo 12, with A4 = 440 Hz = MIDI note 69.
      private static func pitchClassTable(binCount: Int, sampleRate: Double) -> [(bin: Int, pitchClass: Int)] {
          guard binCount > 1 else { return [] }
          return (1..<binCount).compactMap { bin -> (bin: Int, pitchClass: Int)? in
              let frequency = Double(bin) * sampleRate / Double(fftSize)
              guard frequency > minFrequency && frequency < maxFrequency else { return nil }
              let midiNote = 69.0 + 12.0 * log2(frequency / 440.0)
              // The extra "+ 12" keeps the result non-negative for any remainder.
              let pitchClass = ((Int(round(midiNote)) % 12) + 12) % 12
              return (bin: bin, pitchClass: pitchClass)
          }
      }

      /// Pearson correlation coefficient of two equally long series.
      ///
      /// - Returns: A value in [-1, 1], or 0 when the correlation is undefined
      ///   (empty or mismatched inputs, or either series has zero variance).
      private static func pearsonCorrelation(_ a: [Double], _ b: [Double]) -> Double {
          guard a.count == b.count, !a.isEmpty else { return 0 }

          let n = Double(a.count)
          let sumA = a.reduce(0.0, +)
          let sumB = b.reduce(0.0, +)
          let sumAB = zip(a, b).reduce(0.0) { $0 + $1.0 * $1.1 }
          let sumA2 = a.reduce(0.0) { $0 + $1 * $1 }
          let sumB2 = b.reduce(0.0) { $0 + $1 * $1 }

          let numerator = n * sumAB - sumA * sumB
          let denominator = sqrt((n * sumA2 - sumA * sumA) * (n * sumB2 - sumB * sumB))
          // Also rejects NaN, because any comparison with NaN is false.
          guard denominator > 0 else { return 0 }
          return numerator / denominator
      }
  }