| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- import Accelerate
- import AVFoundation
- import Foundation
/// Musical key detection using chromagram analysis and key profile matching.
///
/// The detector decodes the opening of an audio file, folds its magnitude
/// spectrum into twelve pitch classes, and correlates that chroma vector with
/// the Krumhansl-Kessler major and minor profiles for every possible tonic.
struct KeyDetector {
    /// Outcome of a key analysis.
    struct KeyResult {
        let key: String          // e.g. "C Major", "A Minor"
        let shortKey: String     // e.g. "C", "Am"
        let camelotCode: String  // e.g. "8B", "1A"
        let confidence: Double   // 0.0 - 1.0

        /// Placeholder returned whenever no key can be determined.
        static let unknown = KeyResult(key: "Unknown", shortKey: "?", camelotCode: "?", confidence: 0)
    }

    /// Pitch-class names, indexed by `MIDI note % 12` (index 0 is C).
    private static let noteNames = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

    // Krumhansl-Kessler major/minor profiles; index 0 is the tonic.
    private static let majorProfile: [Double] = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    private static let minorProfile: [Double] = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    // Camelot wheel codes, indexed by the pitch class of the tonic.
    private static let camelotMajor = ["8B", "3B", "10B", "5B", "12B", "7B", "2B", "9B", "4B", "11B", "6B", "1B"]
    private static let camelotMinor = ["5A", "12A", "7A", "2A", "9A", "4A", "11A", "6A", "1A", "8A", "3A", "10A"]

    /// Number of samples per analysis frame. Must be a power of two.
    private static let fftSize = 8192
    /// Distance in samples between the starts of consecutive frames.
    private static let hopSize = 4096
    /// Only this many seconds from the start of the file are analysed.
    private static let analysisDuration: Double = 30

    /// Detect the musical key of a track.
    ///
    /// - Parameter track: The track to analyse. Only its `fileURL` is read.
    /// - Returns: The best-matching key, or `KeyResult.unknown` when the audio
    ///   cannot be buffered, is shorter than one analysis frame, or carries no
    ///   usable tonal information (for example digital silence).
    /// - Throws: Any error raised while opening or reading the audio file, and
    ///   `CancellationError` if the surrounding task is cancelled mid-analysis.
    static func detectKey(for track: Track) async throws -> KeyResult {
        guard let audio = try loadMonoSamples(from: track.fileURL),
              let chroma = try chromagram(of: audio.samples, sampleRate: audio.sampleRate) else {
            return .unknown
        }
        return bestMatch(forChroma: chroma)
    }

    /// Decodes up to `analysisDuration` seconds of audio and mixes all channels down to mono.
    ///
    /// Returns `nil` when the file is empty or no float sample data is available.
    private static func loadMonoSamples(from url: URL) throws -> (samples: [Float], sampleRate: Double)? {
        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let sampleRate = format.sampleRate

        let framesToRead = AVAudioFrameCount(min(Double(file.length), sampleRate * analysisDuration))
        // Bail out on empty files instead of requesting a zero-capacity buffer.
        guard framesToRead > 0,
              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: framesToRead) else {
            return nil
        }
        try file.read(into: buffer, frameCount: framesToRead)

        let frameCount = Int(buffer.frameLength)
        let channelCount = Int(format.channelCount)
        guard frameCount > 0, channelCount > 0, let channelData = buffer.floatChannelData else {
            return nil
        }

        var mono = [Float](repeating: 0, count: frameCount)
        mono.withUnsafeMutableBufferPointer { destination in
            guard let out = destination.baseAddress else { return }
            let length = vDSP_Length(frameCount)
            // Sum every channel into the mono buffer, then average.
            for channel in 0..<channelCount {
                vDSP_vadd(out, 1, channelData[channel], 1, out, 1, length)
            }
            if channelCount > 1 {
                var divisor = Float(channelCount)
                vDSP_vsdiv(out, 1, &divisor, out, 1, length)
            }
        }
        return (mono, sampleRate)
    }

    /// Computes the average 12-bin chromagram of `samples`.
    ///
    /// Each Hann-windowed frame is transformed with a real FFT; the magnitude of
    /// every bin between 60 Hz and 5 kHz is added to the pitch class of its
    /// nearest equal-tempered note (A4 = 440 Hz). The sums are averaged over all frames.
    ///
    /// Returns `nil` when there are fewer samples than one frame or the FFT
    /// setup cannot be created.
    private static func chromagram(of samples: [Float], sampleRate: Double) throws -> [Double]? {
        guard samples.count >= fftSize else { return nil }

        // fftSize is a power of two, so its trailing zero count is log2(fftSize).
        let log2n = vDSP_Length(fftSize.trailingZeroBitCount)
        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else { return nil }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        let halfN = fftSize / 2
        let fftLength = vDSP_Length(fftSize)
        let halfLength = vDSP_Length(halfN)

        // The window and the bin-to-pitch-class mapping do not depend on the
        // frame being analysed, so both are built once up front.
        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, fftLength, Int32(vDSP_HANN_NORM))

        // Bin 0 is skipped; every retained frequency is above 60 Hz, so the
        // rounded MIDI note is positive and `% 12` already lies in 0..<12.
        let audibleBins = (1..<halfN).compactMap { bin -> (bin: Int, pitchClass: Int)? in
            let frequency = Double(bin) * sampleRate / Double(fftSize)
            guard frequency > 60 && frequency < 5000 else { return nil }
            let midiNote = 69.0 + 12.0 * log2(frequency / 440.0)
            return (bin: bin, pitchClass: Int(midiNote.rounded()) % 12)
        }

        // Scratch buffers reused by every frame.
        var frame = [Float](repeating: 0, count: fftSize)
        var real = [Float](repeating: 0, count: halfN)
        var imag = [Float](repeating: 0, count: halfN)
        var magnitudes = [Float](repeating: 0, count: halfN)

        var chroma = [Double](repeating: 0, count: 12)
        var analyzedFrames = 0

        for start in stride(from: 0, through: samples.count - fftSize, by: hopSize) {
            // The analysis is CPU-bound; give cancelled tasks a way out.
            try Task.checkCancellation()

            // frame = samples[start ..< start + fftSize] * window
            samples.withUnsafeBufferPointer { source in
                guard let base = source.baseAddress else { return }
                vDSP_vmul(base + start, 1, window, 1, &frame, 1, fftLength)
            }

            frame.withUnsafeBufferPointer { framePtr in
                real.withUnsafeMutableBufferPointer { realPtr in
                    imag.withUnsafeMutableBufferPointer { imagPtr in
                        guard let frameBase = framePtr.baseAddress,
                              let realBase = realPtr.baseAddress,
                              let imagBase = imagPtr.baseAddress else { return }
                        var split = DSPSplitComplex(realp: realBase, imagp: imagBase)
                        // Pack the real frame into split-complex form (even samples
                        // to realp, odd samples to imagp) as the in-place real FFT expects.
                        frameBase.withMemoryRebound(to: DSPComplex.self, capacity: halfN) { interleaved in
                            vDSP_ctoz(interleaved, 2, &split, 1, halfLength)
                        }
                        vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(kFFTDirection_Forward))
                        vDSP_zvabs(&split, 1, &magnitudes, 1, halfLength)
                    }
                }
            }

            for entry in audibleBins {
                chroma[entry.pitchClass] += Double(magnitudes[entry.bin])
            }
            analyzedFrames += 1
        }

        // At least one frame was processed because samples.count >= fftSize.
        let frameTotal = Double(analyzedFrames)
        return chroma.map { $0 / frameTotal }
    }

    /// Picks the key whose profile correlates best with `chroma`.
    ///
    /// For every candidate tonic the chroma vector is rotated so the tonic sits
    /// at index 0 and is compared with both profiles. Ties keep the earlier
    /// candidate (lower tonic first, major before minor). The winning
    /// correlation is mapped from [-1, 1] to a [0, 1] confidence.
    private static func bestMatch(forChroma chroma: [Double]) -> KeyResult {
        // A flat (e.g. silent) or non-finite chroma vector has no defined
        // correlation with any profile; report "Unknown" rather than letting the
        // first candidate win by default.
        guard chroma.count == 12,
              chroma.allSatisfy({ $0.isFinite }),
              let lowest = chroma.min(),
              let highest = chroma.max(),
              highest > lowest else {
            return .unknown
        }

        var bestCorrelation = -Double.greatestFiniteMagnitude
        var bestRoot = 0
        var bestIsMajor = true

        for root in 0..<12 {
            let rotated = (0..<12).map { chroma[($0 + root) % 12] }

            let majorCorrelation = pearsonCorrelation(rotated, majorProfile)
            if majorCorrelation > bestCorrelation {
                bestCorrelation = majorCorrelation
                bestRoot = root
                bestIsMajor = true
            }

            let minorCorrelation = pearsonCorrelation(rotated, minorProfile)
            if minorCorrelation > bestCorrelation {
                bestCorrelation = minorCorrelation
                bestRoot = root
                bestIsMajor = false
            }
        }

        let confidence = max(0, min(1, (bestCorrelation + 1) / 2))
        let noteName = noteNames[bestRoot]
        return KeyResult(
            key: bestIsMajor ? "\(noteName) Major" : "\(noteName) Minor",
            shortKey: bestIsMajor ? noteName : "\(noteName)m",
            camelotCode: bestIsMajor ? camelotMajor[bestRoot] : camelotMinor[bestRoot],
            confidence: confidence)
    }

    /// Pearson correlation coefficient of two equally long series.
    ///
    /// Returns 0 when either series has zero variance (the coefficient is
    /// undefined) or when rounding makes the denominator non-positive or NaN.
    private static func pearsonCorrelation(_ a: [Double], _ b: [Double]) -> Double {
        assert(a.count == b.count, "pearsonCorrelation requires series of equal length")
        let n = Double(a.count)
        let sumA = a.reduce(0, +)
        let sumB = b.reduce(0, +)
        let sumAB = zip(a, b).reduce(0) { $0 + $1.0 * $1.1 }
        let sumA2 = a.reduce(0) { $0 + $1 * $1 }
        let sumB2 = b.reduce(0) { $0 + $1 * $1 }
        let numerator = n * sumAB - sumA * sumB
        let denominator = sqrt((n * sumA2 - sumA * sumA) * (n * sumB2 - sumB * sumB))
        guard denominator > 0 else { return 0 }
        return numerator / denominator
    }
}
|