import Accelerate
import AVFoundation
import Foundation

/// Detects the musical key of an audio file using chromagram analysis
/// with Krumhansl-Kessler key profiles.
///
/// Pipeline: decode up to 30 seconds of mono audio, compute a 12-bin
/// chromagram from overlapping FFT frames, average it over time, then pick
/// the major/minor key whose profile correlates best with the average.
struct KeyDetector {

    // MARK: - Key Profiles (Krumhansl-Kessler)

    /// Major key profile weights for each pitch class (C, C#, D, ..., B).
    private static let majorProfile: [Double] = [
        6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88
    ]

    /// Minor key profile weights for each pitch class.
    private static let minorProfile: [Double] = [
        6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17
    ]

    /// Note names for Camelot-compatible display, indexed by pitch class (C = 0).
    private static let noteNames = ["C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab", "A", "Bb", "B"]

    /// Camelot wheel codes for DJ-friendly key display, indexed by pitch class (C = 0).
    private static let camelotMajor = ["8B", "3B", "10B", "5B", "12B", "7B", "2B", "9B", "4B", "11B", "6B", "1B"]
    private static let camelotMinor = ["5A", "12A", "7A", "2A", "9A", "4A", "11A", "6A", "1A", "8A", "3A", "10A"]

    // MARK: - Configuration

    /// Number of samples per analysis frame (must be a power of two for the radix-2 FFT).
    private static let fftSize = 4096
    /// Number of samples between the starts of consecutive frames (50% overlap).
    private static let hopSize = 2048
    /// Tuning reference used to convert frequency to semitones.
    private static let referenceFrequency: Double = 440.0 // A4

    // MARK: - Result

    /// Outcome of a key detection run.
    struct KeyResult {
        let key: String          // e.g., "C Major" or "A Minor"
        let camelotCode: String  // e.g., "8B" or "8A"
        let confidence: Double   // 0.0 to 1.0
        let rootNote: Int        // pitch class index 0-11
        let isMinor: Bool

        /// Compact key name: the note name, followed by "m" for minor keys.
        var shortKey: String {
            let note = KeyDetector.noteNames[rootNote]
            return "\(note)\(isMinor ? "m" : "")"
        }
    }

    // MARK: - Public API

    /// Detects the key of `track` by analysing the file at `track.fileURL`.
    ///
    /// - Throws: Any error thrown by `detectKey(fileURL:)`.
    static func detectKey(for track: Track) async throws -> KeyResult {
        try await detectKey(fileURL: track.fileURL)
    }

    /// Detects the key of the audio file at `fileURL`.
    ///
    /// The analysis runs in a detached task at `.userInitiated` priority and
    /// looks at no more than the first 30 seconds of audio.
    ///
    /// - Parameter fileURL: Location of the audio file to analyse.
    /// - Returns: The best-matching key together with a 0...1 confidence value.
    /// - Throws: `KeyDetectionError.insufficientAudio` when the file yields
    ///   `fftSize * 2` samples or fewer, other `KeyDetectionError` cases from
    ///   decoding, or any error thrown by the underlying decoders.
    static func detectKey(fileURL: URL) async throws -> KeyResult {
        try await Task.detached(priority: .userInitiated) {
            let sampleRate: Double
            let samples: [Float]

            if OGGDecoder.isOGGFile(fileURL) {
                let result = try OGGDecoder.readMonoSamples(url: fileURL, maxSeconds: 30)
                sampleRate = result.sampleRate
                samples = result.samples
            } else {
                let audioFile = try AVAudioFile(forReading: fileURL)
                sampleRate = audioFile.processingFormat.sampleRate
                samples = try readMonoSamples(from: audioFile, maxSeconds: 30)
            }

            guard samples.count > fftSize * 2 else {
                throw KeyDetectionError.insufficientAudio
            }

            // Build chromagram
            let chromagram = computeChromagram(samples: samples, sampleRate: sampleRate)

            // Average across time
            let avgChroma = averageChromagram(chromagram)

            // Match against key profiles
            return matchKeyProfile(chroma: avgChroma)
        }.value
    }

    // MARK: - Audio Reading

    /// Reads up to `maxSeconds` of audio from the start of `audioFile` and
    /// returns it as mono samples.
    ///
    /// The read buffer uses the file's own processing format, because
    /// `AVAudioFile.read(into:frameCount:)` requires the buffer format to match
    /// it; reading a multichannel file into a 1-channel buffer fails.
    /// Multichannel audio is downmixed by averaging all channels.
    ///
    /// - Throws: `KeyDetectionError.insufficientAudio` for an empty file,
    ///   `.formatError` if the buffer cannot be allocated, `.noAudioData` if
    ///   the buffer exposes no float channel data, or any error from the read.
    private static func readMonoSamples(from audioFile: AVAudioFile, maxSeconds: Double) throws -> [Float] {
        let format = audioFile.processingFormat
        let maxFrames = AVAudioFrameCount(min(Double(audioFile.length), format.sampleRate * maxSeconds))

        guard maxFrames > 0 else {
            throw KeyDetectionError.insufficientAudio
        }
        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: maxFrames) else {
            throw KeyDetectionError.formatError
        }

        audioFile.framePosition = 0
        try audioFile.read(into: buffer, frameCount: maxFrames)

        guard let channelData = buffer.floatChannelData else {
            throw KeyDetectionError.noAudioData
        }

        let frameCount = Int(buffer.frameLength)
        let channelCount = Int(format.channelCount)

        // Mono input needs no mixing.
        guard channelCount > 1 else {
            return Array(UnsafeBufferPointer(start: channelData[0], count: frameCount))
        }

        // Downmix: sum every channel, then scale by 1 / channelCount.
        var mono = [Float](repeating: 0, count: frameCount)
        for channel in 0..<channelCount {
            let source = UnsafeBufferPointer(start: channelData[channel], count: frameCount)
            for frame in 0..<frameCount {
                mono[frame] += source[frame]
            }
        }
        let scale = 1 / Float(channelCount)
        for frame in mono.indices {
            mono[frame] *= scale
        }
        return mono
    }

    // MARK: - Chromagram Computation

    /// Computes one 12-bin chroma vector per analysis frame.
    ///
    /// Each frame is `fftSize` samples long and frames advance by `hopSize`.
    /// Returns an empty array if the FFT setup cannot be created or `samples`
    /// is shorter than a single frame.
    private static func computeChromagram(samples: [Float], sampleRate: Double) -> [[Double]] {
        // Too short for even one frame; also keeps `0..<numFrames` well-formed.
        guard samples.count >= fftSize else { return [] }

        let halfFFT = fftSize / 2
        let log2n = vDSP_Length(log2(Double(fftSize)))

        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else {
            return []
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        let numFrames = (samples.count - fftSize) / hopSize + 1
        var chromagram = [[Double]]()
        chromagram.reserveCapacity(numFrames)

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        // Pre-compute frequency-to-chroma mapping
        let chromaMap = buildChromaMap(fftSize: fftSize, sampleRate: sampleRate)

        // Scratch buffers reused for every frame.
        var real = [Float](repeating: 0, count: halfFFT)
        var imag = [Float](repeating: 0, count: halfFFT)
        var windowed = [Float](repeating: 0, count: fftSize)
        var magnitudes = [Float](repeating: 0, count: halfFFT)

        // NOTE(review): the source text between `0..<` and `>= 0` in this loop
        // was lost; the windowing/FFT/magnitude steps below are a standard
        // vDSP real-FFT reconstruction. Confirm against the original,
        // in particular magnitude (vDSP_zvabs) vs. squared magnitude.
        for frameIndex in 0..<numFrames {
            let start = frameIndex * hopSize

            // Apply the Hann window to this frame.
            samples.withUnsafeBufferPointer { samplePtr in
                guard let base = samplePtr.baseAddress else { return }
                vDSP_vmul(base + start, 1, window, 1, &windowed, 1, vDSP_Length(fftSize))
            }

            // Pack into split-complex form, run the forward real FFT, take magnitudes.
            real.withUnsafeMutableBufferPointer { realPtr in
                imag.withUnsafeMutableBufferPointer { imagPtr in
                    guard let realBase = realPtr.baseAddress,
                          let imagBase = imagPtr.baseAddress else { return }
                    var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                    windowed.withUnsafeBufferPointer { windowedPtr in
                        guard let windowedBase = windowedPtr.baseAddress else { return }
                        windowedBase.withMemoryRebound(to: DSPComplex.self, capacity: halfFFT) { complexPtr in
                            vDSP_ctoz(complexPtr, 2, &split, 1, vDSP_Length(halfFFT))
                        }
                    }

                    vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(FFT_FORWARD))
                    vDSP_zvabs(&split, 1, &magnitudes, 1, vDSP_Length(halfFFT))
                }
            }

            // Accumulate bin magnitudes into their pitch classes. Bin 0 is
            // skipped; unmapped bins are marked with -1 in `chromaMap`.
            var chroma = [Double](repeating: 0, count: 12)
            for bin in 1..<halfFFT {
                let chromaBin = chromaMap[bin]
                if chromaBin >= 0 {
                    chroma[chromaBin] += Double(magnitudes[bin])
                }
            }

            chromagram.append(chroma)
        }

        return chromagram
    }

    /// Pre-compute which FFT bin maps to which chroma pitch class.
    ///
    /// Returns an array of `fftSize / 2` entries: a pitch class 0-11 (C = 0)
    /// for bins whose centre frequency lies in 30...5000 Hz, and -1 otherwise.
    private static func buildChromaMap(fftSize: Int, sampleRate: Double) -> [Int] {
        let halfFFT = fftSize / 2
        var map = [Int](repeating: -1, count: halfFFT)

        // NOTE(review): the loop bound and frequency formula were lost from the
        // source text; reconstructed as the usual bin-centre frequency.
        for bin in 1..<halfFFT {
            let frequency = Double(bin) * sampleRate / Double(fftSize)
            guard frequency >= 30 && frequency <= 5000 else { continue }

            // Convert frequency to pitch class
            let semitones = 12.0 * log2(frequency / referenceFrequency)
            let pitchClass = ((Int(round(semitones)) % 12) + 12 + 9) % 12 // A = 9, so shift to C = 0
            map[bin] = pitchClass
        }

        return map
    }

    // MARK: - Average Chromagram

    /// Averages the chroma vectors over all frames.
    ///
    /// Returns twelve zeros when `chromagram` is empty.
    private static func averageChromagram(_ chromagram: [[Double]]) -> [Double] {
        guard !chromagram.isEmpty else { return [Double](repeating: 0, count: 12) }

        var avg = [Double](repeating: 0, count: 12)
        for frame in chromagram {
            for i in 0..<12 {
                avg[i] += frame[i]
            }
        }

        let count = Double(chromagram.count)
        for i in 0..<12 {
            avg[i] /= count
        }
        return avg
    }

    // MARK: - Key Profile Matching

    /// Finds the key whose profile has the highest Pearson correlation with `chroma`.
    ///
    /// All 12 roots are tried against both profiles. The winning correlation
    /// (-1...1) is mapped linearly onto a 0...1 confidence.
    private static func matchKeyProfile(chroma: [Double]) -> KeyResult {
        var bestCorrelation = -Double.greatestFiniteMagnitude
        var bestRoot = 0
        var bestIsMinor = false

        for root in 0..<12 {
            // Rotate chroma so 'root' aligns with index 0
            let rotated = rotateChroma(chroma, by: root)

            // Correlate with major profile
            let majorCorr = pearsonCorrelation(rotated, majorProfile)
            if majorCorr > bestCorrelation {
                bestCorrelation = majorCorr
                bestRoot = root
                bestIsMinor = false
            }

            // Correlate with minor profile
            let minorCorr = pearsonCorrelation(rotated, minorProfile)
            if minorCorr > bestCorrelation {
                bestCorrelation = minorCorr
                bestRoot = root
                bestIsMinor = true
            }
        }

        let confidence = max(0, min(1, (bestCorrelation + 1) / 2))
        let keyName = "\(noteNames[bestRoot]) \(bestIsMinor ? "Minor" : "Major")"
        let camelot = bestIsMinor ? camelotMinor[bestRoot] : camelotMajor[bestRoot]

        return KeyResult(
            key: keyName,
            camelotCode: camelot,
            confidence: confidence,
            rootNote: bestRoot,
            isMinor: bestIsMinor
        )
    }

    /// Returns `chroma` rotated left by `amount`, so that element `amount`
    /// of the input becomes element 0 of the result.
    ///
    /// NOTE(review): the body was lost from the source text; reconstructed
    /// from the call-site comment ("rotate chroma so 'root' aligns with index 0").
    private static func rotateChroma(_ chroma: [Double], by amount: Int) -> [Double] {
        let n = chroma.count
        return (0..<n).map { chroma[($0 + amount) % n] }
    }

    /// Pearson correlation coefficient of `a` and `b`.
    ///
    /// Returns 0 when the denominator is not positive (for example when
    /// either input is constant), instead of dividing by zero.
    ///
    /// NOTE(review): the signature was lost from the source text; reconstructed
    /// from the unlabeled two-argument calls in `matchKeyProfile`.
    private static func pearsonCorrelation(_ a: [Double], _ b: [Double]) -> Double {
        let n = Double(a.count)
        let sumA = a.reduce(0, +)
        let sumB = b.reduce(0, +)
        let sumAB = zip(a, b).map(*).reduce(0, +)
        let sumA2 = a.map { $0 * $0 }.reduce(0, +)
        let sumB2 = b.map { $0 * $0 }.reduce(0, +)

        let numerator = n * sumAB - sumA * sumB
        let denominator = sqrt((n * sumA2 - sumA * sumA) * (n * sumB2 - sumB * sumB))

        guard denominator > 0 else { return 0 }
        return numerator / denominator
    }
}

// MARK: - Errors

/// Failures reported by `KeyDetector`.
enum KeyDetectionError: Error, LocalizedError {
    case insufficientAudio
    case formatError
    case noAudioData

    var errorDescription: String? {
        switch self {
        case .insufficientAudio: return "Audio file is too short for key detection"
        case .formatError: return "Unable to read audio format"
        case .noAudioData: return "No audio data found"
        }
    }
}