| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462 |
- import AVFoundation
- import Foundation
- /// Stitches multiple audio files into a single WAV file with embedded cue markers
- /// at track boundaries. Generates companion marker files for DAW import.
- struct AudioStitcher {
/// Outcome of a stitch operation: where the audio went and how it is laid out.
struct StitchResult {
    /// Location of the stitched audio file.
    let outputURL: URL

    /// Markers recorded for the written tracks, in output order.
    let markers: [TrackMarker]

    /// Length of the stitched audio, in seconds.
    let totalDuration: TimeInterval

    /// Sample rate of the stitched audio, in Hz.
    let sampleRate: Double

    /// Number of channels in the stitched audio.
    let channels: Int
}
/// Describes the span a single track occupies inside the stitched file,
/// expressed both in seconds and in sample frames.
struct TrackMarker {
    /// Track title.
    let name: String
    /// Track artist (may be empty).
    let artist: String
    /// Track album (may be empty).
    let album: String

    /// Position of the first frame of the track, in seconds.
    let startTime: TimeInterval
    /// Position just past the last frame of the track, in seconds.
    let endTime: TimeInterval

    /// Position of the first frame of the track, in sample frames.
    let startSample: Int64
    /// Position just past the last frame of the track, in sample frames.
    let endSample: Int64

    /// Length of the track in seconds, derived from the start and end times.
    var duration: TimeInterval {
        get {
            return endTime - startTime
        }
    }
}
/// Settings that control the format of the stitched file and what is placed
/// between consecutive tracks.
struct StitchOptions {
    /// Output sample rate in Hz. When `nil`, the rate of the first track with a
    /// local file is used.
    var sampleRate: Double?

    /// Output bit depth. A value of 32 selects floating-point samples.
    var bitDepth: Int = 24

    /// Silence inserted between tracks, in seconds. Silence is only written for
    /// values greater than zero.
    /// NOTE(review): negative values were described as "crossfade overlap", but
    /// the stitcher shown in this file writes nothing for them — confirm intent.
    var gapDuration: TimeInterval = 0

    /// Crossfade duration in seconds, used when `usePlaylistCrossfades` is false.
    /// A value greater than zero takes precedence over `gapDuration`.
    var crossfadeDuration: TimeInterval = 0

    /// When true, each playlist entry's own crossfade setting is used instead of
    /// `crossfadeDuration`.
    var usePlaylistCrossfades: Bool = true

    /// Options with every setting at its default value.
    static let `default`: StitchOptions = .init()
}
- // MARK: - Stitch
/// Stitch all tracks in a playlist into a single WAV file.
///
/// Entries without a track or without a local file are skipped. Every other
/// track is decoded, converted to the output file's processing format, trimmed
/// to the entry's start/end offsets, gain-adjusted, appended to the output and
/// recorded as a `TrackMarker`.
///
/// Between two written tracks a silence gap of `options.gapDuration` seconds is
/// inserted, unless the applicable crossfade duration is greater than zero.
/// Overlap mixing is not performed: a positive crossfade only suppresses the gap.
///
/// - Parameters:
///   - playlist: Playlist whose `sortedEntries` define the track order.
///   - outputURL: Destination of the stitched audio file.
///   - options: Output format and gap settings.
/// - Returns: The output location, the per-track markers and the overall
///   duration, sample rate and channel count.
/// - Throws: `StitchError.emptyPlaylist` when no entry has a local file,
///   `StitchError.formatError` for an unusable sample rate or oversized source,
///   `StitchError.conversionError` when format conversion fails, and any error
///   raised while reading or writing audio files.
@MainActor
static func stitch(
    playlist: Playlist,
    to outputURL: URL,
    options: StitchOptions = .default
) async throws -> StitchResult {
    let entries = playlist.sortedEntries
    guard !entries.isEmpty else { throw StitchError.emptyPlaylist }

    // The first track with a local file decides the output rate unless overridden.
    guard let firstTrack = entries.compactMap({ $0.track }).first(where: { $0.hasLocalFile }) else {
        throw StitchError.emptyPlaylist
    }
    let sampleRate = try resolveSampleRate(firstTrackURL: firstTrack.fileURL, options: options)
    let channels: AVAudioChannelCount = 2

    // Create output file
    let settings: [String: Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVSampleRateKey: sampleRate,
        AVNumberOfChannelsKey: Int(channels),
        AVLinearPCMBitDepthKey: options.bitDepth,
        AVLinearPCMIsFloatKey: options.bitDepth == 32,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]
    let outputFile = try AVAudioFile(forWriting: outputURL, settings: settings)

    // Use the file's own processingFormat for all buffers
    let outputFormat = outputFile.processingFormat

    var markers: [TrackMarker] = []
    // Single source of truth for the write position; times are derived from it
    // so that marker times and marker samples can never drift apart.
    var currentSample: Int64 = 0

    for entry in entries {
        guard let track = entry.track, track.hasLocalFile else { continue }

        // Decode and convert; a nil result means the track has nothing usable.
        guard let source = try readSource(at: track.fileURL),
              let converted = try convert(source.buffer, from: source.format, to: outputFormat)
        else { continue }

        // Apply start/end offsets, clamped to the frames that actually exist so an
        // out-of-range offset skips the track instead of trapping or reading
        // outside the buffer.
        let availableFrames = Double(converted.frameLength)
        let requestedEnd = entry.endOffset > 0 ? entry.endOffset * sampleRate : availableFrames
        let startFrame = Int(min(max(entry.startOffset * sampleRate, 0), availableFrames))
        let endFrame = Int(min(requestedEnd, availableFrames))
        guard endFrame > startFrame else { continue }
        let framesToWrite = AVAudioFrameCount(endFrame - startFrame)

        guard let trimmedBuffer = copyFrames(
            from: converted,
            startFrame: startFrame,
            count: framesToWrite,
            format: outputFormat
        ) else { continue }

        // Apply gain adjustment
        if entry.gainAdjustment != 0 {
            applyGain(to: trimmedBuffer, gainDB: Float(entry.gainAdjustment))
        }

        // The gap is written only once we know another track really follows, so
        // skipped entries never leave trailing or doubled silence.
        if !markers.isEmpty {
            let crossfade = options.usePlaylistCrossfades
                ? entry.crossfadeDuration
                : options.crossfadeDuration
            // A positive crossfade suppresses the gap; real overlap mixing is not
            // implemented here.
            if crossfade <= 0, options.gapDuration > 0 {
                currentSample += try writeSilence(
                    duration: options.gapDuration,
                    sampleRate: sampleRate,
                    format: outputFormat,
                    to: outputFile
                )
            }
        }

        // Write to output
        let startSample = currentSample
        try outputFile.write(from: trimmedBuffer)
        currentSample += Int64(framesToWrite)

        // Record marker
        markers.append(TrackMarker(
            name: track.title,
            artist: track.artist,
            album: track.album,
            startTime: Double(startSample) / sampleRate,
            endTime: Double(currentSample) / sampleRate,
            startSample: startSample,
            endSample: currentSample
        ))
    }

    return StitchResult(
        outputURL: outputURL,
        markers: markers,
        totalDuration: Double(currentSample) / sampleRate,
        sampleRate: sampleRate,
        channels: Int(channels)
    )
}

/// Picks the output sample rate: the explicit option if set, otherwise the rate
/// of the first track (via `OGGDecoder` for OGG files, `AVAudioFile` otherwise).
@MainActor
private static func resolveSampleRate(firstTrackURL: URL, options: StitchOptions) throws -> Double {
    let rate: Double
    if let customRate = options.sampleRate {
        rate = customRate
    } else if OGGDecoder.isOGGFile(firstTrackURL),
              let info = OGGDecoder.fileInfo(url: firstTrackURL) {
        rate = info.sampleRate
    } else {
        rate = try AVAudioFile(forReading: firstTrackURL).processingFormat.sampleRate
    }
    // Every frame/time conversion divides or multiplies by this value.
    guard rate.isFinite, rate > 0 else { throw StitchError.formatError }
    return rate
}

/// Reads a whole source file into a PCM buffer; returns nil for an empty file or
/// when the buffer cannot be allocated.
@MainActor
private static func readSource(at url: URL) throws -> (buffer: AVAudioPCMBuffer, format: AVAudioFormat)? {
    if OGGDecoder.isOGGFile(url) {
        // Decode OGG to PCM buffer
        let (oggBuffer, oggFormat) = try OGGDecoder.decode(url: url)
        return (oggBuffer, oggFormat)
    }

    // Read the source audio in its processingFormat (auto-decompresses)
    let sourceFile = try AVAudioFile(forReading: url)
    let sourceFormat = sourceFile.processingFormat
    let length = sourceFile.length
    guard length > 0 else { return nil }
    // A single buffer cannot hold more frames than AVAudioFrameCount can express.
    guard let frameCount = AVAudioFrameCount(exactly: length) else {
        throw StitchError.formatError
    }
    guard let buffer = AVAudioPCMBuffer(pcmFormat: sourceFormat, frameCapacity: frameCount) else {
        return nil
    }
    sourceFile.framePosition = 0
    try sourceFile.read(into: buffer, frameCount: frameCount)
    return (buffer, sourceFormat)
}

/// Converts `source` to `outputFormat`; returns `source` unchanged when the
/// formats already match and nil when the output buffer cannot be allocated.
private static func convert(
    _ source: AVAudioPCMBuffer,
    from sourceFormat: AVAudioFormat,
    to outputFormat: AVAudioFormat
) throws -> AVAudioPCMBuffer? {
    guard sourceFormat != outputFormat else { return source }
    guard let converter = AVAudioConverter(from: sourceFormat, to: outputFormat) else {
        throw StitchError.conversionError
    }

    let ratio = outputFormat.sampleRate / sourceFormat.sampleRate
    let outputFrameCapacity = AVAudioFrameCount(Double(source.frameLength) * ratio) + 1024
    guard let converted = AVAudioPCMBuffer(
        pcmFormat: outputFormat,
        frameCapacity: outputFrameCapacity
    ) else { return nil }

    // Hand the source buffer over exactly once and then signal end of stream.
    // Answering `.haveData` on every call would feed the same audio again until
    // the padded output buffer is full, duplicating the start of the track.
    var didProvideInput = false
    let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
        if didProvideInput {
            outStatus.pointee = .endOfStream
            return nil
        }
        didProvideInput = true
        outStatus.pointee = .haveData
        return source
    }

    var conversionError: NSError?
    let status = converter.convert(to: converted, error: &conversionError, withInputFrom: inputBlock)
    if let conversionError { throw conversionError }
    guard status != .error else { throw StitchError.conversionError }
    return converted
}

/// Copies `count` frames starting at `startFrame` into a new buffer of `format`;
/// returns nil when allocation fails or the sample type is not float/int16/int32.
private static func copyFrames(
    from source: AVAudioPCMBuffer,
    startFrame: Int,
    count: AVAudioFrameCount,
    format: AVAudioFormat
) -> AVAudioPCMBuffer? {
    guard let destination = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: count) else {
        return nil
    }
    let channelCount = Int(format.channelCount)
    let frameCount = Int(count)

    // Same per-channel copy for every sample type.
    func copyChannels<Sample>(
        _ src: UnsafePointer<UnsafeMutablePointer<Sample>>,
        _ dst: UnsafePointer<UnsafeMutablePointer<Sample>>
    ) {
        for channel in 0..<channelCount {
            dst[channel].update(from: src[channel].advanced(by: startFrame), count: frameCount)
        }
    }

    if let src = source.floatChannelData, let dst = destination.floatChannelData {
        copyChannels(src, dst)
    } else if let src = source.int16ChannelData, let dst = destination.int16ChannelData {
        copyChannels(src, dst)
    } else if let src = source.int32ChannelData, let dst = destination.int32ChannelData {
        copyChannels(src, dst)
    } else {
        return nil
    }
    destination.frameLength = count
    return destination
}

/// Appends `duration` seconds of silence to `file` and returns the number of
/// frames written (0 when the gap rounds down to nothing or allocation fails).
private static func writeSilence(
    duration: TimeInterval,
    sampleRate: Double,
    format: AVAudioFormat,
    to file: AVAudioFile
) throws -> Int64 {
    let gapFrames = AVAudioFrameCount(duration * sampleRate)
    guard gapFrames > 0,
          let silenceBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: gapFrames)
    else { return 0 }
    silenceBuffer.frameLength = gapFrames

    // Zero the samples explicitly instead of relying on how the buffer was allocated.
    for audioBuffer in UnsafeMutableAudioBufferListPointer(silenceBuffer.mutableAudioBufferList) {
        if let data = audioBuffer.mData {
            memset(data, 0, Int(audioBuffer.mDataByteSize))
        }
    }

    try file.write(from: silenceBuffer)
    return Int64(gapFrames)
}
- // MARK: - Gain
/// Scales every sample of a floating-point buffer by a gain given in decibels.
///
/// Buffers that expose no float channel data are left untouched.
///
/// - Parameters:
///   - buffer: Buffer whose samples are modified in place.
///   - gainDB: Gain in dB; converted to a linear factor of `10^(gainDB / 20)`.
private static func applyGain(to buffer: AVAudioPCMBuffer, gainDB: Float) {
    guard let channels = buffer.floatChannelData else { return }

    let linearGain = powf(10.0, gainDB / 20.0)
    let frames = Int(buffer.frameLength)

    for channel in 0..<Int(buffer.format.channelCount) {
        let samples = UnsafeMutableBufferPointer(start: channels[channel], count: frames)
        for position in samples.indices {
            samples[position] *= linearGain
        }
    }
}
- // MARK: - Companion Files
/// Generate an Adobe Audition-compatible markers file (tab-separated text).
/// Import in Audition: open WAV, then Markers panel > Import...
///
/// One cue row is written per marker with the columns
/// Name, Start, Duration, Time Format, Type, Description.
///
/// - Parameters:
///   - markers: Markers to export, in track order.
///   - url: Destination of the markers file.
/// - Throws: Any error raised while writing the file.
static func writeAuditionMarkers(_ markers: [TrackMarker], to url: URL) throws {
    // A raw tab or line break inside a field would shift the columns or split
    // the row, so such characters are replaced with spaces.
    func field(_ text: String) -> String {
        text.components(separatedBy: CharacterSet(charactersIn: "\t\r\n")).joined(separator: " ")
    }

    var rows = ["Name\tStart\tDuration\tTime Format\tType\tDescription"]
    rows.reserveCapacity(markers.count + 1)

    for (index, marker) in markers.enumerated() {
        let start = formatHMSMs(marker.startTime)
        let duration = formatHMSMs(marker.duration)
        let description = [marker.artist, marker.album]
            .filter { !$0.isEmpty }
            .map(field)
            .joined(separator: " - ")
        let name = "\(String(format: "%02d", index + 1)). \(field(marker.name))"
        // Cue marker at the start of each track
        rows.append("\(name)\t\(start)\t\(duration)\tDecimal\tCue\t\(description)")
    }

    let contents = rows.joined(separator: "\n") + "\n"
    try contents.write(to: url, atomically: true, encoding: .utf8)
}
/// Writes a human-readable track list with start, end and duration timecodes.
///
/// - Parameters:
///   - markers: Markers to list, in track order.
///   - playlistName: Name shown in the heading.
///   - url: Destination of the text file.
/// - Throws: Any error raised while writing the file.
static func writeTrackList(_ markers: [TrackMarker], playlistName: String, to url: URL) throws {
    let rule = String(repeating: "─", count: 70)
    let timestamp = ISO8601DateFormatter().string(from: Date())

    var output: [String] = [
        "\(playlistName) — Track List",
        "Generated by MixBoard on \(timestamp)",
        rule,
        ""
    ]

    for (position, marker) in zip(1..., markers) {
        let number = String(format: "%02d", position)

        var heading = " \(number). \(marker.name)"
        if !marker.artist.isEmpty {
            heading += " — \(marker.artist)"
        }
        output.append(heading)

        let startText = formatHMSMs(marker.startTime)
        let endText = formatHMSMs(marker.endTime)
        let durationText = formatHMSMs(marker.duration)
        output.append(" Start: \(startText) End: \(endText) Duration: \(durationText)")

        if !marker.album.isEmpty {
            output.append(" Album: \(marker.album)")
        }
        output.append("")
    }

    // The summary footer is only meaningful when at least one track exists.
    if let finalMarker = markers.last {
        output.append(rule)
        output.append("Total: \(markers.count) tracks, \(formatHMSMs(finalMarker.endTime))")
    }

    let text = output.joined(separator: "\n") + "\n"
    try text.write(to: url, atomically: true, encoding: .utf8)
}
/// Generate a CUE sheet referencing the stitched file.
///
/// Each marker becomes one `TRACK` with a title, an optional performer and an
/// `INDEX 01` at the marker's start time.
///
/// - Parameters:
///   - markers: Markers to export, in track order.
///   - audioFileName: File name written into the `FILE` line.
///   - playlistName: Title of the sheet.
///   - url: Destination of the CUE sheet.
/// - Throws: Any error raised while writing the file.
static func writeCueSheet(
    _ markers: [TrackMarker],
    audioFileName: String,
    playlistName: String,
    to url: URL
) throws {
    // CUE strings are delimited by double quotes and have no escape sequence, so
    // an embedded quote or line break would end the field early. Quotes become
    // apostrophes and line breaks become spaces.
    func quoted(_ text: String) -> String {
        let cleaned = text
            .replacingOccurrences(of: "\"", with: "'")
            .components(separatedBy: .newlines)
            .joined(separator: " ")
        return "\"\(cleaned)\""
    }

    var lines: [String] = []
    lines.append("REM Generated by MixBoard (stitched export)")
    lines.append("REM Date: \(ISO8601DateFormatter().string(from: Date()))")
    lines.append("TITLE \(quoted(playlistName))")
    // The file name is written verbatim: altering it would make the sheet point
    // at a file that does not exist.
    lines.append("FILE \"\(audioFileName)\" WAVE")

    for (index, marker) in markers.enumerated() {
        let trackNum = String(format: "%02d", index + 1)
        lines.append(" TRACK \(trackNum) AUDIO")
        lines.append(" TITLE \(quoted(marker.name))")
        if !marker.artist.isEmpty {
            lines.append(" PERFORMER \(quoted(marker.artist))")
        }
        lines.append(" INDEX 01 \(formatCueTime(marker.startTime))")
    }

    let content = lines.joined(separator: "\n") + "\n"
    try content.write(to: url, atomically: true, encoding: .utf8)
}
/// Generate an Adobe Audition session (.sesx) referencing the stitched file with markers.
/// Note: This uses a best-effort approximation of Audition's XML format.
/// For guaranteed marker import, use the CSV markers file instead.
///
/// Every track marker produces two cue markers: one at its start sample and one
/// named "… [END]" at its end sample.
///
/// - Parameters:
///   - markers: Markers to export, in track order.
///   - audioFilePath: Absolute path of the stitched audio file.
///   - audioFileName: Relative path / file name of the stitched audio file.
///   - playlistName: Name used for the session and its single clip.
///   - sampleRate: Sample rate of the stitched audio, in Hz.
///   - totalDuration: Length of the stitched audio, in seconds.
///   - url: Destination of the session file.
/// - Throws: Any error raised while writing the file.
static func writeAuditionSession(
    _ markers: [TrackMarker],
    audioFilePath: String,
    audioFileName: String,
    playlistName: String,
    sampleRate: Double,
    totalDuration: TimeInterval,
    to url: URL
) throws {
    // Round instead of truncating: seconds * rate can land a hair below the true
    // frame count, and truncation would then report one sample too few.
    let totalSamples = Int64((totalDuration * sampleRate).rounded())

    // Audition .sesx is XML-based. This approximates its structure.
    var xml = """
        <?xml version="1.0" encoding="UTF-8" standalone="no"?>
        <sesx version="1.2">
        <session name="\(escapeXML(playlistName))" sampleRate="\(Int(sampleRate))" bitDepth="32" audioChannelType="stereo" duration="\(totalSamples)" padding="0">
        <files>
        <file id="file-1" relativePath="\(escapeXML(audioFileName))" absolutePath="\(escapeXML(audioFilePath))" />
        </files>
        <tracks>
        <audioTrack id="track-1" name="Mix" index="0" color="#4A86C8" visible="true" mute="false" solo="false" select="false" height="150">
        <trackParameters>
        <trackParameter name="volume" value="1.0" />
        <trackParameter name="pan" value="0.0" />
        </trackParameters>
        <audioClip id="clip-1" name="\(escapeXML(playlistName))" fileID="file-1" startPoint="0" endPoint="\(totalSamples)" sourceInPoint="0" sourceOutPoint="\(totalSamples)">
        </audioClip>
        </audioTrack>
        </tracks>
        <markers>
        """

    for (index, marker) in markers.enumerated() {
        // Marker ids are consecutive: odd for a track's start, even for its end.
        let id = "marker-\(index * 2 + 1)"
        let id2 = "marker-\(index * 2 + 2)"
        let name = "\(String(format: "%02d", index + 1)). \(escapeXML(marker.name))"
        xml += """
            <marker id="\(id)" name="\(name)" time="\(marker.startSample)" type="cue" description="\(escapeXML(marker.artist))" />
            <marker id="\(id2)" name="\(name) [END]" time="\(marker.endSample)" type="cue" description="" />
            """
    }

    xml += """
        </markers>
        </session>
        </sesx>
        """

    try xml.write(to: url, atomically: true, encoding: .utf8)
}
- // MARK: - Helpers
/// Formats a time in seconds as `HH:MM:SS.mmm`.
///
/// The value is rounded to the nearest millisecond before it is split into
/// fields, so binary floating-point error cannot drop a millisecond (1.001 s
/// prints as `.001`, not `.000`) and rounding carries into the seconds field.
/// Negative or non-finite input is formatted as zero.
///
/// - Parameter seconds: Time in seconds.
/// - Returns: The formatted timecode.
private static func formatHMSMs(_ seconds: TimeInterval) -> String {
    let clamped = seconds.isFinite ? max(0, seconds) : 0
    let totalMillis = Int((clamped * 1000).rounded())

    let millis = totalMillis % 1000
    let totalSeconds = totalMillis / 1000
    let hours = totalSeconds / 3600
    let minutes = (totalSeconds % 3600) / 60
    let secs = totalSeconds % 60
    return String(format: "%02d:%02d:%02d.%03d", hours, minutes, secs, millis)
}
/// Formats a time in seconds as a CUE sheet position `MM:SS:FF`, where `FF`
/// counts frames at 75 frames per second.
///
/// The position is converted to whole frames in one step, rounding down, with a
/// tiny tolerance so that a value sitting exactly on a frame boundary is not
/// pushed to the previous frame by floating-point error (1.2 s is frame 15 of
/// its second, not 14). Negative or non-finite input is formatted as zero.
///
/// - Parameter seconds: Time in seconds.
/// - Returns: The formatted CUE position.
private static func formatCueTime(_ seconds: TimeInterval) -> String {
    let framesPerSecond = 75
    let clamped = seconds.isFinite ? max(0, seconds) : 0
    // 1e-6 of a frame is about 13 nanoseconds: enough to absorb rounding error,
    // far too small to move a real position into the next frame.
    let totalFrames = Int((clamped * Double(framesPerSecond) + 1e-6).rounded(.down))

    let frames = totalFrames % framesPerSecond
    let totalSeconds = totalFrames / framesPerSecond
    let minutes = totalSeconds / 60
    let secs = totalSeconds % 60
    return String(format: "%02d:%02d:%02d", minutes, secs, frames)
}
/// Escapes the characters that are not allowed verbatim inside a double-quoted
/// XML attribute value or in XML text: `&`, `<`, `>` and `"`.
///
/// The string is processed in a single pass, so an ampersand produced by one
/// replacement can never be escaped a second time.
///
/// - Parameter string: Raw text.
/// - Returns: Text safe to embed in XML.
private static func escapeXML(_ string: String) -> String {
    // The ampersand that starts each entity is spelled as the scalar U+0026 so
    // that a tool which decodes HTML entities in source text cannot turn these
    // replacements back into the raw characters.
    var escaped = ""
    escaped.reserveCapacity(string.count)
    for character in string {
        switch character {
        case "\u{26}":
            escaped += "\u{26}amp;"
        case "<":
            escaped += "\u{26}lt;"
        case ">":
            escaped += "\u{26}gt;"
        case "\"":
            escaped += "\u{26}quot;"
        default:
            escaped.append(character)
        }
    }
    return escaped
}
- }
- // MARK: - Errors
/// Failures reported by the stitcher.
enum StitchError: Error {
    /// The playlist has no entries, or none with a local file.
    case emptyPlaylist
    /// An audio format could not be created.
    case formatError
    /// Converting audio between formats failed.
    case conversionError
}

extension StitchError: LocalizedError {
    /// User-facing description of the failure.
    var errorDescription: String? {
        let message: String
        switch self {
        case .emptyPlaylist:
            message = "Playlist is empty"
        case .formatError:
            message = "Unable to create audio format"
        case .conversionError:
            message = "Audio conversion failed"
        }
        return message
    }
}
|