AudioStitcher.swift 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. import AVFoundation
  2. import Foundation
  3. /// Stitches multiple audio files into a single WAV file with embedded cue markers
  4. /// at track boundaries. Generates companion marker files for DAW import.
  5. struct AudioStitcher {
  /// Outcome of a completed stitch operation.
  struct StitchResult {
      /// Location the stitched audio file was written to.
      let outputURL: URL
      /// One marker per track that was written, in output order.
      let markers: [TrackMarker]
      /// Length of the stitched output in seconds.
      let totalDuration: TimeInterval
      /// Sample rate of the stitched output in Hz.
      let sampleRate: Double
      /// Number of audio channels in the stitched output.
      let channels: Int
  }
  /// Describes the span one track occupies inside the stitched file.
  struct TrackMarker {
      /// Track title.
      let name: String
      /// Track artist (may be empty).
      let artist: String
      /// Track album (may be empty).
      let album: String
      /// Position of the first frame of the track, in seconds from the start of the output.
      let startTime: TimeInterval
      /// Position just past the last frame of the track, in seconds from the start of the output.
      let endTime: TimeInterval
      /// Same position as `startTime`, expressed in output frames.
      let startSample: Int64
      /// Same position as `endTime`, expressed in output frames.
      let endSample: Int64

      /// Length of the track inside the stitched file, in seconds.
      var duration: TimeInterval {
          return endTime - startTime
      }
  }
  /// Settings that control how tracks are joined and how the output file is encoded.
  struct StitchOptions {
      /// Sample rate of the output file in Hz. When `nil`, the rate of the first
      /// track with a local file is used.
      var sampleRate: Double?

      /// Bit depth of the output file. A value of 32 selects floating-point samples.
      var bitDepth: Int = 24

      /// Silence inserted between tracks, in seconds (0 = gapless, negative = crossfade overlap).
      var gapDuration: TimeInterval = 0

      /// Crossfade length in seconds. Overrides `gapDuration` if > 0.
      var crossfadeDuration: TimeInterval = 0

      /// When `true`, the crossfade setting of each playlist entry is used instead of
      /// `crossfadeDuration`.
      var usePlaylistCrossfades: Bool = true

      /// Options with every setting left at its default value.
      static let `default`: StitchOptions = .init()
  }
  39. // MARK: - Stitch
  /// Stitch all tracks in a playlist into a single WAV file.
  ///
  /// Entries are processed in `playlist.sortedEntries` order. Entries without a track
  /// or without a local file are skipped. Every remaining track is decoded, converted
  /// to the output file's processing format, trimmed to the entry's start/end offsets,
  /// gain-adjusted and appended to the output. A `TrackMarker` is recorded for each
  /// track that is actually written.
  ///
  /// Crossfades are not mixed yet: when a crossfade applies between two tracks they
  /// are butted together and no silence gap is inserted.
  ///
  /// NOTE(review): decoding and file I/O run synchronously on the main actor; confirm
  /// whether callers rely on that isolation before moving the work elsewhere.
  ///
  /// - Parameters:
  ///   - playlist: Playlist whose entries are stitched.
  ///   - outputURL: Destination of the stitched file.
  ///   - options: Output format and gap settings.
  /// - Returns: Output location, markers and overall format of the stitched file.
  /// - Throws: `StitchError.emptyPlaylist` when there are no entries or none has a
  ///   local file, `StitchError.formatError` when a source file is too long to be
  ///   buffered, `StitchError.conversionError` when a converter cannot be created or
  ///   fails, and any error raised while reading, decoding or writing audio.
  @MainActor
  static func stitch(
      playlist: Playlist,
      to outputURL: URL,
      options: StitchOptions = .default
  ) async throws -> StitchResult {
      let entries = playlist.sortedEntries
      guard !entries.isEmpty else { throw StitchError.emptyPlaylist }

      // The first track with a local file decides the output sample rate
      // unless the caller asked for a specific one.
      guard let firstTrack = entries.compactMap({ $0.track }).first(where: { $0.hasLocalFile }) else {
          throw StitchError.emptyPlaylist
      }
      let sampleRate: Double
      if let customRate = options.sampleRate {
          sampleRate = customRate
      } else if OGGDecoder.isOGGFile(firstTrack.fileURL),
                let info = OGGDecoder.fileInfo(url: firstTrack.fileURL) {
          sampleRate = info.sampleRate
      } else {
          let firstFile = try AVAudioFile(forReading: firstTrack.fileURL)
          sampleRate = firstFile.processingFormat.sampleRate
      }
      let channels: AVAudioChannelCount = 2

      // Create output file
      let settings: [String: Any] = [
          AVFormatIDKey: Int(kAudioFormatLinearPCM),
          AVSampleRateKey: sampleRate,
          AVNumberOfChannelsKey: Int(channels),
          AVLinearPCMBitDepthKey: options.bitDepth,
          AVLinearPCMIsFloatKey: options.bitDepth == 32,
          AVLinearPCMIsBigEndianKey: false,
          AVLinearPCMIsNonInterleaved: false
      ]
      let outputFile = try AVAudioFile(forWriting: outputURL, settings: settings)
      // Use the file's own processingFormat for all buffers
      let outputFormat = outputFile.processingFormat

      var markers: [TrackMarker] = []
      // The write position is tracked in frames only. Times are derived from it so
      // marker times can never drift away from the audio actually written.
      var currentSample: Int64 = 0

      for (index, entry) in entries.enumerated() {
          guard let track = entry.track, track.hasLocalFile else { continue }
          let startSample = currentSample

          // Decode the source and bring it into the output format.
          guard let source = try loadSource(from: track.fileURL) else { continue }
          guard let convertedBuffer = try convert(source.buffer, from: source.format, to: outputFormat) else {
              continue
          }

          // Apply start/end offsets, clamped to the audio that is really available.
          // An offset outside the track would otherwise produce a negative frame
          // count (trap) or an out-of-bounds read.
          let availableFrames = AVAudioFramePosition(convertedBuffer.frameLength)
          let requestedStart = AVAudioFramePosition(entry.startOffset * sampleRate)
          let requestedEnd = entry.endOffset > 0
              ? AVAudioFramePosition(entry.endOffset * sampleRate)
              : availableFrames
          let startFrame = min(max(requestedStart, 0), availableFrames)
          let endFrame = min(requestedEnd, availableFrames)
          guard endFrame > startFrame else { continue }
          let framesToWrite = AVAudioFrameCount(endFrame - startFrame)

          // Create a sub-buffer for the trimmed region
          guard let trimmedBuffer = AVAudioPCMBuffer(
              pcmFormat: outputFormat,
              frameCapacity: framesToWrite
          ) else { continue }
          copyFrames(from: convertedBuffer, to: trimmedBuffer, startFrame: Int(startFrame), count: framesToWrite)

          // Apply gain adjustment
          if entry.gainAdjustment != 0 {
              applyGain(to: trimmedBuffer, gainDB: Float(entry.gainAdjustment))
          }

          // Write to output
          try outputFile.write(from: trimmedBuffer)
          currentSample += Int64(framesToWrite)

          // Record marker
          markers.append(TrackMarker(
              name: track.title,
              artist: track.artist,
              album: track.album,
              startTime: Double(startSample) / sampleRate,
              endTime: Double(currentSample) / sampleRate,
              startSample: startSample,
              endSample: currentSample
          ))

          // Apply gap/crossfade for next track
          if index < entries.count - 1 {
              let crossfade: TimeInterval
              if options.usePlaylistCrossfades {
                  crossfade = entries[index + 1].crossfadeDuration
              } else {
                  crossfade = options.crossfadeDuration
              }
              if crossfade > 0 {
                  // Crossfading is not implemented: a real crossfade would have to mix
                  // the overlapping regions. The tracks are simply joined without a gap.
              } else if options.gapDuration > 0 {
                  // Insert silence gap
                  let gapFrames = AVAudioFrameCount(options.gapDuration * sampleRate)
                  guard let silenceBuffer = AVAudioPCMBuffer(
                      pcmFormat: outputFormat,
                      frameCapacity: gapFrames
                  ) else { continue }
                  silenceBuffer.frameLength = gapFrames
                  // Zero explicitly rather than relying on the allocation's contents.
                  if let silence = silenceBuffer.floatChannelData {
                      for channel in 0..<Int(outputFormat.channelCount) {
                          silence[channel].update(repeating: 0, count: Int(gapFrames))
                      }
                  }
                  try outputFile.write(from: silenceBuffer)
                  currentSample += Int64(gapFrames)
              }
          }
      }

      return StitchResult(
          outputURL: outputURL,
          markers: markers,
          totalDuration: Double(currentSample) / sampleRate,
          sampleRate: sampleRate,
          channels: Int(channels)
      )
  }

  /// Decodes the whole audio file at `url` into a PCM buffer.
  ///
  /// OGG files go through `OGGDecoder`; everything else is read with `AVAudioFile` in
  /// its processing format. Returns `nil` when no buffer can be allocated for the file
  /// (for example when it contains no frames).
  private static func loadSource(from url: URL) throws -> (buffer: AVAudioPCMBuffer, format: AVAudioFormat)? {
      if OGGDecoder.isOGGFile(url) {
          let (oggBuffer, oggFormat) = try OGGDecoder.decode(url: url)
          return (buffer: oggBuffer, format: oggFormat)
      }

      let sourceFile = try AVAudioFile(forReading: url)
      let sourceFormat = sourceFile.processingFormat
      // The file is read in one piece; refuse lengths a buffer cannot represent
      // instead of trapping on the integer conversion.
      guard let frameCount = AVAudioFrameCount(exactly: sourceFile.length) else {
          throw StitchError.formatError
      }
      guard let buffer = AVAudioPCMBuffer(pcmFormat: sourceFormat, frameCapacity: frameCount) else {
          return nil
      }
      sourceFile.framePosition = 0
      try sourceFile.read(into: buffer, frameCount: frameCount)
      return (buffer: buffer, format: sourceFormat)
  }

  /// Converts `source` to `outputFormat`, returning `source` itself when the formats
  /// already match and `nil` when the destination buffer cannot be allocated.
  private static func convert(
      _ source: AVAudioPCMBuffer,
      from sourceFormat: AVAudioFormat,
      to outputFormat: AVAudioFormat
  ) throws -> AVAudioPCMBuffer? {
      guard sourceFormat != outputFormat else { return source }
      guard let converter = AVAudioConverter(from: sourceFormat, to: outputFormat) else {
          throw StitchError.conversionError
      }

      let ratio = outputFormat.sampleRate / sourceFormat.sampleRate
      let outputFrameCapacity = AVAudioFrameCount(Double(source.frameLength) * ratio) + 1024
      guard let converted = AVAudioPCMBuffer(
          pcmFormat: outputFormat,
          frameCapacity: outputFrameCapacity
      ) else { return nil }

      // Hand the source over exactly once and then signal end of stream. Returning
      // the same buffer on every request would feed the track again and fill the
      // headroom of the output buffer with a repeat of its beginning.
      var sourceDelivered = false
      let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
          if sourceDelivered {
              outStatus.pointee = .endOfStream
              return nil
          }
          sourceDelivered = true
          outStatus.pointee = .haveData
          return source
      }

      var conversionError: NSError?
      let status = converter.convert(to: converted, error: &conversionError, withInputFrom: inputBlock)
      if let conversionError { throw conversionError }
      guard status != .error else { throw StitchError.conversionError }
      return converted
  }

  /// Copies `count` frames per channel from `source`, starting at `startFrame`, to the
  /// beginning of `destination` and sets its frame length. Nothing is copied (and the
  /// frame length is left untouched) when the buffers expose no common sample type.
  private static func copyFrames(
      from source: AVAudioPCMBuffer,
      to destination: AVAudioPCMBuffer,
      startFrame: Int,
      count: AVAudioFrameCount
  ) {
      let channelCount = Int(destination.format.channelCount)

      func copy<Sample>(
          _ src: UnsafePointer<UnsafeMutablePointer<Sample>>,
          _ dst: UnsafePointer<UnsafeMutablePointer<Sample>>
      ) {
          for channel in 0..<channelCount {
              dst[channel].update(from: src[channel].advanced(by: startFrame), count: Int(count))
          }
          destination.frameLength = count
      }

      if let src = source.floatChannelData, let dst = destination.floatChannelData {
          copy(src, dst)
      } else if let src = source.int16ChannelData, let dst = destination.int16ChannelData {
          copy(src, dst)
      } else if let src = source.int32ChannelData, let dst = destination.int32ChannelData {
          copy(src, dst)
      }
  }
  234. // MARK: - Gain
  /// Scales every sample of a float buffer in place by a gain given in decibels.
  ///
  /// Buffers that expose no float channel data are left untouched.
  ///
  /// - Parameters:
  ///   - buffer: Buffer whose samples are modified.
  ///   - gainDB: Gain in dB; converted to a linear factor with `10^(dB / 20)`.
  private static func applyGain(to buffer: AVAudioPCMBuffer, gainDB: Float) {
      let linearGain = powf(10.0, gainDB / 20.0)
      guard let channels = buffer.floatChannelData else { return }

      let sampleCount = Int(buffer.frameLength)
      for channelIndex in 0..<Int(buffer.format.channelCount) {
          let samples = UnsafeMutableBufferPointer(start: channels[channelIndex], count: sampleCount)
          for position in samples.indices {
              samples[position] *= linearGain
          }
      }
  }
  247. // MARK: - Companion Files
  /// Generate an Adobe Audition-compatible markers file.
  /// Import in Audition: open WAV, then Markers panel > Import...
  ///
  /// The file is tab-separated with the columns
  /// Name, Start, Duration, Time Format, Type, Description and holds one cue marker
  /// per track, placed at the start of the track.
  ///
  /// - Parameters:
  ///   - markers: Track markers to export, in output order.
  ///   - url: Destination of the markers file.
  /// - Throws: Any error raised while writing the file.
  static func writeAuditionMarkers(_ markers: [TrackMarker], to url: URL) throws {
      // A tab or line break inside a field would shift the columns or split the row,
      // so such characters are replaced by spaces.
      let breakingCharacters = CharacterSet.newlines.union(CharacterSet(charactersIn: "\t"))
      func field(_ text: String) -> String {
          text.components(separatedBy: breakingCharacters).joined(separator: " ")
      }

      var rows = ["Name\tStart\tDuration\tTime Format\tType\tDescription"]
      for (index, marker) in markers.enumerated() {
          let start = formatHMSMs(marker.startTime)
          let duration = formatHMSMs(marker.duration)
          let description = [marker.artist, marker.album]
              .filter { !$0.isEmpty }
              .map(field)
              .joined(separator: " - ")
          let name = "\(String(format: "%02d", index + 1)). \(field(marker.name))"
          // Cue marker at the start of each track
          rows.append("\(name)\t\(start)\t\(duration)\tDecimal\tCue\t\(description)")
      }

      let contents = rows.joined(separator: "\n") + "\n"
      try contents.write(to: url, atomically: true, encoding: .utf8)
  }
  /// Writes a human-readable track list with timecodes.
  ///
  /// The list starts with a header naming the playlist and the generation time,
  /// followed by one entry per marker (title, optional artist, start/end/duration,
  /// optional album) and, when there is at least one marker, a totals line.
  ///
  /// - Parameters:
  ///   - markers: Track markers to list, in output order.
  ///   - playlistName: Name shown in the header line.
  ///   - url: Destination of the text file.
  /// - Throws: Any error raised while writing the file.
  static func writeTrackList(_ markers: [TrackMarker], playlistName: String, to url: URL) throws {
      let rule = String(repeating: "─", count: 70)
      let timestamp = ISO8601DateFormatter().string(from: Date())

      var output: [String] = [
          "\(playlistName) — Track List",
          "Generated by MixBoard on \(timestamp)",
          rule,
          ""
      ]

      for (position, marker) in markers.enumerated() {
          let number = String(format: "%02d", position + 1)
          let artistSuffix = marker.artist.isEmpty ? "" : " — \(marker.artist)"
          let startText = formatHMSMs(marker.startTime)
          let endText = formatHMSMs(marker.endTime)
          let durationText = formatHMSMs(marker.duration)

          output.append(" \(number). \(marker.name)\(artistSuffix)")
          output.append(" Start: \(startText) End: \(endText) Duration: \(durationText)")
          if !marker.album.isEmpty {
              output.append(" Album: \(marker.album)")
          }
          output.append("")
      }

      // The end of the last track is the total running time.
      if let lastMarker = markers.last {
          output += [rule, "Total: \(markers.count) tracks, \(formatHMSMs(lastMarker.endTime))"]
      }

      let text = output.joined(separator: "\n") + "\n"
      try text.write(to: url, atomically: true, encoding: .utf8)
  }
  /// Generate a CUE sheet referencing the stitched file.
  ///
  /// One `TRACK` with an `INDEX 01` at the track's start time is written per marker.
  /// CUE sheets have no escape syntax for quoted strings, so double quotes inside
  /// titles and performers are replaced by single quotes and line breaks by spaces;
  /// otherwise a single such character would make the sheet unparsable.
  ///
  /// - Parameters:
  ///   - markers: Track markers to export, in output order.
  ///   - audioFileName: File name written to the `FILE` line (used verbatim so that
  ///     it keeps matching the file on disk).
  ///   - playlistName: Title of the sheet.
  ///   - url: Destination of the CUE sheet.
  /// - Throws: Any error raised while writing the file.
  static func writeCueSheet(
      _ markers: [TrackMarker],
      audioFileName: String,
      playlistName: String,
      to url: URL
  ) throws {
      // Wraps a value in double quotes after removing characters that would end the
      // quoted string or the line early.
      func quoted(_ text: String) -> String {
          let cleaned = text
              .replacingOccurrences(of: "\"", with: "'")
              .components(separatedBy: .newlines)
              .joined(separator: " ")
          return "\"\(cleaned)\""
      }

      var lines: [String] = []
      lines.append("REM Generated by MixBoard (stitched export)")
      lines.append("REM Date: \(ISO8601DateFormatter().string(from: Date()))")
      lines.append("TITLE \(quoted(playlistName))")
      lines.append("FILE \"\(audioFileName)\" WAVE")

      for (index, marker) in markers.enumerated() {
          let trackNum = String(format: "%02d", index + 1)
          lines.append(" TRACK \(trackNum) AUDIO")
          lines.append(" TITLE \(quoted(marker.name))")
          if !marker.artist.isEmpty {
              lines.append(" PERFORMER \(quoted(marker.artist))")
          }
          lines.append(" INDEX 01 \(formatCueTime(marker.startTime))")
      }

      let content = lines.joined(separator: "\n") + "\n"
      try content.write(to: url, atomically: true, encoding: .utf8)
  }
  /// Generate an Adobe Audition session (.sesx) referencing the stitched file with markers.
  /// Note: This uses a best-effort approximation of Audition's XML format.
  /// For guaranteed marker import, use the CSV markers file instead.
  ///
  /// The session contains one audio track with a single clip spanning the whole file
  /// and two cue markers per track: one at its start and one, suffixed ` [END]`, at
  /// its end. Marker positions are given in samples.
  ///
  /// - Parameters:
  ///   - markers: Track markers to export, in output order.
  ///   - audioFilePath: Absolute path of the stitched audio file.
  ///   - audioFileName: Path of the stitched audio file relative to the session.
  ///   - playlistName: Name of the session and of its clip.
  ///   - sampleRate: Sample rate of the stitched file in Hz.
  ///   - totalDuration: Length of the stitched file in seconds.
  ///   - url: Destination of the session file.
  /// - Throws: Any error raised while writing the file.
  static func writeAuditionSession(
      _ markers: [TrackMarker],
      audioFilePath: String,
      audioFileName: String,
      playlistName: String,
      sampleRate: Double,
      totalDuration: TimeInterval,
      to url: URL
  ) throws {
      // Round instead of truncating: the product of two doubles can land a hair
      // below the true sample count, and truncation would then drop one sample.
      let totalSamples = Int64((totalDuration * sampleRate).rounded())

      // Audition .sesx is XML-based. This approximates its structure.
      var xml = """
      <?xml version="1.0" encoding="UTF-8" standalone="no"?>
      <sesx version="1.2">
      <session name="\(escapeXML(playlistName))" sampleRate="\(Int(sampleRate))" bitDepth="32" audioChannelType="stereo" duration="\(totalSamples)" padding="0">
      <files>
      <file id="file-1" relativePath="\(escapeXML(audioFileName))" absolutePath="\(escapeXML(audioFilePath))" />
      </files>
      <tracks>
      <audioTrack id="track-1" name="Mix" index="0" color="#4A86C8" visible="true" mute="false" solo="false" select="false" height="150">
      <trackParameters>
      <trackParameter name="volume" value="1.0" />
      <trackParameter name="pan" value="0.0" />
      </trackParameters>
      <audioClip id="clip-1" name="\(escapeXML(playlistName))" fileID="file-1" startPoint="0" endPoint="\(totalSamples)" sourceInPoint="0" sourceOutPoint="\(totalSamples)">
      </audioClip>
      </audioTrack>
      </tracks>
      <markers>
      """

      for (index, marker) in markers.enumerated() {
          // Each track contributes a start marker (odd id) and an end marker (even id).
          let startID = "marker-\(index * 2 + 1)"
          let endID = "marker-\(index * 2 + 2)"
          let name = "\(String(format: "%02d", index + 1)). \(escapeXML(marker.name))"
          xml += """
          <marker id="\(startID)" name="\(name)" time="\(marker.startSample)" type="cue" description="\(escapeXML(marker.artist))" />
          <marker id="\(endID)" name="\(name) [END]" time="\(marker.endSample)" type="cue" description="" />
          """
      }

      xml += """
      </markers>
      </session>
      </sesx>
      """

      try xml.write(to: url, atomically: true, encoding: .utf8)
  }
  364. // MARK: - Helpers
  /// Formats a time in seconds as `HH:MM:SS.mmm`.
  ///
  /// The value is rounded to the nearest millisecond before it is split into fields,
  /// so binary floating-point error cannot drop a millisecond (2.3 s prints as
  /// `00:00:02.300`, not `.299`) and a carry propagates correctly into the seconds.
  /// Negative or non-finite input is rendered as `00:00:00.000`.
  ///
  /// - Parameter seconds: Time in seconds.
  /// - Returns: The formatted timecode.
  private static func formatHMSMs(_ seconds: TimeInterval) -> String {
      guard seconds.isFinite, seconds > 0 else { return "00:00:00.000" }

      let totalMillis = Int((seconds * 1000).rounded())
      let millis = totalMillis % 1000
      let totalSeconds = totalMillis / 1000
      let hours = totalSeconds / 3600
      let minutes = (totalSeconds % 3600) / 60
      let secs = totalSeconds % 60
      return String(format: "%02d:%02d:%02d.%03d", hours, minutes, secs, millis)
  }
  /// Formats a time in seconds as a CUE sheet position `MM:SS:FF`, where `FF` counts
  /// frames of 1/75 second.
  ///
  /// The value is rounded to the nearest frame before it is split into fields, so
  /// binary floating-point error cannot drop a frame (2.4 s prints as `00:02:30`,
  /// not `00:02:29`). Minutes are not wrapped at 60. Negative or non-finite input is
  /// rendered as `00:00:00`.
  ///
  /// - Parameter seconds: Time in seconds.
  /// - Returns: The formatted position.
  private static func formatCueTime(_ seconds: TimeInterval) -> String {
      guard seconds.isFinite, seconds > 0 else { return "00:00:00" }

      let framesPerSecond = 75
      let totalFrames = Int((seconds * Double(framesPerSecond)).rounded())
      let frames = totalFrames % framesPerSecond
      let totalSeconds = totalFrames / framesPerSecond
      let minutes = totalSeconds / 60
      let secs = totalSeconds % 60
      return String(format: "%02d:%02d:%02d", minutes, secs, frames)
  }
  /// Escapes the characters `&`, `<`, `>` and `"` so that `string` can be embedded
  /// in XML text or in a double-quoted attribute value.
  ///
  /// - Parameter string: Raw text.
  /// - Returns: The text with the four characters replaced by their entities.
  private static func escapeXML(_ string: String) -> String {
      // The ampersand must be handled first; otherwise the ampersands introduced by
      // the other entities would be escaped a second time.
      let replacements: [(raw: String, entity: String)] = [
          ("&", "&amp;"),
          ("<", "&lt;"),
          (">", "&gt;"),
          ("\"", "&quot;")
      ]
      return replacements.reduce(string) { partial, pair in
          partial.replacingOccurrences(of: pair.raw, with: pair.entity)
      }
  }
  385. }
  386. // MARK: - Errors
  /// Failures reported by `AudioStitcher`.
  enum StitchError: Error, LocalizedError {
      /// The playlist has no entries, or none of them has a local audio file.
      case emptyPlaylist
      /// An audio format could not be created.
      case formatError
      /// Audio could not be converted to the output format.
      case conversionError

      /// Human-readable description of the failure.
      var errorDescription: String? {
          let message: String
          switch self {
          case .emptyPlaylist:
              message = "Playlist is empty"
          case .formatError:
              message = "Unable to create audio format"
          case .conversionError:
              message = "Audio conversion failed"
          }
          return message
      }
  }