You need to base your transforms on the composited video's output size --- its `.renderSize`.
Based on your other question...
So, if you have two `1280 x 720` videos, and you want them side-by-side in a `640 x 480` rendered frame, you need to:
- get the size of the first video
- scale it to `320 x 480`
- move it to `0, 0`

then:

- get the size of the second video
- scale it to `320 x 480`
- move it to `320, 0`
So your scale transform will be:

```swift
let targetWidth = renderSize.width / 2.0
let targetHeight = renderSize.height
let widthScale = targetWidth / sourceVideoSize.width
let heightScale = targetHeight / sourceVideoSize.height
let scale = CGAffineTransform(scaleX: widthScale, y: heightScale)
```
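The matching translation transforms are just as simple --- here's a minimal sketch, assuming the same `renderSize`, the `scale` from above, and upright (non-rotated) source videos:

```swift
// first (left) video sits at the origin of the render frame
let firstMove = CGAffineTransform(translationX: 0, y: 0)

// second (right) video is offset by half the render width
let secondMove = CGAffineTransform(translationX: renderSize.width / 2.0, y: 0)

// apply the scale first, then the move
let firstTransform = scale.concatenating(firstMove)
```

For a `1280 x 720` source that works out to `widthScale = 320 / 1280 = 0.25` and `heightScale = 480 / 720 ≈ 0.667`.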
That should get you there --- except...
In my testing, I took four 8-second videos in landscape orientation.
For reasons unbeknownst to me, the "native" preferredTransforms are:

Videos 1 & 3: `[-1, 0, 0, -1, 1280, 720]`
Videos 2 & 4: `[1, 0, 0, 1, 0, 0]`
So, the sizes returned by the recommended `track.naturalSize.applying(track.preferredTransform)` end up being:

Videos 1 & 3: `-1280 x -720`
Videos 2 & 4: `1280 x 720`
which messes with the transforms.
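Before fixing anything, it's worth printing what you actually get --- a quick sketch (the `logOrientedSize` helper name is my own) you can call with each `AVURLAsset`:

```swift
import AVFoundation

// print the natural vs. "oriented" size of an asset's first video track
func logOrientedSize(of asset: AVAsset) {
    guard let track = asset.tracks(withMediaType: .video).first else { return }
    let orientedSize = track.naturalSize.applying(track.preferredTransform)
    print("naturalSize:", track.naturalSize, "orientedSize:", orientedSize)
    // a negative width/height here is what breaks the naive scale transform
    print("needs 180° rotation:", orientedSize.width < 0)
}
```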
After a little experimentation, if the size is negative, we need to:
- rotate the transform
- scale the transform (making sure to use positive widths/heights)
- translate the transform adjusted for the change in orientation
Here is a complete implementation (without the save-to-disk at the end):
```swift
import UIKit
import AVFoundation

class VideoViewController: UIViewController {
    
    override func viewDidLoad() {
        super.viewDidLoad()
        view.backgroundColor = .systemYellow
    }
    
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        
        guard let originalVideoURL1 = Bundle.main.url(forResource: "video1", withExtension: "mov"),
              let originalVideoURL2 = Bundle.main.url(forResource: "video2", withExtension: "mov")
        else { return }
        
        let firstAsset = AVURLAsset(url: originalVideoURL1)
        let secondAsset = AVURLAsset(url: originalVideoURL2)
        
        let mixComposition = AVMutableComposition()
        
        // add a video track to the composition for each asset
        guard let firstTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { return }
        let timeRange1 = CMTimeRangeMake(start: .zero, duration: firstAsset.duration)
        do {
            try firstTrack.insertTimeRange(timeRange1, of: firstAsset.tracks(withMediaType: .video)[0], at: .zero)
        } catch {
            return
        }
        
        guard let secondTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { return }
        let timeRange2 = CMTimeRangeMake(start: .zero, duration: secondAsset.duration)
        do {
            try secondTrack.insertTimeRange(timeRange2, of: secondAsset.tracks(withMediaType: .video)[0], at: .zero)
        } catch {
            return
        }
        
        // the instruction runs for the duration of the longer video
        let mainInstruction = AVMutableVideoCompositionInstruction()
        mainInstruction.timeRange = CMTimeRangeMake(start: .zero, duration: CMTimeMaximum(firstAsset.duration, secondAsset.duration))
        
        // get the "oriented" size of each source track
        var track: AVAssetTrack!
        
        track = firstAsset.tracks(withMediaType: .video).first
        let firstSize = track.naturalSize.applying(track.preferredTransform)
        
        track = secondAsset.tracks(withMediaType: .video).first
        let secondSize = track.naturalSize.applying(track.preferredTransform)
        
        // debugging
        print("firstSize:", firstSize)
        print("secondSize:", secondSize)
        
        let renderSize = CGSize(width: 640, height: 480)
        
        var scale: CGAffineTransform!
        var move: CGAffineTransform!
        
        // first video goes in the left half of the render frame
        let firstLayerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: firstTrack)
        
        scale = .identity
        move = .identity
        
        if (firstSize.width < 0) {
            // negative size: the track is stored rotated 180°, so rotate it back
            scale = CGAffineTransform(rotationAngle: .pi)
        }
        scale = scale.scaledBy(x: abs(renderSize.width / 2.0 / firstSize.width), y: abs(renderSize.height / firstSize.height))
        move = CGAffineTransform(translationX: 0, y: 0)
        if (firstSize.width < 0) {
            // the rotation happens around the origin, so shift back into the frame
            move = CGAffineTransform(translationX: renderSize.width / 2.0, y: renderSize.height)
        }
        
        firstLayerInstruction.setTransform(scale.concatenating(move), at: .zero)
        
        // second video goes in the right half of the render frame
        let secondLayerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: secondTrack)
        
        scale = .identity
        move = .identity
        
        if (secondSize.width < 0) {
            scale = CGAffineTransform(rotationAngle: .pi)
        }
        scale = scale.scaledBy(x: abs(renderSize.width / 2.0 / secondSize.width), y: abs(renderSize.height / secondSize.height))
        move = CGAffineTransform(translationX: renderSize.width / 2.0, y: 0)
        if (secondSize.width < 0) {
            move = CGAffineTransform(translationX: renderSize.width, y: renderSize.height)
        }
        
        secondLayerInstruction.setTransform(scale.concatenating(move), at: .zero)
        
        mainInstruction.layerInstructions = [firstLayerInstruction, secondLayerInstruction]
        
        let mainCompositionInst = AVMutableVideoComposition()
        mainCompositionInst.instructions = [mainInstruction]
        mainCompositionInst.frameDuration = CMTime(value: 1, timescale: 30)
        mainCompositionInst.renderSize = renderSize
        
        // preview the composition with an AVPlayerLayer
        let newPlayerItem = AVPlayerItem(asset: mixComposition)
        newPlayerItem.videoComposition = mainCompositionInst
        
        let player = AVPlayer(playerItem: newPlayerItem)
        let playerLayer = AVPlayerLayer(player: player)
        playerLayer.frame = view.bounds
        view.layer.addSublayer(playerLayer)
        player.seek(to: .zero)
        player.play()
        
        // video export code goes here...
    }
}
```
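For the save-to-disk step stubbed out above, here's a minimal sketch using `AVAssetExportSession`, dropped in where the `// video export code goes here...` comment sits (it uses `mixComposition` and `mainCompositionInst` from above --- the output file name and preset are my own choices, not part of the original code):

```swift
// minimal export sketch -- output file name and preset are assumptions
let outputURL = FileManager.default.temporaryDirectory
    .appendingPathComponent("sideBySide.mov")
try? FileManager.default.removeItem(at: outputURL)

guard let exporter = AVAssetExportSession(asset: mixComposition,
                                          presetName: AVAssetExportPresetHighestQuality)
else { return }
exporter.outputURL = outputURL
exporter.outputFileType = .mov
// the video composition must be set on the exporter too,
// or the transforms are ignored
exporter.videoComposition = mainCompositionInst

exporter.exportAsynchronously {
    DispatchQueue.main.async {
        print("export status:", exporter.status.rawValue, exporter.error ?? "no error")
    }
}
```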
It's possible that the preferredTransforms could also be different for front / back camera, mirrored, etc. But I'll leave that up to you to work out.
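If you do need to handle those cases, the `preferredTransform`'s `a` / `b` / `c` / `d` components are the place to look. Here's a rough sketch of one way to classify them --- my own assumption, not verified against mirrored or front-camera footage:

```swift
import AVFoundation

// rough classification of a track's stored orientation -- an assumption,
// not tested against mirrored / front-camera footage
func orientation(for track: AVAssetTrack) -> String {
    let t = track.preferredTransform
    switch (t.a, t.b, t.c, t.d) {
    case (1, 0, 0, 1):   return "up"              // no rotation
    case (-1, 0, 0, -1): return "down"            // rotated 180°
    case (0, 1, -1, 0):  return "right"           // rotated 90°
    case (0, -1, 1, 0):  return "left"            // rotated 270°
    default:             return "mirrored/other"  // scaling or mirroring involved
    }
}
```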
Edit
Sample project at: https://github.com/DonMag/VideoTest
Produces (using two `720 x 1280` video clips):
