
What is the most efficient way to capture frames from an MTKView? If possible, I would like to save a .mov file from the frames in real time. Is it possible to render into an AVPlayer frame or something?

I am currently drawing with this code (based on @warrenm's PerformanceShaders project):

func draw(in view: MTKView) {
    _ = inflightSemaphore.wait(timeout: DispatchTime.distantFuture)
    updateBuffers()

    let commandBuffer = commandQueue.makeCommandBuffer()

    commandBuffer.addCompletedHandler{ [weak self] commandBuffer in
        if let strongSelf = self {
            strongSelf.inflightSemaphore.signal()
        }
    }
    // Dispatch the current kernel to perform the selected image filter
    selectedKernel.encode(commandBuffer: commandBuffer,
        sourceTexture: kernelSourceTexture!,
        destinationTexture: kernelDestTexture!)

    if let renderPassDescriptor = view.currentRenderPassDescriptor, let currentDrawable = view.currentDrawable
    {
        let clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 1)
        renderPassDescriptor.colorAttachments[0].clearColor = clearColor

        let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor)
        renderEncoder.label = "Main pass"

        renderEncoder.pushDebugGroup("Draw textured square")
        renderEncoder.setFrontFacing(.counterClockwise)
        renderEncoder.setCullMode(.back)

        renderEncoder.setRenderPipelineState(pipelineState)
        renderEncoder.setVertexBuffer(vertexBuffer, offset: MBEVertexDataSize * bufferIndex, at: 0)
        renderEncoder.setVertexBuffer(uniformBuffer, offset: MBEUniformDataSize * bufferIndex, at: 1)
        renderEncoder.setFragmentTexture(kernelDestTexture, at: 0)
        renderEncoder.setFragmentSamplerState(sampler, at: 0)
        renderEncoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)

        renderEncoder.popDebugGroup()
        renderEncoder.endEncoding()

        commandBuffer.present(currentDrawable)
    }

    bufferIndex = (bufferIndex + 1) % MBEMaxInflightBuffers

    commandBuffer.commit()
}
Jeshua Lacock
  • This is certainly possible. Since creating a `CVPixelBuffer` from an `IOSurface` isn't a supported operation on iOS, the fastest way to do this will probably be using an `AVAssetWriterInputPixelBufferAdaptor` (which wraps a pixel buffer pool), into which you can read the contents of your drawable's texture each frame. There are some examples of how to do this with OpenGL ES floating around. I'll try to post a more complete answer with all the specifics eventually, but perhaps this will help you along. Another option to consider is ReplayKit, if it's flexible enough to do what you want. – warrenm May 08 '17 at 16:58

2 Answers


Here's a small class that performs the essential functions of writing out a movie file that captures the contents of a Metal view:

import AVFoundation
import Metal
import QuartzCore

class MetalVideoRecorder {
    var isRecording = false
    var recordingStartTime = TimeInterval(0)

    private var assetWriter: AVAssetWriter
    private var assetWriterVideoInput: AVAssetWriterInput
    private var assetWriterPixelBufferInput: AVAssetWriterInputPixelBufferAdaptor

    init?(outputURL url: URL, size: CGSize) {
        do {
            assetWriter = try AVAssetWriter(outputURL: url, fileType: .m4v)
        } catch {
            return nil
        }

        let outputSettings: [String: Any] = [ AVVideoCodecKey : AVVideoCodecType.h264,
            AVVideoWidthKey : size.width,
            AVVideoHeightKey : size.height ]

        assetWriterVideoInput = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings)
        assetWriterVideoInput.expectsMediaDataInRealTime = true

        let sourcePixelBufferAttributes: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String : kCVPixelFormatType_32BGRA,
            kCVPixelBufferWidthKey as String : size.width,
            kCVPixelBufferHeightKey as String : size.height ]

        assetWriterPixelBufferInput = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: assetWriterVideoInput,
                                                                           sourcePixelBufferAttributes: sourcePixelBufferAttributes)

        assetWriter.add(assetWriterVideoInput)
    }

    func startRecording() {
        assetWriter.startWriting()
        assetWriter.startSession(atSourceTime: .zero)

        recordingStartTime = CACurrentMediaTime()
        isRecording = true
    }

    func endRecording(_ completionHandler: @escaping () -> ()) {
        isRecording = false

        assetWriterVideoInput.markAsFinished()
        assetWriter.finishWriting(completionHandler: completionHandler)
    }

    func writeFrame(forTexture texture: MTLTexture) {
        if !isRecording {
            return
        }

        while !assetWriterVideoInput.isReadyForMoreMediaData {}

        guard let pixelBufferPool = assetWriterPixelBufferInput.pixelBufferPool else {
            print("Pixel buffer asset writer input did not have a pixel buffer pool available; cannot retrieve frame")
            return
        }

        var maybePixelBuffer: CVPixelBuffer? = nil
        let status = CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &maybePixelBuffer)
        if status != kCVReturnSuccess {
            print("Could not get pixel buffer from asset writer input; dropping frame...")
            return
        }

        guard let pixelBuffer = maybePixelBuffer else { return }

        CVPixelBufferLockBaseAddress(pixelBuffer, [])
        let pixelBufferBytes = CVPixelBufferGetBaseAddress(pixelBuffer)!

        // Use the bytes per row value from the pixel buffer since its stride may be rounded up to be 16-byte aligned
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
        let region = MTLRegionMake2D(0, 0, texture.width, texture.height)

        texture.getBytes(pixelBufferBytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)

        let frameTime = CACurrentMediaTime() - recordingStartTime
        let presentationTime = CMTimeMakeWithSeconds(frameTime, preferredTimescale: 240)
        assetWriterPixelBufferInput.append(pixelBuffer, withPresentationTime: presentationTime)

        CVPixelBufferUnlockBaseAddress(pixelBuffer, [])
    }
}

After initializing one of these and calling startRecording(), you can add a completion handler to the command buffer containing your rendering commands and call writeFrame (after you end encoding, but before presenting the drawable or committing the buffer):

let texture = currentDrawable.texture
commandBuffer.addCompletedHandler { commandBuffer in
    self.recorder.writeFrame(forTexture: texture)
}

When you're done recording, just call endRecording, and the video file will be finalized and closed.
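
For reference, here's a minimal usage sketch. The metalView, outputURL, and recorder names are assumptions for illustration, and (as pointed out in the comments) the view's framebufferOnly property must be set to false so the drawable's texture can be read back with getBytes:

// Minimal usage sketch; `metalView`, `outputURL`, and `recorder` are assumed names
metalView.framebufferOnly = false  // allow CPU readback of the drawable's texture

let outputURL = FileManager.default.temporaryDirectory.appendingPathComponent("capture.m4v")
recorder = MetalVideoRecorder(outputURL: outputURL, size: metalView.drawableSize)
recorder?.startRecording()

// ... each draw(in:) adds the completed handler shown above ...

recorder?.endRecording {
    print("Movie finalized at \(outputURL)")
}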

Caveats:

This class assumes that the source texture is in the default format, .bgra8Unorm. If it isn't, you'll get crashes or corruption. If necessary, convert the texture with a compute or fragment shader, or use Accelerate.
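
If you do need that conversion, here is a hedged sketch (not part of the original answer) using MPSImageConversion from MetalPerformanceShaders to copy a non-BGRA source into a .bgra8Unorm texture; the device, commandBuffer, and sourceTexture names are assumed:

import MetalPerformanceShaders

// Sketch: convert `sourceTexture` (e.g. .rgba8Unorm) into a .bgra8Unorm copy
let descriptor = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .bgra8Unorm,
                                                          width: sourceTexture.width,
                                                          height: sourceTexture.height,
                                                          mipmapped: false)
descriptor.usage = [.shaderRead, .shaderWrite]
let bgraTexture = device.makeTexture(descriptor: descriptor)!

let conversion = MPSImageConversion(device: device,
                                    srcAlpha: .alphaIsOne,
                                    destAlpha: .alphaIsOne,
                                    backgroundColor: nil,
                                    conversionInfo: nil)
conversion.encode(commandBuffer: commandBuffer,
                  sourceTexture: sourceTexture,
                  destinationTexture: bgraTexture)
// Pass `bgraTexture` to writeFrame(forTexture:) instead of the original texture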

This class also assumes that the texture is the same size as the video frame. If this isn't the case (if the drawable size changes, or your screen autorotates), the output will be corrupted and you may see crashes. Mitigate this by scaling or cropping the source texture as your application requires.
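
Likewise, if the drawable size can drift from the size the recorder was configured with, one option is to rescale each frame before writing it. Here's a minimal sketch using MPSImageLanczosScale (again, device, commandBuffer, drawableTexture, and the recording dimensions are assumed names, not the author's code):

// Sketch: scale the drawable's texture to the fixed recording size before writing
let scaledDescriptor = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .bgra8Unorm,
                                                                width: recordingWidth,
                                                                height: recordingHeight,
                                                                mipmapped: false)
scaledDescriptor.usage = [.shaderRead, .shaderWrite]
let scaledTexture = device.makeTexture(descriptor: scaledDescriptor)!

let scaler = MPSImageLanczosScale(device: device)
scaler.encode(commandBuffer: commandBuffer,
              sourceTexture: drawableTexture,
              destinationTexture: scaledTexture)
// Then record `scaledTexture` rather than the drawable's texture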

warrenm
  • Hello Warren. I am having a problem probably because of the pixelFormat. I have a question here: https://stackoverflow.com/questions/46146593/cametallayer-drawable-texture-is-weird-on-some-devices and an issue on github here: https://github.com/svtek/SceneKitVideoRecorder/issues/3 – Ömer Karışman Sep 17 '17 at 14:39
  • Why do you use 'addScheduledHandler' instead of 'addCompletedHandler'? – bsabiston Jun 01 '18 at 16:37
  • I believe using scheduled handler is incorrect, as the block will run BEFORE the GPU has executed the command buffer. You need to use a completion handler. – ldoogy Jun 01 '18 at 17:40
  • Should also note that MTKView's framebufferOnly property should be set to false. I was recently running into issues due to this, getting artifacts/crashes in the video. – hartw Dec 17 '19 at 01:22
  • I get `Thread 1: EXC_BAD_ACCESS` at `texture.getBytes(...)`. Any ideas? Spent days trying to debug. – JCutting8 Jun 13 '20 at 02:11
  • Could be anything from an incorrectly sized buffer to a message sent to a released object. I recommend asking a new question, including your source code. – warrenm Jun 13 '20 at 07:22
  • https://stackoverflow.com/questions/62359253/mtltexture-getbytes-returning-exc-bad-access – JCutting8 Jun 13 '20 at 11:50
  • Just posted an edit to this answer; basically we need to manually release the pixelBuffer from memory, or within a few seconds the app will use all of the available device memory and crash. – ImShrey Dec 08 '21 at 13:40
  • @zoe, can you explain your edit? – ImShrey Dec 20 '21 at 16:09
  • @shreyashshah There's an entire rejection reason for edits that are attempts to reply or answer, and was used in the first review: "This edit was intended to address the author of the post and makes no sense as an edit. It should have been written as a comment or an answer.". It shouldn't have been approved in the first place – Zoe Dec 20 '21 at 16:12
  • @zoe got it, though wouldn't it help other people who come to this page and refer to the answer, in case they miss the comments? Also, writing a whole new answer by just changing one line of code wouldn't be ideal in this specific case. – ImShrey Dec 20 '21 at 16:20
  • The edit was also factually incorrect. In Swift, CF objects are memory-managed much like `NSObject`, so you can't actually call `CVPixelBufferRelease`. If you're seeing memory leaks in this code, it's possible that you're calling it on a background thread without wrapping it in an autorelease pool. Or you're using Objective-C. – warrenm Dec 20 '21 at 20:12
  • Oh yes, actually both: Objective-C, with that code running on a background thread. This was the explanation I was looking for, @warrenm. – ImShrey Dec 21 '21 at 04:03

Upgraded to Swift 5


import AVFoundation
import Metal
import QuartzCore

class MetalVideoRecorder {
    var isRecording = false
    var recordingStartTime = TimeInterval(0)

    private var assetWriter: AVAssetWriter
    private var assetWriterVideoInput: AVAssetWriterInput
    private var assetWriterPixelBufferInput: AVAssetWriterInputPixelBufferAdaptor

    init?(outputURL url: URL, size: CGSize) {
        do {
            assetWriter = try AVAssetWriter(outputURL: url, fileType: AVFileType.m4v)
        } catch {
            return nil
        }

        let outputSettings: [String: Any] = [ AVVideoCodecKey : AVVideoCodecType.h264,
            AVVideoWidthKey : size.width,
            AVVideoHeightKey : size.height ]

        assetWriterVideoInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: outputSettings)
        assetWriterVideoInput.expectsMediaDataInRealTime = true

        let sourcePixelBufferAttributes: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String : kCVPixelFormatType_32BGRA,
            kCVPixelBufferWidthKey as String : size.width,
            kCVPixelBufferHeightKey as String : size.height ]

        assetWriterPixelBufferInput = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: assetWriterVideoInput,
                                                                           sourcePixelBufferAttributes: sourcePixelBufferAttributes)

        assetWriter.add(assetWriterVideoInput)
    }

    func startRecording() {
        assetWriter.startWriting()
        assetWriter.startSession(atSourceTime: CMTime.zero)

        recordingStartTime = CACurrentMediaTime()
        isRecording = true
    }

    func endRecording(_ completionHandler: @escaping () -> ()) {
        isRecording = false

        assetWriterVideoInput.markAsFinished()
        assetWriter.finishWriting(completionHandler: completionHandler)
    }

    func writeFrame(forTexture texture: MTLTexture) {
        if !isRecording {
            return
        }

        while !assetWriterVideoInput.isReadyForMoreMediaData {}

        guard let pixelBufferPool = assetWriterPixelBufferInput.pixelBufferPool else {
            print("Pixel buffer asset writer input did not have a pixel buffer pool available; cannot retrieve frame")
            return
        }

        // Wrap the per-frame work in an autorelease pool so each pixel buffer is
        // released promptly. (CVPixelBufferRelease is unavailable in Swift, since
        // Core Foundation objects are automatically memory managed.)
        autoreleasepool {
            var maybePixelBuffer: CVPixelBuffer? = nil
            let status = CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &maybePixelBuffer)
            if status != kCVReturnSuccess {
                print("Could not get pixel buffer from asset writer input; dropping frame...")
                return
            }

            guard let pixelBuffer = maybePixelBuffer else { return }

            CVPixelBufferLockBaseAddress(pixelBuffer, [])
            let pixelBufferBytes = CVPixelBufferGetBaseAddress(pixelBuffer)!

            // Use the bytes per row value from the pixel buffer since its stride may be rounded up to be 16-byte aligned
            let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
            let region = MTLRegionMake2D(0, 0, texture.width, texture.height)

            texture.getBytes(pixelBufferBytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)

            let frameTime = CACurrentMediaTime() - recordingStartTime
            let presentationTime = CMTimeMakeWithSeconds(frameTime, preferredTimescale: 240)
            assetWriterPixelBufferInput.append(pixelBuffer, withPresentationTime: presentationTime)

            CVPixelBufferUnlockBaseAddress(pixelBuffer, [])
        }
    }
}

EDIT: wrapped the per-frame work in an autoreleasepool to avoid memory leaks. Without this, the pixelBuffer created for each frame stays in memory, and eventually the app will use all of the available system memory. (As noted in the comments, CVPixelBufferRelease is unavailable in Swift because Core Foundation objects are automatically memory managed; the autorelease pool matters especially when writeFrame runs on a background thread.)

ImShrey
Oscar
  • Xcode gives me the error hint: `'CVPixelBufferRelease' is unavailable: Core Foundation objects are automatically memory managed`. – Shuai Aug 05 '23 at 02:50