The conversion is a function of the device orientation, as well as the camera position (front or back). The most accurate function I've found so far is this gist (or this other answer), which works great for the Vision framework. Here's a slightly modified version of the same gist retaining the same logic:
extension CGImagePropertyOrientation {
    init(isUsingFrontFacingCamera: Bool, deviceOrientation: UIDeviceOrientation = UIDevice.current.orientation) {
        switch deviceOrientation {
        case .portrait:
            self = .right
        case .portraitUpsideDown:
            self = .left
        case .landscapeLeft:
            self = isUsingFrontFacingCamera ? .down : .up
        case .landscapeRight:
            self = isUsingFrontFacingCamera ? .up : .down
        default:
            self = .right
        }
    }
}
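For reference, the result of this initializer is what you pass as the orientation argument when creating a VNImageRequestHandler for a camera frame. A minimal usage sketch (pixelBuffer here stands for the CVPixelBuffer pulled out of the sample buffer in captureOutput, as in the test code further down):
let exifOrientation = CGImagePropertyOrientation(isUsingFrontFacingCamera: false)
let handler = VNImageRequestHandler(
    cvPixelBuffer: pixelBuffer,
    orientation: exifOrientation,
    options: [:])
try? handler.perform([VNRecognizeTextRequest()])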
I verified the results using the following method:
1. Create a new project in Xcode 11.6.
2. Add NSCameraUsageDescription to Info.plist.
3. Replace ViewController.swift with the code below.
4. Update devicePositionToTest to front or back, depending on which one you want to test.
5. Replace SEARCH STRING HERE with the piece of text you are going to scan (see the example after this list).
6. Run the app and point it at the text while changing orientations.
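For example, to test the front camera against the phrase "hello world" (an arbitrary placeholder string), the two configuration constants at the top of the file would become the lines below. Note that the recognized text is lowercased before the comparison, so the search string should be lowercase as well:
let devicePositionToTest = AVCaptureDevice.Position.front
let expectedString = "hello world"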
You will make the following observations:
- Back camera:
  - .portrait: .right and .up both work.
  - .landscapeRight: .down and .right.
  - .portraitUpsideDown: .left and .down.
  - .landscapeLeft: .up and .left.
- Front camera:
  - .portrait: .right and .up.
  - .landscapeRight: .up and .left.
  - .portraitUpsideDown: .left and .down.
  - .landscapeLeft: .down and .right.
Notice how, no matter the camera or device orientation, there are always two orientations that work. That's because, for example, in portrait with the back camera, text running left to right is recognized normally (as you would expect), but text running top to bottom is also recognized.
However, the first orientation listed in each pair is more accurate than the second; you'll get a lot more junk data if you go with the second column. You can verify this by printing out the entire contents of allStrings, as shown below.
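For instance, inside the VNRecognizeTextRequest completion handler in the test code below, you could print everything that was recognized for each orientation rather than only the matches (a one-line sketch reusing the handler's existing allStrings and orientation values):
print("exifOrientation: \(orientation). allStrings: \(allStrings)")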
Note that this was only tested with the Vision framework. If you're using the sample buffer for something else, or have the camera configured differently, you may need a different conversion function (one possible variant is sketched after the test code).
import AVFoundation
import UIKit
import Vision

let devicePositionToTest = AVCaptureDevice.Position.back
let expectedString = "SEARCH STRING HERE"

class ViewController: UIViewController {

    let captureSession = AVCaptureSession()

    override func viewDidLoad() {
        super.viewDidLoad()

        // 1. Set up input
        let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: devicePositionToTest)!
        if device.isFocusModeSupported(.continuousAutoFocus) {
            try! device.lockForConfiguration()
            device.focusMode = .continuousAutoFocus
            device.unlockForConfiguration()
        }
        let input = try! AVCaptureDeviceInput(device: device)
        captureSession.addInput(input)

        // 2. Set up output
        let output = AVCaptureVideoDataOutput()
        output.alwaysDiscardsLateVideoFrames = true
        output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "com.example"))
        captureSession.addOutput(output)

        // 3. Set up connection
        let connection = output.connection(with: .video)!
        assert(connection.isCameraIntrinsicMatrixDeliverySupported)
        connection.isCameraIntrinsicMatrixDeliveryEnabled = true

        // 4. Set up preview and start the session
        let previewView = CaptureVideoPreviewView(frame: CGRect(x: 0, y: 0, width: 400, height: 400))
        previewView.videoPreviewLayer.videoGravity = .resizeAspect
        previewView.videoPreviewLayer.session = captureSession
        view.addSubview(previewView)

        captureSession.startRunning()
    }
}

extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil)!
        let options: [VNImageOption: Any] = [.cameraIntrinsics: cameraIntrinsicData]

        // Run text recognition on every frame once per possible EXIF orientation,
        // and log which orientations actually produce a match.
        let allCGImageOrientations: [CGImagePropertyOrientation] = [.up, .upMirrored, .down, .downMirrored, .leftMirrored, .right, .rightMirrored, .left]
        allCGImageOrientations.forEach { orientation in
            let imageRequestHandler = VNImageRequestHandler(
                cvPixelBuffer: pixelBuffer,
                orientation: orientation,
                options: options)
            let request = VNRecognizeTextRequest { value, error in
                let observations = value.results as! [VNRecognizedTextObservation]
                let allStrings = observations.compactMap { $0.topCandidates(1).first?.string.lowercased() }.joined(separator: " ")
                if allStrings.contains(expectedString) {
print("FOUND MATCH. deviceOrientation: \(UIDevice.current.orientation). exifOrientation: \(orientation)")
}
}
request.recognitionLevel = .accurate
request.usesLanguageCorrection = true
try! imageRequestHandler.perform([request])
}
}
}
class CaptureVideoPreviewView: UIView {
override class var layerClass: AnyClass {
return AVCaptureVideoPreviewLayer.self
}
var videoPreviewLayer: AVCaptureVideoPreviewLayer {
layer as! AVCaptureVideoPreviewLayer
}
}
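For completeness, here is one such variant (a sketch I haven't run through the test above): the same mapping keyed off the interface orientation instead of the device orientation, which some apps prefer because UIDeviceOrientation can also report faceUp, faceDown, or unknown (all of which fall into the default case of the extension at the top). Note that UIInterfaceOrientation's landscape cases are the reverse of UIDeviceOrientation's, so the two landscape branches swap:
extension CGImagePropertyOrientation {
    init(isUsingFrontFacingCamera: Bool, interfaceOrientation: UIInterfaceOrientation) {
        switch interfaceOrientation {
        case .portrait:
            self = .right
        case .portraitUpsideDown:
            self = .left
        case .landscapeRight:
            self = isUsingFrontFacingCamera ? .down : .up
        case .landscapeLeft:
            self = isUsingFrontFacingCamera ? .up : .down
        default:
            self = .right
        }
    }
}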