I am trying to use Apple's Vision framework to perform OCR and, at the same time, get the coordinates of the bounding box for each character.
VNDetectTextRectanglesRequest gives you those boxes but not the recognized text, whereas VNRecognizeTextRequest gives you the recognized text but not the coordinates of the characters.
I could run both, but my concern is that the number of characters detected might not match the number of characters recognized.
I have looked at this Stack Overflow post, but it didn't help me much: https://stackoverflow.com/questions/44533148/converting-a-vision-vntextobservation-to-a-string
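One avenue I have considered is asking VNRecognizeTextRequest itself for coordinates: VNRecognizedText has a boundingBox(for:) method that returns a VNRectangleObservation for a given substring range. Something like this sketch is what I mean (characterBoxes(in:) is my own illustrative helper, and I don't know how reliable single-character rects are):

func characterBoxes(in observation: VNRecognizedTextObservation) -> [(Character, CGRect)] {
    guard let candidate = observation.topCandidates(1).first else { return [] }
    var result: [(Character, CGRect)] = []
    let text = candidate.string
    for index in text.indices {
        // Ask Vision for the box of each single-character range.
        let range = index..<text.index(after: index)
        if let box = try? candidate.boundingBox(for: range) {
            // box.boundingBox is normalized (0-1) with a lower-left origin.
            result.append((text[index], box.boundingBox))
        }
    }
    return result
}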
Code with VNDetectTextRectanglesRequest:
func getCharacterBoxesHelper(request: VNRequest, error: Error?) {
    guard let observations = request.results as? [VNTextObservation] else { return }
    for observation in observations {
        // characterBoxes is only populated when reportCharacterBoxes is true.
        guard let boxes = observation.characterBoxes else { continue }
        characterBoxes.append(contentsOf: boxes)
    }
}

// UIImage.Orientation and CGImagePropertyOrientation raw values do not line
// up, so map them explicitly instead of casting rawValue.
private func cgOrientation(from uiOrientation: UIImage.Orientation) -> CGImagePropertyOrientation {
    switch uiOrientation {
    case .up: return .up
    case .down: return .down
    case .left: return .left
    case .right: return .right
    case .upMirrored: return .upMirrored
    case .downMirrored: return .downMirrored
    case .leftMirrored: return .leftMirrored
    case .rightMirrored: return .rightMirrored
    @unknown default: return .up
    }
}

func getCharacterBoxes(_ croppedImage: UIImage, findVin: Bool) {
    self.characterBoxes = []
    guard let cgImage = croppedImage.cgImage else { return }
    let requestHandler = VNImageRequestHandler(cgImage: cgImage,
                                               orientation: cgOrientation(from: croppedImage.imageOrientation),
                                               options: [:])
    let request = VNDetectTextRectanglesRequest { [weak self] req, err in
        DispatchQueue.main.async {
            self?.getCharacterBoxesHelper(request: req, error: err)
        }
    }
    // Without this flag, observation.characterBoxes stays nil.
    request.reportCharacterBoxes = true
    DispatchQueue.global(qos: .userInitiated).async {
        do {
            try requestHandler.perform([request])
        } catch {
            print("Failed to perform image request: \(error)")
        }
    }
}
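For reference, here is a minimal sketch (a helper of my own, not part of the code above) of how a normalized Vision rect could be converted into UIKit image coordinates for drawing; Vision uses a lower-left origin while UIKit uses an upper-left one:

func imageRect(for normalizedRect: CGRect, in image: UIImage) -> CGRect {
    // VNImageRectForNormalizedRect scales a 0-1 rect to image coordinates,
    // still with Vision's lower-left origin. Using the image's point size
    // here; use cgImage.width/height if you need pixel coordinates.
    var rect = VNImageRectForNormalizedRect(normalizedRect,
                                            Int(image.size.width),
                                            Int(image.size.height))
    // Flip the y axis for UIKit's upper-left origin.
    rect.origin.y = image.size.height - rect.origin.y - rect.height
    return rect
}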
Code with VNRecognizeTextRequest:
func readImage(_ croppedImage: UIImage, findX: Bool) {
    self.croppedImage = croppedImage
    guard let cgImage = croppedImage.cgImage else { return }
    // Reuses the cgOrientation(from:) mapping defined above.
    let requestHandler = VNImageRequestHandler(cgImage: cgImage,
                                               orientation: cgOrientation(from: croppedImage.imageOrientation),
                                               options: [:])
    let request = VNRecognizeTextRequest { [weak self] req, err in
        DispatchQueue.main.async {
            self?.handle(request: req, error: err)
        }
    }
    request.recognitionLevel = .accurate
    request.usesLanguageCorrection = true
    // Send the request to the request handler off the main thread.
    DispatchQueue.global(qos: .userInitiated).async {
        do {
            try requestHandler.perform([request])
        } catch {
            print("Failed to perform image request: \(error)")
        }
    }
}
func handle(request: VNRequest, error: Error?) {
    guard let observations = request.results as? [VNRecognizedTextObservation] else {
        print("No observation detected")
        return
    }
    var recognizedTextArray = [String]()
    for observation in observations {
        // Keep the top candidate for each recognized line.
        if let recognizedText = observation.topCandidates(1).first {
            recognizedTextArray.append(recognizedText.string)
        }
    }
    // recognizedTextArray now holds one string per line, but no
    // per-character coordinates.
}
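If I do end up running both requests, one idea for reconciling the two result sets is to assign each detected character box to the recognized line whose bounding box overlaps it most. A rough sketch (matchBoxesToLines is a hypothetical helper, purely to illustrate the idea):

func matchBoxesToLines(characterBoxes: [VNRectangleObservation],
                       lines: [VNRecognizedTextObservation]) -> [Int: [VNRectangleObservation]] {
    var matches: [Int: [VNRectangleObservation]] = [:]
    for box in characterBoxes {
        var bestIndex: Int?
        var bestArea: CGFloat = 0
        for (i, line) in lines.enumerated() {
            // Both boundingBox values are normalized, so they are comparable.
            let overlap = line.boundingBox.intersection(box.boundingBox)
            let area = overlap.isNull ? 0 : overlap.width * overlap.height
            if area > bestArea {
                bestArea = area
                bestIndex = i
            }
        }
        if let i = bestIndex {
            matches[i, default: []].append(box)
        }
    }
    return matches
}

Even then, I still would not know which detected box corresponds to which recognized character within a line, which is why I am hoping there is a way to get both from a single request.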