[iOS] OCR(1) - Tesseract
What is Tesseract?
/* Tesseract는 다양한 운영 체제를 위한 광학 문자 인식 엔진이다.
Apache License 2.0에 따라 배포되는 무료 소프트웨어이다.
초기에는 영어만 인식할 수 있었지만, 최근에는 한글까지 지원한다. */
// 22.05.09 기준 최근 안정적인 버전은 5.1.0으로 22.03.01에 배포
// 한국어 지원 가능
How to use?
// Podfile
# CocoaPods target for the sample app.
target 'OCRTest' do
# Tesseract-OCR-iOS is distributed as a dynamic framework.
use_frameworks!
pod 'TesseractOCRiOS'
end
// Project build setting
// Enable Bitcode를 No로 변경 (default = Yes)
// tessdata folder
/* AppDelegate.swift와 같은 레벨에 위치하라고 한다.
나는 SwiftUI를 기반으로 하기 때문에 ContentView와 같은 레벨에 뒀다 */
/* 주의할 점
폴더를 프로젝트에 넣을 때 "Create folder references" 로 체크를 해줘야한다 */
// info.plist
/* Privacy - Camera Usage Description
카메라 접근권한 추가 */
// CameraView.swift
/* 더미 이미지를 사용하지않고 카메라로 스캔하기 위해서 카메라 사용 */
import SwiftUI
import AVFoundation
import TesseractOCR
/// SwiftUI wrapper around a UIKit camera preview. Hosts the capture
/// session's preview layer and wires frame/OCR delegate callbacks to
/// the shared `TesseractViewModel`.
struct CameraView: UIViewRepresentable {
    let viewModel: TesseractViewModel

    /// UIView whose backing layer IS an AVCaptureVideoPreviewLayer, so the
    /// camera feed renders directly without managing sublayers/frames.
    class VideoPreviewView: UIView {
        override class var layerClass: AnyClass {
            AVCaptureVideoPreviewLayer.self
        }
        var videoPreviewLayer: AVCaptureVideoPreviewLayer {
            // Safe: layerClass above guarantees the backing layer's type.
            return layer as! AVCaptureVideoPreviewLayer
        }
    }

    func makeUIView(context: Context) -> VideoPreviewView {
        let view = VideoPreviewView()
        view.backgroundColor = .black
        view.videoPreviewLayer.videoGravity = .resizeAspectFill
        view.videoPreviewLayer.cornerRadius = 0
        view.videoPreviewLayer.session = viewModel.session
        view.videoPreviewLayer.connection?.videoOrientation = .portrait
        viewModel.requestAndCheckPermissions()
        // Configure the OCR engine for Korean; requires "kor" traineddata
        // in the bundled tessdata folder, otherwise init returns nil.
        if let tesseract = G8Tesseract(language: "kor") {
            viewModel.tesseract = tesseract
            tesseract.delegate = context.coordinator
        }
        // Register the sample-buffer delegate once at creation time.
        // Doing this in updateUIView re-registered it on every SwiftUI
        // update pass for no benefit.
        viewModel.output.setSampleBufferDelegate(context.coordinator, queue: DispatchQueue.main)
        return view
    }

    func updateUIView(_ uiView: VideoPreviewView, context: Context) {
        // Nothing to update: the preview layer observes the session directly.
    }

    // Coordinator bridges AVFoundation / Tesseract delegate callbacks into SwiftUI.
    func makeCoordinator() -> Coordinator {
        return Coordinator(viewModel: viewModel)
    }

    class Coordinator: NSObject, AVCaptureVideoDataOutputSampleBufferDelegate, G8TesseractDelegate {
        let viewModel: TesseractViewModel

        init(viewModel: TesseractViewModel) {
            self.viewModel = viewModel
        }

        /// Called for every captured video frame; converts the pixel buffer
        /// to a UIImage and hands it to the view model for OCR.
        func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
            guard let bufferImage = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
            let ciImage = CIImage(cvImageBuffer: bufferImage)
            let context = CIContext(options: nil)
            // createCGImage can return nil; bail out instead of force-unwrapping.
            guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return }
            viewModel.scanImage(UIImage(cgImage: cgImage))
        }

        func progressImageRecognition(for tesseract: G8Tesseract) {
            // Fixed: "\\(" escaped the backslash, printing the literal text
            // instead of interpolating the progress value.
            print("recognition progress \(tesseract.progress)%")
        }
    }
}
// TesseractViewModel.swift
import AVFoundation
import TesseractOCR
import SwiftUI
/// Owns the AVFoundation capture pipeline and the Tesseract OCR engine,
/// publishing the recognized text for SwiftUI.
class TesseractViewModel: ObservableObject {
    // Guards against configuring the capture session more than once.
    var isConfigure = false
    let output = AVCaptureVideoDataOutput()
    // Placeholder; replaced by a "kor"-configured instance in CameraView.makeUIView.
    var tesseract = G8Tesseract()
    @Published var session: AVCaptureSession = AVCaptureSession()
    @Published var result: String = ""

    /// Requests camera permission if not yet determined and configures the
    /// capture session once access is available.
    func requestAndCheckPermissions() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .notDetermined:
            // The completion arrives on an arbitrary queue, so hop to main
            // before touching the session. Weak capture avoids keeping the
            // view model alive just for this callback.
            AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
                guard granted else { return }
                DispatchQueue.main.async {
                    self?.setupCamera()
                }
            }
        case .restricted:
            break
        case .authorized:
            // Permission already granted; configure immediately.
            setupCamera()
        default:
            // .denied or future unknown cases.
            print("Permission declined")
        }
    }

    /// Configures session input/output exactly once.
    func setupCamera() {
        if isConfigure == false {
            initInput()
            initOutput()
            isConfigure = true
        }
    }

    /// Attaches the default video capture device as the session's input.
    func initInput() {
        guard let captureDevice = AVCaptureDevice.default(for: AVMediaType.video) else {
            return
        }
        do {
            let input = try AVCaptureDeviceInput(device: captureDevice)
            // canAddInput prevents an Obj-C exception if the input is
            // already attached (e.g. if configuration runs twice).
            if session.canAddInput(input) {
                session.addInput(input)
            }
        } catch {
            // Log the actual failure instead of a bare "error".
            print("Failed to create camera input: \(error)")
        }
    }

    /// Attaches the video-data output that delivers frames to the delegate.
    func initOutput() {
        if session.canAddOutput(output) {
            session.addOutput(output)
        }
    }

    /// Runs OCR on a captured frame; on the first non-nil result, stops
    /// the session and publishes the recognized text.
    func scanImage(_ image: UIImage) {
        tesseract.image = image
        tesseract.recognize()
        if let recognizedText = tesseract.recognizedText {
            session.stopRunning()
            result = recognizedText
        }
    }
}
// TesseractView.swift
import SwiftUI
/// Root screen: camera preview on top of the latest OCR result text.
struct TesseractView: View {
    // @StateObject (not @ObservedObject) so SwiftUI owns a single view-model
    // instance for the view's lifetime. The original built the model inside
    // init() with @ObservedObject, which recreates it — and loses all state —
    // every time SwiftUI re-initializes the view struct.
    @StateObject private var viewModel = TesseractViewModel()

    var body: some View {
        VStack {
            Spacer()
            CameraView(viewModel: viewModel)
                .onAppear {
                    // startRunning() blocks its calling thread until the
                    // session starts; keep it off the main thread.
                    DispatchQueue.global(qos: .userInitiated).async {
                        viewModel.session.startRunning()
                    }
                }
                .onDisappear {
                    viewModel.session.stopRunning()
                }
            Spacer()
            Text(viewModel.result)
            Spacer()
        }
        .frame(minWidth: 0, maxWidth: .infinity, minHeight: 0, maxHeight: .infinity)
        .edgesIgnoringSafeArea(.all)
    }
}
// Xcode canvas preview provider; the camera session does not run in previews.
struct TesseractView_Previews: PreviewProvider {
    static var previews: some View {
        TesseractView()
    }
}
// 사용 기술
/* MVVM
SwiftUI */
// 사용후기
/* 인식이 원래 이렇게 구린건가..
내가 뭔가 잘못한건가.. */
Reference
Install
https://github.com/gali8/Tesseract-OCR-iOS/wiki/Installation
GitHub - gali8/Tesseract-OCR-iOS: Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64.
Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64. - GitHub - gali8/Tesseract-OCR-iOS: Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64.
github.com
traineddata
https://github.com/tesseract-ocr/tessdata/tree/3.04.00
GitHub - tesseract-ocr/tessdata: Trained models with support for legacy and LSTM OCR engine
Trained models with support for legacy and LSTM OCR engine - GitHub - tesseract-ocr/tessdata: Trained models with support for legacy and LSTM OCR engine
github.com
Github
https://github.com/gali8/Tesseract-OCR-iOS
GitHub - gali8/Tesseract-OCR-iOS: Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64.
Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64. - GitHub - gali8/Tesseract-OCR-iOS: Tesseract OCR iOS is a Framework for iOS7+, compiled also for armv7s and arm64.
github.com
제가 틀렸거나 다른 의견이 있으시다면 댓글로 남겨주시면 감사하겠습니다 :)