0

Regex function gives an error on languages other than English. Even if English sentence doesn't match the pattern, function works. In other languages it throws an error, instead of ignoring the pattern and outputing the string as it was on input.

How to fix it? Or, where is the problem?

decompose("10 minutes have fun") // <- ok, string with pattern match
decompose("gggggg") // <- ok
decompose("salut chéri") // <- error
decompose("عرب") // <- error
decompose("日本語") // <- error


func decompose(_ inputText: String) -> (time: String, task: String) {
    
    let pattern = #"(\d+)\s*minute(s)?|(\d+)\s*hour"#
    let regexOptions: NSRegularExpression.Options = [.caseInsensitive]
    let matchingOptions: NSRegularExpression.MatchingOptions = [.reportCompletion]
    let range = NSRange(location: 0, length: inputText.utf8.count)
    
    var time = ""
    var taskName = inputText
    
    let regex = try? NSRegularExpression(pattern: pattern, options: regexOptions)
    
    if let matchIndex = regex?.firstMatch(in: inputText, options: matchingOptions, range: range) {
        
        let startIndex = inputText.index(inputText.startIndex, offsetBy: matchIndex.range.lowerBound)
        print(startIndex)
        let endIndex = inputText.index(inputText.startIndex, offsetBy: matchIndex.range.upperBound)
        print(endIndex)
        
        time = String(inputText[startIndex..<endIndex])
        time = time.trimmingCharacters(in: .whitespaces)

        taskName.removeSubrange(startIndex..<endIndex)
        
        
    } else {
        print("\n No match for --- \(inputText) ---. \n")
    }

    return (time, taskName)
}

var strWithMatch = "10 minutes have fun"
var strEn = "gggggg"
var strJa = "日本語"
var strFr = "salut chéri"
var strAr = "عرب"


MikeMaus
  • 385
  • 3
  • 22
  • 2
    `let range = NSRange(location: 0, length: inputText.utf8.count)` Should be `utf16.count`. – Larme Jun 07 '21 at 08:12
  • @Larme yessss! thank you, never would come up to it. Would you like to post as answer? – MikeMaus Jun 07 '21 at 08:46
  • 1
    I'ts fine to mark your question as duplicate. I wouldn't delete your, just mark it as duplicate since a user having an error might find your and the the linked one (different key words/causes, but in the end it's the same encoding issue). – Larme Jun 07 '21 at 08:52

0 Answers0