I know that NSRegularExpression
works on Unicode code points and (normal) JavaScript regex works on UTF-16 code units, but I don't know what should I change in my regex.
Regex: <text[^>]+>([^<]+)<\/text>
Works here: regex101
My parsing method:
func parseCaptions(text: String) -> String? {
let textRange = NSRange(location: 0, length: text.count)
let regex = try! NSRegularExpression(pattern: "<text[^>]+>([^<]+)<\\/text>")
let matches = regex.matches(in: text, range: textRange)
var result: String?
for match in matches {
let range = match.range
let first = text.index(text.startIndex, offsetBy: range.location)
let last = text.index(text.startIndex, offsetBy: range.location + range.length)
var string = String(text[first...last])
string = string.replacingOccurrences(of: "\n", with: " ")
string = string.replacingOccurrences(of: "&#39;", with: "'")
string = string.replacingOccurrences(of: "&quot;", with: "\"")
string.append("\n")
result = string
}
return result
}