In my OS X command-line tool project, written in Swift 3.0 (Beta 2), I need to convert HTML data from multiple URLs to a String. There is a problem when the function below is used from many background tasks: it does not work anywhere except on the main thread. So maybe there is a more elegant way to control the completion of all tasks and read the HTML data in such a tool, with or without the parsers that I need, for Swift 3 on Mac OS X (and Linux in the near future):
/// Converts an HTML document to plain text by importing it into an attributed
/// string and returning that string's characters.
///
/// - Note: The question this snippet comes from reports that the HTML import
///   only works on the main thread, so call this from the main thread.
/// - Parameter html: The HTML markup to convert.
/// - Parameter usedEncoding: The encoding used to turn `html` into bytes; the
///   same encoding is passed to the HTML importer.
/// - Returns: The plain-text content, or `""` when `html` cannot be encoded
///   with `usedEncoding` or the HTML import fails.
func html2text (html: String, usedEncoding: String.Encoding) -> String {
    // `data(using:)` returns nil when `html` contains characters that
    // `usedEncoding` cannot represent; treat that like any other conversion
    // failure instead of crashing on a force unwrap.
    guard let data = html.data(using: usedEncoding) else {
        return ""
    }
    let attributed = AttributedString(html: data, options: [NSDocumentTypeDocumentAttribute: NSHTMLTextDocumentType, NSCharacterEncodingDocumentAttribute: usedEncoding.rawValue], documentAttributes: nil)
    return attributed?.string ?? ""
}
So I first read the data into an Array, wait until all DataTasks have finished, and then convert it on the main thread. I also use a global variable (a Set of URLs) to track the completion of each task:
import Foundation
import WebKit
// URLs to download; the download loop iterates over this array.
var urlArr: [String] = []
// URLs whose task has been started and has not finished yet. The completion
// handler removes its URL, and the main thread waits until this set is empty.
var urlSet: Set<String> = []
// One entry per successfully decoded page: source URL, HTML text, and the
// encoding that was used to decode it.
var htmlTup = [(url : String, html : String, encoding : String.Encoding)]()
// Session shared by all data tasks.
let session: URLSession = .shared
For-in loop with multiple URLSession DataTasks
// Start one data task per distinct URL. Each task decodes the downloaded page
// and appends it to `htmlTup`; `urlSet` holds the URLs that are still pending.
// NOTE(review): the completion handler mutates `urlSet` and `htmlTup` while the
// main thread polls `urlSet`; presumably it runs off the main thread, so this
// access is unsynchronized — confirm and add locking if needed.
for myurl in urlArr {
    // Reject malformed URLs before registering them as pending; otherwise
    // `urlSet` would keep an entry that no task ever removes.
    guard let inputURL = URL(string: myurl) else {
        print ("Skipping invalid URL \(myurl)")
        continue
    }
    // `inserted` is false when this URL is already pending, so it is skipped.
    guard urlSet.insert(myurl).inserted else {
        continue
    }
    print ("Loading \(myurl)...")
    let task = session.dataTask(with: inputURL, completionHandler: {mydata, response, error in
        // The end of task removing URL from Set. Done in `defer` so it also
        // happens on the failure paths below and the waiting code can finish.
        defer {
            urlSet.remove(myurl)
        }
        // On a failed request there is no usable data or response; report it
        // instead of crashing on a force unwrap.
        guard error == nil else {
            print ("Failed to load \(myurl)")
            return
        }
        guard let mydata = mydata else {
            print ("No data received from \(myurl)")
            return
        }
        // Read Encoding from HTML First
        var usedEncoding = String.Encoding.utf8
        if let encodingName = response?.textEncodingName {
            let encoding = CFStringConvertIANACharSetNameToEncoding(encodingName)
            // Keep the UTF-8 default when the charset name is not recognized.
            if encoding != kCFStringEncodingInvalidId {
                usedEncoding = String.Encoding(rawValue: CFStringConvertEncodingToNSStringEncoding(encoding))
            }
        }
        // Do some work with HTML String and read data into an Array
        if let myString = String(data: mydata, encoding: usedEncoding) {
            htmlTup += [(url: myurl,html: myString, encoding: usedEncoding)]
        }
    })
    //Run Task
    task.resume()
}
Waiting for tasks to complete and convert HTML to text
// Block until every started task has removed its URL from `urlSet`.
// Sleep between checks instead of spinning on an empty loop body, which would
// keep a CPU core fully busy for the whole duration of the downloads.
while !urlSet.isEmpty {
    Thread.sleep(forTimeInterval: 0.05)
}
// Convert each stored page to plain text and print it, in the order the
// entries appear in `htmlTup`.
for page in htmlTup {
    print ("Writing data from \(page.url)...")
    let plainText = html2text(html: page.html, usedEncoding: page.encoding)
    print (plainText)
}
Update 1: Using a RunLoop in the main thread (from this). Such code can be used to check when each task has finished:
// Tracks outstanding tasks: one element is appended per started task and one
// is removed when a task's completion handler runs.
var taskArr = [Bool]()
let task = session.dataTask(with: request) { (data, response, error) in
    // (Response handling is not shown in this snippet.)
    // Mark this task as finished. This must run inside the completion
    // handler, not right after the task is created.
    taskArr.removeLast()
}
taskArr.append(true)
task.resume()
// Waiting for tasks to complete
let theRL = RunLoop.current
while !taskArr.isEmpty && theRL.run(mode: .defaultRunLoopMode, before: .distantFuture) { }