I have developed a graphlearner
with the mlr3
package and I would like to publish it in a Rplumber
service. However, when I receive the data to make predictions (data in JSON format), the graphlearner
has trouble recognizing the data because the fromJSON
function of jsonlite
does not infer the right types (the ones on which the graph was trained). Do you have a solution for this? Is there a mechanism to handle JSON data in mlr3 during the prediction phase?
Learning step
# Learning step: build an imputation + ranger regression pipeline, train it on
# the Boston housing task and persist the fitted GraphLearner to disk.
library(mlr3)
library(mlr3pipelines) # provides po(), gunion(), selector_type() and %>>%
library(mlr3learners)  # registers the "regr.ranger" learner

# Imputation steps: missing-value indicators, a new level for factor columns,
# and histogram-based imputation for numeric columns.
imp_missind <- po("missind")
imp_fct <- po(
  "imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor"))
)
imp_num <- po(
  "imputehist",
  param_vals = list(affect_columns = selector_type("numeric"))
)
learner <- lrn("regr.ranger")

# Copy the input into two branches (indicators | imputed features), merge the
# resulting features again and feed them to the learner.
graph <- po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

t1 <- tsk("boston_housing")
g1 <- GraphLearner$new(graph)
g1$train(t1)

# Persist the trained model so the prediction service can load it later.
saveRDS(g1, "my-model")
Prediction step: it works (simulated data for prediction, target column removed)
# Take the first observation of the task data without its first column and
# predict on it with the model reloaded from disk.
data <- t1$data()[1L, -1]
model <- readRDS("my-model")
model$predict_newdata(newdata = data)
Prediction step: it does not work (simulated JSON data for prediction)
# Reproduce the failing case: the observation goes through a JSON round trip
# before it is handed to the model.
library(jsonlite) # provides toJSON() / fromJSON()

model <- readRDS("my-model")
data <- t1$data()[1L, -1]

# fromJSON() re-infers the column types from the JSON text, so they are not
# guaranteed to match the types the model was trained on.
json <- fromJSON(toJSON(data, na = "string"))

# This call is expected to fail with the type-mismatch error reported below.
model$predict_newdata(newdata = json)
and the error :
Erreur : Cannot rbind task: Types do not match for column: cmedv (numeric != integer)
UPDATE reproducible example
library(mlr3learners)
library(mlr3)
library(mlr3pipelines)
library(jsonlite)
# Imputation steps used by the pipeline.
imp_missind <- po("missind")
imp_fct <- po(
  "imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor"))
)
imp_num <- po(
  "imputehist",
  param_vals = list(affect_columns = selector_type("numeric"))
)
learner <- lrn("regr.ranger")

# Two parallel branches (missing indicators | imputed features), merged and
# passed on to the learner.
graph <- po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

task <- tsk("boston_housing")
graphlearner <- GraphLearner$new(graph)

# Train the model.
graphlearner$train(task)

# Create the data to predict on (just one observation), with `chas` set to
# missing and the first column removed.
data <- task$data()
data[1L, chas := NA]
data <- data[1L, -1]

# Look at the column types.
str(data)

# Prediction on the native data: this works fine.
predict(graphlearner, data)

# Simulate the case where the data is received as JSON.
json_data <- toJSON(data, na = "string")
print(json_data)

# Get R data back from the JSON-formatted data.
data_from_json <- fromJSON(json_data)

# Look at the column types again: some differ
# (numeric != integer, factor != character).
str(data_from_json)

# Try to predict: this does not work, error: cmedv (numeric != integer).
predict(graphlearner, data_from_json)