With the following code, I was able to extract the numbers in the table. First, I converted the image to a PDF file. Then, I converted the PDF file to a Word file. Finally, I extracted the table from the Word file. This solution only works on Windows with Microsoft Word installed.
library(RDCOMClient)
library(magick)
# Extract the numbers of a table shown in an image by going through
# image -> PDF -> Word document -> Word table -> character matrix.

# Input image, intermediate PDF and resulting Word document.
path_PDF <- "D:\\image_Stackoverflow79.pdf"
path_PNG <- "D:\\Dropbox\\Reponses_Stackoverflow\\image_Stackoverflow79.png"
path_Word <- "D:\\image_Stackoverflow79.docx"

# Read and crop the image before opening the PDF device, so that a failure
# while reading the image does not leave a graphics device open.
im <- image_read(path_PNG)
im <- image_crop(
  im,
  geometry = geometry_area(
    width = 510, height = 310, x_off = 100, y_off = 110
  )
)

# Step 1: draw the cropped image into a PDF file.
pdf(path_PDF, height = 8, width = 6)
plot(im)
dev.off()

# Step 2: open the PDF in Word through COM and save it as a .docx file.
wordApp <- COMCreate("Word.Application")
wordApp[["Visible"]] <- TRUE
wordApp[["DisplayAlerts"]] <- FALSE
doc <- wordApp[["Documents"]]$Open(
  normalizePath(path_PDF),
  ConfirmConversions = FALSE
)
doc$SaveAs2(path_Word)

# Step 3: copy the text of every cell of the first table into a matrix.
# The table object is fetched once instead of on every cell access.
tbl <- doc$tables(1)
nb_Row <- tbl$Rows()$Count()
nb_Col <- tbl$Columns()$Count()
mat_Temp <- matrix(NA_character_, nrow = nb_Row, ncol = nb_Col)

# seq_len() yields an empty sequence when a count is 0 (1:0 would not).
for (i in seq_len(nb_Row)) {
  for (j in seq_len(nb_Col)) {
    # A cell that cannot be read at position (i, j) is recorded as NA.
    # The text is stored exactly as returned, including the trailing "\r\a".
    mat_Temp[i, j] <- tryCatch(
      tbl$cell(i, j)$range()$text(),
      error = function(e) NA_character_
    )
  }
}

# Release the document and the Word instance so that no Word process is
# left running once the table has been read.
doc$Close(FALSE)
wordApp$Quit()

mat_Temp
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "\r\a" "\r\a" "\r\a" "\r\a" "\r\a" "\r\a" "\r\a" "\r\a"
[2,] "\r\a" "0.46\r\a" "0.46\r\a" "0.46\r\a" "0.46\r\a" "0.46\r\a" "0.46\r\a" "\r\a"
[3,] "\r\a" "1.00\r\a" "0.00\r\a" "0.98\r\a" "0.03\r\a" "0.95\r\a" "0.85\r\a" NA
[4,] "\r\a" "0.025\r\a" "0.025\r\a" "0.025\r\a" "0.025\r\a" "0.025\r\a" "0.025\r\a" NA
[5,] "\r\a" "0.005\r\a" "0.005\r\a" "0.005\r\a" "0.005\r\a" "0.005\r\a" "0.005\r\a" NA
[6,] "\r\a" "1.49\r\a" "0.49\r\a" "1.47\r\a" "0.52\r\a" "1.44\r\a" "1.34\r\a" "\r\a"
[7,] "\r\a" "0.002\r\a" "0.002\r\a" "0.002\r\a" "0.002\r\a" "0.002\r\a" "0.002\r\a" "\r\a"
[8,] "\r\a" "1.492\r\a" "0.492\r\a" "1472\r\a" "0.522\r\a" "1.442\r\a" "1.342\r\a" "\r\a"
[9,] "\r\a" "1.59\r\a" "\r\a" "1.22\r\a" "\r\a" "\r\a" "\r\a" "\r\a"
[10,] "\r\a" "1.493\r\a" "0.493\r\a" "1473\r\a" "0.523\r\a" "1.443\r\a" "1.343\r\a" "\r\a"
[11,] "\r\a" "0.107\r\a" "o. 108\r\a" "o. 105\r\a" "0.108\r\a" "0.106\r\a" "0.104\r\a" "\r\a"
[12,] "\r\a" "\r\a" "\r\a" NA NA NA NA NA
With this approach, the numbers appear in the correct columns, although a few values still contain recognition errors (e.g. "1472" and "o. 108").