I have a set of 356 KMZ files (Google Maps polygons), one per administrative unit in the Philippines, containing details of tree planting projects. I am only interested in some of those projects, so I need to unzip each KMZ into a KML file; this step extracts all of the planting sites (even the ones I don't need), and the actual selection happens later in the workbook. The code below was given to me and, in theory, should have worked once I changed the directories to my own. It runs fine for the first ~30 iterations but then fails with the error shown after the code. It is a long and complicated for-loop, but if anyone has ideas for how to fix this error within this loop, I would greatly appreciate it.
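(For anyone unfamiliar with the format: a KMZ is just a zipped KML, so for a single file the conversion boils down to something like the lines below. The file name here is only a placeholder, not one of my real files.)

library(rgdal)
#Unzip one KMZ; the first extracted file is the doc.kml inside the archive
t<-unzip("EXAMPLE_CENRO_2013.kmz")[1]
#Read the polygon layer from the extracted KML
poly<-readOGR(t,layer=ogrListLayers(t)[1],require_geomType="wkbPolygon")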
#Required packages (loaded here so the snippet is self-contained)
library(stringr) #str_c, str_split
library(rgdal)   #readOGR, ogrListLayers, writeOGR
library(rgeos)   #gCentroid, gArea
library(sp)      #spTransform, CRS

#Read in a character vector of all downloaded KMZ Google Maps files
kmzs<-list.files(str_c(basedir,"NGP Planting Spatial Data to 2019/"),pattern="*.kmz$")
#kmzs<-c("COTABATO 2013.kmz","BISLIG 2011.kmz")
#Remove file extension from all KMZ Google Maps file names
filenames<-gsub(".kmz","",kmzs)

for(i in 1:length(filenames)){
  #Convert KMZ to a temporary KML file
  t<-unzip(str_c(basedir,"NGP Planting Spatial Data to 2019/",kmzs[i]))[1]
  #Read in the polygons data from the unzipped KMZ file (use ogrListLayers to search for the polygons layer name)
  poly<-suppressWarnings(readOGR(t,layer=ogrListLayers(t)[1],require_geomType=c("wkbPolygon")))
  if(kmzs[i]=="ILOILO 2013.kmz"){ #This Municipality/year has no Site ID data (there could be other new files with similar issues - need to check manually)
    alldata<-alldata
  } else {
    #Extract single polygons for each planting site contained
    for(m in 1:length(poly)){
      p<-poly[m,]
      #Extract site-specific information to add to the site spatial info -- all of this information is stored in the site-specific polygon within the administrative KMZ files, and here it is 'copied' to the resulting site-specific KML files
      provs<-gsub("Province","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("Province",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      cens<-gsub("CENRO","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("CENRO",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      muns<-gsub("Municipality","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("Municipality",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      brgs<-gsub("Barangay","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("Barangay",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      species<-gsub("Species","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("Species",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      NGParea<-gsub("AreainHectares","",gsub(" ","",unlist(str_split(data.frame(p)[,2],"<br>"))[which(grepl("Area in Hectares",unlist(str_split(data.frame(p)[,2],"<br>"))))]))
      #Add centre coordinates and area (ha) information to the polygons data frame
      p@data<-cbind(p@data[,1],provs,cens,muns,brgs,species,NGParea,data.frame(gCentroid(p,byid=TRUE)),gArea(spTransform(p,CRS("+init=EPSG:3857")))*0.0001,p@data[,2])
      #Rename columns in the polygons data frame
      names(p@data)<-c("Site","Province","CENRO","Municipality","Barangay","SpeciesPlanted","ReportedArea","X","Y","QuantifiedArea_hectares","RawData")
      #Write out each individual planting site polygon as an ESRI Shapefile
      suppressWarnings(writeOGR(p,dsn=str_c(basedir,"NGP Planting Spatial Data to 2019/KML/",as.vector(p@data[1,1]),".shp"),layer=as.vector(p@data[1,1]),driver="ESRI Shapefile",overwrite_layer=TRUE))
      #Accumulate the individual site polygons for the whole-CENRO output
      if(m==1){
        polynew<-p
      } else {
        polynew<-rbind(polynew,p)
      }
    }
    #Write out whole CENRO polygons as an ESRI Shapefile
    writeOGR(polynew,dsn=str_c(basedir,"NGP Planting Spatial Data to 2019/KML/",filenames[i],".shp"),layer=filenames[i],driver="ESRI Shapefile",overwrite_layer=TRUE)
    #Create a new data frame from the polygons list, their centre coordinates, and the CENRO polygon file name
    temp<-cbind(data.frame(polynew),rep(filenames[i],length(data.frame(polynew[,1]))))
    names(temp)<-c("Site","Province","CENRO","Municipality","Barangay","SpeciesPlanted","ReportedArea","X","Y","QuantifiedArea_hectares","RawData","PolygonsFiles")
    #Write out the created data frame (N.B. this is not strictly necessary)
    write.csv(temp,file=str_c(basedir,"NGP Planting Spatial Data to 2019/KML/",filenames[i],".csv"),row.names=FALSE)
    if(i==1){
      #Create a new data frame containing the spatial data for each site
      alldata<-temp
    } else {
      #Bind the site spatial data from each CENRO together
      names(temp)<-names(alldata)
      alldata<-rbind(alldata,temp)
    }
  }
  print(str_c("URL ",i," complete!!"))
}
##OUTPUT:
OGR data source with driver: KML
Source: "/Users/badiskhiari/doc.kml", layer: "Alaminos_City_2012"
with 62 features;
Selected wkbPolygon feature type, with 62 rows
It has 2 fields
NOTE: keeping only 62 wkbPolygon of 62 features
Error in writeOGR(p, dsn = str_c(basedir, "NGP Planting Spatial Data to 2019/KML/", :
number of objects mismatch
In addition: Warning message:
In data.frame(..., check.names = FALSE) :
row names were found from a short variable and have been discarded
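(My guess, and it is only a guess, is that for one of the polygons one of the parsed description fields above (Province, CENRO, Species, etc.) comes back with a length other than one, so the cbind() produces more rows than the single polygon and writeOGR() then complains about the mismatch. A rough check along these lines, with i set to the failing file, should show which field misbehaves; this snippet is my own addition, not part of the original script.)

#Diagnostic sketch (hypothetical): count how many description lines match each keyword per polygon
t<-unzip(str_c(basedir,"NGP Planting Spatial Data to 2019/",kmzs[i]))[1]
poly<-suppressWarnings(readOGR(t,layer=ogrListLayers(t)[1],require_geomType=c("wkbPolygon")))
for(m in 1:length(poly)){
  desc<-unlist(str_split(data.frame(poly[m,])[,2],"<br>"))
  print(c(polygon=m,
          Province=sum(grepl("Province",desc)),
          CENRO=sum(grepl("CENRO",desc)),
          Municipality=sum(grepl("Municipality",desc)),
          Barangay=sum(grepl("Barangay",desc)),
          Species=sum(grepl("Species",desc)),
          Area=sum(grepl("Area in Hectares",desc))))
}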
##Once the above is working, I then need to run the following as the next step of the pipeline
#Rename columns in the results data frame
names(alldata)<-c("Site","Province","CENRO","Municipality","Barangay","SpeciesPlanted","ReportedArea","X","Y","QuantifiedArea_hectares","RawData","PolygonsFiles")
#t<-read.csv(str_c(basedir,"AllNGPPlantingSpatialData.csv"),h=T)
#alldata<-rbind(t,alldata)
#Write out the results data frame
write.csv(alldata,file=str_c(basedir,"NGP Planting Spatial Data to 2019/AllNGPPlantingSpatialData_To2019.csv"),row.names=FALSE)