I am trying to implement the built in kmeans()
function using POSTGIS as follows:
create or replace FUNCTION kmeans (number_of_clusters INTEGER) RETURNS TEXT AS
$$
begin
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
RETURN 'SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan';
END;
$$ LANGUAGE PLPGSQL;
This gives an error:
SQL Error [42601]: ERROR: syntax error at or near "RETURN"
Does the entire query need to be in the RETURN
clause perhaps? I am also concerned whether to RETURN TEXT or a table with different specified columns.
UPDATE: I have also tried the following:
create or replace FUNCTION kmeans (number_of_clusters INTEGER)
RETURNS TABLE (ID varchar ,
cluster_id int)
LANGUAGE plpgsql AS
$func$
DECLARE
number_of_clusters int;
BEGIN
RETURN QUERY
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
END
$func$;
but again, this gives the following error:
SQL Error [42601]: ERROR: syntax error at or near "END"
Position: 1114
Second update: based on the comment by Luuk, I have updated the code to:
create or replace FUNCTION kmeans (number_of_clusters INTEGER)
RETURNS TABLE (ID varchar ,
cluster_id int)
LANGUAGE plpgsql AS
$$
DECLARE
number_of_clusters int;
BEGIN
RETURN QUERY
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
end;
$$;
But I am being presented with this error -
SQL Error [42601]: ERROR: syntax error at or near "end"