2

I need a function to normalize my input table features values. My features table has 9 columns out of which x1,x2...x6 are the input columns I need to scale.

I'm able to do it by using a static query:

create or replace function scale_function()
returns void as $$
declare tav1 features%rowtype; rang1 features%rowtype; 

begin   
    select avg(n),avg(x0),avg(x1),avg(x2),avg(x3),avg(x4),avg(x5),avg(x6),avg(y)
    into tav1 from features;

    select max(n)-min(n),max(x0)-min(x0),max(x1)-min(x1),max(x2)-min(x2),max(x3)-min(x3),
    max(x4)-min(x4),max(x5)-min(x5),max(x6)-min(x6),max(y)-min(y)
    into rang1 from features;

    update features
    set     x1= (x1-tav1.x1)/(rang1.x1),x2= (x2-tav1.x2)/(rang1.x2),
        x3= (x3-tav1.x3)/(rang1.x3),x4= (x4-tav1.x4)/(rang1.x4),
        x5= (x5-tav1.x5)/(rang1.x5),x6= (x6-tav1.x6)/(rang1.x6),
        y= (y-tav1.y)/(rang1.y);

return;
end;
$$ language plpgsql;

But now I require a dynamic query to scale n column values i.e., x1,x2...,xn (say I've 200+ columns) in my features table. I'm trying this code but this won't work as there is an issue with a user defined data type:

create or replace function scale_function(n int)
returns void as $$
declare
   tav1 features%rowtype;
   rang1 features%rowtype;
   query1 text :=''; query2 text :=''; 

begin   
for i in 0..n
    loop
        query1 := query1 ||',avg(x'||i||')';
        query2 := query2||',max(x'||i||')-min(x'||i||')';
        end loop;

    query1 := 'select avg(n)'||query1||',avg(y) into tav1 from features;';
    execute query1;

    query2 := 'select max(n)-min(n)'||query2||',max(y)-min(y) into rang1 from features;';
    execute query2;


update features
set    x1= (x1-tav1.x1)/(rang1.x1), ... ,xn=(xn-tav1.xn)/(rang1.xn)
      ,y= (y-tav1.y)/(rang1.y);

return;
end;
$$ language plpgsql;

Here I'm trying to take the avg() values of the columns into a user-defined rowtype tav1 and have to use that tav1 value to update.

Can any one help me how to update the features table values using dynamic query for 'n' such columns?

************ Error ************

ERROR: column "avg" specified more than once

SQL state: 42701

Context: SQL statement "select avg(n),avg(x0),avg(x1),avg(x2),avg(x3),avg(x4),avg(x5),avg(x6),avg(y) into tav1 from features;"

PL/pgSQL function scale_function(integer) line 12 at EXECUTE statement

I'm using PostgreSQL 9.3.0.

Erwin Brandstetter
  • 605,456
  • 145
  • 1,078
  • 1,228

1 Answers1

0

Basic UPDATE

Replace the first query with this much shorter and more efficient single UPDATE command:

UPDATE features
SET   (x1,x2,x3,x4,x5,x6, y)   
    = ((x1 - g.avg1) / g.range1
     , (x2 - g.avg2) / g.range2
 --  , (x3 - ...
     , (y  - g.avgy) / g.rangey)
FROM (
   SELECT avg(x1) AS avg1, max(x1) - min(x1) AS range1
        , avg(x2) AS avg2, max(x2) - min(x2) AS range2
     -- , avg(x3) ...
        , avg(y) AS avgy, max(y) - min(y) AS rangey
   FROM   features
   ) g;

About the short UPDATE syntax:

Dynamic function

Building on the simpler query, here is a dynamic function for any number of columns:

CREATE OR REPLACE FUNCTION scale_function_dyn()
  RETURNS void AS
$func$
DECLARE
   cols text;  -- list of target columns
   vals text;  -- list of values to insert
   aggs text;  -- column list for aggregate query
BEGIN
   SELECT INTO cols, vals, aggs
          string_agg(quote_ident(attname), ', ')
        , string_agg(format('(%I - g.%I) / g.%I'
                          , attname, 'avg_' || attname, 'range_' || attname), ', ')
        , string_agg(format('avg(%1$I) AS %2$I, max(%1$I) - min(%1$I) AS %3$I'
                          , attname, 'avg_' || attname, 'range_' || attname), ', ')
   FROM   pg_attribute
   WHERE  attrelid = 'features'::regclass
   AND    attname NOT IN ('n', 'x0')  -- exclude columns from update
   AND    NOT attisdropped            -- no dropped (dead) columns
   AND    attnum > 0;                 -- no system columns

   EXECUTE format('UPDATE features
                   SET   (%s) = (%s)
                   FROM  (SELECT %s FROM features) g'
                 , cols, vals, aggs);

END
$func$  LANGUAGE plpgsql;

Related answer with more explanation:

SQL Fiddle.

Community
  • 1
  • 1
Erwin Brandstetter
  • 605,456
  • 145
  • 1,078
  • 1,228