2

I am trying to retrieve a hierarchical ordered result from a query on an auto referenced table like this:

create table category (
  id          serial,
  
  -- parent category, 
  parent_id   integer default null, -- null for root category
  
  -- tree control
  tree_depth  smallint not null, -- calculated

  primary key (id),
  unique (parent_id, id),
  foreign key (parent_id) references category (id)
);

This is a common approach to store a tree of categories except for the need for supporting multiple languages. To that aim, we join a language-dependent table like this:

create table category_lang (
  id            serial,
  
  -- natural primary key
  category_id   integer not null,
  lang_code     char(2) not null,
  
  -- language-dependent data
  title         varchar(128) not null,
  
  primary key (id),
  unique (category_id, lang_code)
);

The tree_depth column is calculated in a before insert trigger like this:

create or replace function fn_category__bins () returns trigger as $$
begin
  -- calculate tree_depth as parent tree_depth + 1
  if new.parent_id is null then
    new.tree_depth = 0;
  else
    new.tree_depth = (select tree_depth from category where id = new.parent_id limit 1) + 1;
  end if;

  return new;
end;
$$ language plpgsql;

create trigger tg_category__bins before insert on category for each row
execute procedure fn_category__bins();

We populate the tables with easy to read texts in two languages:

insert into category (parent_id, id) values 
(null, 1),
(null, 2),
(null, 3),

(1, 11),
(1, 12),
(1, 13),

(2, 21),
(2, 22),
(3, 31),

(21, 211),
(21, 212),
(21, 213);

-- lang_code = 'EN'
insert into category_lang (category_id, title, lang_code) values 
(1, 'One',   'EN'),
(2, 'Two',   'EN'),
(3, 'Three', 'EN'),

(11, 'One.One',   'EN'),
(12, 'One.Two',   'EN'),
(13, 'One.Three', 'EN'),

(21, 'Two.One',   'EN'),
(22, 'Two.Two',   'EN'),
(31, 'Three.One', 'EN'),

(211, 'Two.One.One',   'EN'),
(212, 'Two.One.Two',   'EN'),
(213, 'Two.One.Three', 'EN');

-- lang_code = 'ES'
insert into category_lang (category_id, title, lang_code) values 
(1, 'Uno',  'ES'),
(2, 'Dos',  'ES'),
(3, 'Tres', 'ES'),

(11, 'Uno.Uno',  'ES'),
(12, 'Uno.Dos',  'ES'),
(13, 'Uno.Tres', 'ES'),

(21, 'Dos.Uno',  'ES'),
(22, 'Dos.Dos',  'ES'),
(31, 'Tres.Uno', 'ES'),

(211, 'Dos.Uno.Uno',  'ES'),
(212, 'Dos.Uno.Dos',  'ES'),
(213, 'Dos.Uno.Tres', 'ES');

A plain query produces a natural result like this:

select * from category tc 
left outer join category_lang tl on tl.category_id = tc.id and tl.lang_code = 'EN';

id |parent_id|tree_depth|id|category_id|lang_code|title        |
---|---------|----------|--|-----------|---------|-------------|
  1|         |         0| 1|          1|EN       |One          |
  2|         |         0| 2|          2|EN       |Two          |
  3|         |         0| 3|          3|EN       |Three        |
 11|        1|         1| 4|         11|EN       |One.One      |
 12|        1|         1| 5|         12|EN       |One.Two      |
 13|        1|         1| 6|         13|EN       |One.Three    |
 21|        2|         1| 7|         21|EN       |Two.One      |
 22|        2|         1| 8|         22|EN       |Two.Two      |
 31|        3|         1| 9|         31|EN       |Three.One    |
211|       21|         2|10|        211|EN       |Two.One.One  |
212|       21|         2|11|        212|EN       |Two.One.Two  |
213|       21|         2|12|        213|EN       |Two.One.Three|

when the expected order should be compliant with tree hierarchy and alphabetical order in English (at every depth level), like this:
[Edited to fix the error identified by Erwin]

id |parent_id|tree_depth|id|category_id|lang_code|title        |
---|---------|----------|--|-----------|---------|-------------|
  1|         |         0| 1|          1|EN       |One          |
 11|        1|         1| 4|         11|EN       |One.One      |
 13|        1|         1| 6|         13|EN       |One.Three    |
 12|        1|         1| 5|         12|EN       |One.Two      |
  3|         |         0| 3|          3|EN       |Three        |
 31|        3|         1| 9|         31|EN       |Three.One    |
  2|         |         0| 2|          2|EN       |Two          |
 21|        2|         1| 7|         21|EN       |Two.One      |
211|       21|         2|10|        211|EN       |Two.One.One  |
213|       21|         2|12|        213|EN       |Two.One.Three|
212|       21|         2|11|        212|EN       |Two.One.Two  |
 22|        2|         1| 8|         22|EN       |Two.Two      |

Note that the alphabetical order at every depth forces a different result for Spanish:
[Edited to fix the error identified by Erwin]

id |parent_id|tree_depth|id|category_id|lang_code|title       |
---|---------|----------|--|-----------|---------|------------|
  2|         |         0|14|          2|ES       |Dos         |
 22|        2|         1|20|         22|ES       |Dos.Dos     |
 21|        2|         1|19|         21|ES       |Dos.Uno     |
212|       21|         2|23|        212|ES       |Dos.Uno.Dos |
213|       21|         2|24|        213|ES       |Dos.Uno.Tres|
211|       21|         2|22|        211|ES       |Dos.Uno.Uno |
  1|         |         0|13|          1|ES       |Uno         |
 12|        1|         1|17|         12|ES       |Uno.Dos     |
 13|        1|         1|18|         13|ES       |Uno.Tres    |
 11|        1|         1|16|         11|ES       |Uno.Uno     |
  3|         |         0|15|          3|ES       |Tres        |
 31|        3|         1|21|         31|ES       |Tres.Uno    |

I have tried a number of approaches, including a recursive CTE as in https://www.postgresql.org/docs/12/queries-with.html, but none seems to cope with the problem of different orders for different languages.

Any ideas?

Erwin Brandstetter
  • 605,456
  • 145
  • 1,078
  • 1,228
coterobarros
  • 941
  • 1
  • 16
  • 25
  • In your final result, do you want the order to be different for `EN` and `ES`? I ask because the `order by` on the final result is what determines the ordering. – Mike Organek Jul 17 '20 at 18:11
  • Yes, as I mentioned, the final result must be ordered hierarchically and alphabetically, i.e., the graph determines the main order and the order by title determines the order at a given tree depth. – coterobarros Jul 17 '20 at 21:41
  • Please try `order by left(category_id::text, 1), title`. – Mike Organek Jul 17 '20 at 22:04
  • wow! this is almost perfect! ```select * from of_category tc left outer join of_category_lang tl on tl.category_id = tc.id and tl.lang_code = 'EN' order by left(tl.category_id::text, 1), tl.title``` – coterobarros Jul 17 '20 at 22:27
  • Unfortunately it fails for the first level, where returns `One, Two, Three`, instead of `One, Three, Two`. – coterobarros Jul 17 '20 at 22:28
  • Maybe you trusted too much my election of ids for the example. In a real case they would be totally different. – coterobarros Jul 17 '20 at 22:34

2 Answers2

3

... the expected order should be compliant with tree hierarchy and alphabetical order in English (at every depth level),

The additional difficulty is that category_lang(title, lang_code) is not defined UNIQUE, so we need to sort by title and category_id (as tiebreaker) on every level - which is hard to implement for a dynamic number of levels. An array of composite type can solve the conundrum.

Your displayed results do not currently comply with your requirement. 'Three' should sort before 'Two' according to English sorting rules. The result of the following query implements your requirement:

Create once per database:

CREATE TYPE title_id AS (title varchar(128), id int);

Then use a recursive CTE to generate an array of this composite type according to its path.

WITH RECURSIVE tree AS (
   SELECT c.id AS cat_id, c.parent_id, c.tree_depth
        , l.id AS lang_id, l.title, l.lang_code
        , ARRAY[(l.title, l.category_id)::title_id] AS sort_arr
   FROM   category      c 
   JOIN   category_lang l ON l.category_id = c.id
                         AND l.lang_code = 'EN'
   WHERE  c.parent_id IS NULL  -- root cat

   UNION ALL
   SELECT c.id AS cat_id, c.parent_id, c.tree_depth
        , l.id AS lang_id, l.title, l.lang_code
        , sort_arr || (l.title, l.category_id)::title_id
   FROM   tree          t
   JOIN   category      c ON c.parent_id = t.cat_id
   JOIN   category_lang l ON l.category_id = c.id
                         AND l.lang_code = t.lang_code
    )
SELECT cat_id, parent_id, tree_depth, lang_id, title 
FROM   tree
ORDER  BY sort_arr;

db<>fiddle here

Closely related with more explanation and details:

COLLATE?

But that's not all. The simple solution sorts by the default collation of your database, which seems inappropriate for different languages.

Every language has its own collation rules, or typically several of them, depending on the region of the world and other political / cultural rules. The "language" is not enough to specify exact rules for sorting. The precise locale matters. Postgres implements collation-aware sorting with the COLLATE keyword. You would have to store the actual precise collation in addition to the language, and use it to sort properly.

Also, indexes depend on the exact COLLATION. You might consider multiple partial indexes with different collation. Lots of tricky stuff that goes beyond the scope of this question. See:

Asides

Erwin Brandstetter
  • 605,456
  • 145
  • 1,078
  • 1,228
  • Thank you Erwin. All of them excellent suggestions. Yes, the english order is my fault. I did it manually and probably didn't pay attention to the 'w' in Two. – coterobarros Jul 17 '20 at 22:35
  • @coterobarros: Also note the added paragraph on collation. – Erwin Brandstetter Jul 17 '20 at 22:53
  • Wow much more than I expected from a humble category tree! I was struggling with the collate problem in other different parts of the database but this is a good starting point to address those problems. Thank you so much. – coterobarros Jul 17 '20 at 23:04
  • Your tip about `text` seems to be in conflict with PostgreSQL tip in https://www.postgresql.org/docs/9.1/datatype-character.html. Char(n) is clearly a poor election but seem to make no diference between `varchar` and `text`. I use `varchar` whenever its length is below 126 chars. – coterobarros Jul 17 '20 at 23:10
  • @coterobarros: I don't see the conflict. Basically, *never* use `char(n)`, and only use `varchar(n)` when the length restriction makes sense. I added [one more link](https://dba.stackexchange.com/a/21496/3684) above with more details. While all entries in `lang_code` have exactly two letters, even `char(2)` won't hurt much. But I still wouldn't use it. Complications with type casts may arise. – Erwin Brandstetter Jul 17 '20 at 23:16
  • Regarding char(2) for lang_code, does char(n) uses those +2 byte extra that varchar(<=126) uses? – coterobarros Jul 18 '20 at 00:45
  • 1
    About effective sizes, see: https://dba.stackexchange.com/a/125526/3684 – Erwin Brandstetter Jul 18 '20 at 01:06
  • Thanks again.Such a useful function `pg_column_size`! Sorry, but I'm under 15 in db.stackexchanche and cannot push there your answer up, – coterobarros Jul 18 '20 at 09:23
0

Before Erwin answered with a much simpler solution, I made this recursive approach by myself. It works better inserting a virtual root category, which allow us to retrive the whole tree from a single entry point.

create or replace function list_category_tree (
    _category_id integer,
    _lang_code char(2)
)
returns setof category
as $$
declare
    _child_category category;
begin
    -- return the passed category
    return query
    select * from category where id = _category_id;

    -- loop over the passed category children
    for _child_category in 
        select tc.* 
        from category tc
        join category_lang tl on tl.category_id = tc.id and tl.lang_code = _lang_code
        where tc.parent_id = _category_id
        order by tl.title asc
    loop
        -- recursively look for every children childrens
        return query 
        select * from list_category_tree(_child_category.id, _lang_code);
    end loop;
end;
$$ language plpgsql;

A simple test could be

select * 
from list_category_tree (0, 'EN') as tc
join category_lang tl on tl.category_id = tc.id and tl.lang_code = 'EN';

id |parent_id|tree_depth|id|category_id|lang_code|title        |
---|---------|----------|--|-----------|---------|-------------|
  1|         |         0| 1|          1|EN       |One          |
 11|        1|         1| 4|         11|EN       |One.One      |
 13|        1|         1| 6|         13|EN       |One.Three    |
 12|        1|         1| 5|         12|EN       |One.Two      |
  2|         |         0| 2|          2|EN       |Two          |
 21|        2|         1| 7|         21|EN       |Two.One      |
211|       21|         2|10|        211|EN       |Two.One.One  |
213|       21|         2|12|        213|EN       |Two.One.Three|
212|       21|         2|11|        212|EN       |Two.One.Two  |
 22|        2|         1| 8|         22|EN       |Two.Two      |
  3|         |         0| 3|          3|EN       |Three        |
 31|        3|         1| 9|         31|EN       |Three.One    |


select * 
from list_category_tree (0, 'ES') as tc
join of_category_lang tl on tl.category_id = tc.id and tl.lang_code = 'ES';

id |parent_id|tree_depth|id|category_id|lang_code|title       |
---|---------|----------|--|-----------|---------|------------|
  2|        0|         1|14|          2|ES       |Dos         |
 22|        2|         2|20|         22|ES       |Dos.Dos     |
 21|        2|         2|19|         21|ES       |Dos.Uno     |
212|       21|         3|23|        212|ES       |Dos.Uno.Dos |
213|       21|         3|24|        213|ES       |Dos.Uno.Tres|
211|       21|         3|22|        211|ES       |Dos.Uno.Uno |
  3|        0|         1|15|          3|ES       |Tres        |
 31|        3|         2|21|         31|ES       |Tres.Uno    |
  1|        0|         1|13|          1|ES       |Uno         |
 12|        1|         2|17|         12|ES       |Uno.Dos     |
 13|        1|         2|18|         13|ES       |Uno.Tres    |
 11|        1|         2|16|         11|ES       |Uno.Uno     |

having inserted the root node as

insert into of_category (parent_id, id) values 
(null, 0),

(null, 1),
(null, 2),
(null, 3),

(1, 11),
(1, 12),
(1, 13),

(2, 21),
(2, 22),
(3, 31),

(21, 211),
(21, 212),
(21, 213);

-- lang_code = 'EN'
insert into of_category_lang (category_id, title, lang_code) values 
(0, 'Root', 'EN'),

(1, 'One',   'EN'),
(2, 'Two',   'EN'),
(3, 'Three', 'EN'),

(11, 'One.One',   'EN'),
(12, 'One.Two',   'EN'),
(13, 'One.Three', 'EN'),

(21, 'Two.One',   'EN'),
(22, 'Two.Two',   'EN'),
(31, 'Three.One', 'EN'),

(211, 'Two.One.One',   'EN'),
(212, 'Two.One.Two',   'EN'),
(213, 'Two.One.Three', 'EN');

-- lang_code = 'ES'
insert into of_category_lang (category_id, title, lang_code) values 
(0, 'Raíz', 'ES'),

(1, 'Uno',  'ES'),
(2, 'Dos',  'ES'),
(3, 'Tres', 'ES'),

(11, 'Uno.Uno',  'ES'),
(12, 'Uno.Dos',  'ES'),
(13, 'Uno.Tres', 'ES'),

(21, 'Dos.Uno',  'ES'),
(22, 'Dos.Dos',  'ES'),
(31, 'Tres.Uno', 'ES'),

(211, 'Dos.Uno.Uno',  'ES'),
(212, 'Dos.Uno.Dos',  'ES'),
(213, 'Dos.Uno.Tres', 'ES');
coterobarros
  • 941
  • 1
  • 16
  • 25