13

Is there a mysql stored function out there to create a slug from a url (or any value really).

So my query can be:

SELECT *, SLUG(url) FROM clients
GEOCHET
  • 21,119
  • 15
  • 74
  • 98
Robert Ross
  • 1,895
  • 2
  • 23
  • 32
  • Possible duplicate of http://stackoverflow.com/questions/3690432/mysql-query-to-retrieve-full-url-slug – Brad Mar 23 '11 at 18:26
  • that works on multiple languages with the help of transliteration. http://stackoverflow.com/questions/30570865/how-to-rewrite-this-php-slugify-function-to-mysql?noredirect=1#comment49267833_30570865 – juslintek Jun 02 '15 at 15:08

8 Answers8

26

This is an improved version of Robert Ross's answer. It is much faster since it avoids looping through all of the allowed characters and just checks by comparing ASCII codes.

DROP FUNCTION IF EXISTS `slugify`;
DELIMITER ;;
CREATE DEFINER=`root`@`localhost`
FUNCTION `slugify`(dirty_string varchar(200))
RETURNS varchar(200) CHARSET latin1
DETERMINISTIC
BEGIN
    DECLARE x, y , z Int;
    Declare temp_string, new_string VarChar(200);
    Declare is_allowed Bool;
    Declare c, check_char VarChar(1);

    set temp_string = LOWER(dirty_string);

    Set temp_string = replace(temp_string, '&', ' and ');

    Select temp_string Regexp('[^a-z0-9\-]+') into x;
    If x = 1 then
        set z = 1;
        While z <= Char_length(temp_string) Do
            Set c = Substring(temp_string, z, 1);
            Set is_allowed = False;
            If !((ascii(c) = 45) or (ascii(c) >= 48 and ascii(c) <= 57) or (ascii(c) >= 97 and ascii(c) <= 122)) Then
                Set temp_string = Replace(temp_string, c, '-');
            End If;
            set z = z + 1;
        End While;
    End If;

    Select temp_string Regexp("^-|-$|'") into x;
    If x = 1 Then
        Set temp_string = Replace(temp_string, "'", '');
        Set z = Char_length(temp_string);
        Set y = Char_length(temp_string);
        Dash_check: While z > 1 Do
            If Strcmp(SubString(temp_string, -1, 1), '-') = 0 Then
                Set temp_string = Substring(temp_string,1, y-1);
                Set y = y - 1;
            Else
                Leave Dash_check;
            End If;
            Set z = z - 1;
        End While;
    End If;

    Repeat
        Select temp_string Regexp("--") into x;
        If x = 1 Then
            Set temp_string = Replace(temp_string, "--", "-");
        End If;
    Until x <> 1 End Repeat;

    If LOCATE('-', temp_string) = 1 Then
        Set temp_string = SUBSTRING(temp_string, 2);
    End If;

    Return temp_string;
END;;
DELIMITER ;
Serg
  • 2,346
  • 3
  • 29
  • 38
Gregg
  • 261
  • 3
  • 2
  • Thanks, this is a WHOLE lot faster! Do you happen to have a faster solution to the answer below for checking to see if the slug is unique? – Brad Feb 24 '12 at 18:24
  • OMG! Dude, if you were I girl I would marry you! This one is awesome! Well done, really, well done! Thanks a lot! – Nikola Svitlica Nov 23 '15 at 08:24
  • Very nice. Be careful though, replacing `&` with ` and ` means that temp_string and the result can be longer than the input string, causing: ```ERROR 1406 (22001) at line nnn: Data too long for column 'temp_string' ``` I worked around this with a folksy ```replace(temp_string, '&', 'n')``` – Zaq Nov 24 '22 at 06:54
7

I took the Slugifier from http://nastyhabit.wordpress.com/2008/09/25/mysql-slug-maker-function-aka-the-slugifier/

And modified it to not include "-" in the beginning, (We had "$" as the first character)

Here's my result:

DROP FUNCTION IF EXISTS `slugify`;
DELIMITER ;;
CREATE DEFINER=`root`@`localhost`
FUNCTION `slugify`(dirty_string varchar(200))
RETURNS varchar(200) CHARSET latin1
DETERMINISTIC
BEGIN
    DECLARE x, y , z Int;
    Declare temp_string, allowed_chars, new_string VarChar(200);
    Declare is_allowed Bool;
    Declare c, check_char VarChar(1);

    set allowed_chars = "abcdefghijklmnopqrstuvwxyz0123456789-";
    set temp_string = dirty_string;

    Select temp_string Regexp('&') Into x;
    If x = 1 Then
        Set temp_string = replace(temp_string, '&', ' and ');
    End If;

    Select temp_string Regexp('[^a-z0-9]+') into x;
    If x = 1 then
        set z = 1;
        While z <= Char_length(temp_string) Do
            Set c = Substring(temp_string, z, 1);
            Set is_allowed = False;
            Set y = 1;
            Inner_Check: While y <= Char_length(allowed_chars) Do
                If (strCmp(ascii(Substring(allowed_chars,y,1)), Ascii(c)) = 0) Then
                    Set is_allowed = True;
                    Leave Inner_Check;
                End If;
                Set y = y + 1;
            End While;
            If is_allowed = False Then
                Set temp_string = Replace(temp_string, c, '-');
            End If;

            set z = z + 1;
        End While;
    End If;

    Select temp_string Regexp("^-|-$|'") into x;
    If x = 1 Then
        Set temp_string = Replace(temp_string, "'", '');
        Set z = Char_length(temp_string);
        Set y = Char_length(temp_string);
        Dash_check: While z > 1 Do
            If Strcmp(SubString(temp_string, -1, 1), '-') = 0 Then
                Set temp_string = Substring(temp_string,1, y-1);
                Set y = y - 1;
            Else
                Leave Dash_check;
            End If;
            Set z = z - 1;
        End While;
    End If;

    Repeat
        Select temp_string Regexp("--") into x;
        If x = 1 Then
            Set temp_string = Replace(temp_string, "--", "-");
        End If;
    Until x <> 1 End Repeat;

    If LOCATE('-', temp_string) = 1 Then
        Set temp_string = SUBSTRING(temp_string, 2);
    End If;

    Return temp_string;
END;;
DELIMITER ;

Works well, But! It's pretty slow. If you're trying to select something off of this, you'll add about 1000% of time to the query compared to selecting off a pre-slugged column that's indexed.

Slugged for 500 results was .27 seconds Non-slugged (through mysql) was .00003 seconds

For inserting data though, this function would work great! Just insert the slugged data into a pre-defined column (THATS INDEXED, since why wouldnt you select something thats slugged?)

Note: The text to be 'slugified' needs to be in lowercase first, as this function does not handle Upper Case letters (converts them to '-').

GEOCHET
  • 21,119
  • 15
  • 74
  • 98
Robert Ross
  • 1,895
  • 2
  • 23
  • 32
2

I have implemented my own slug function supporting accented characters, any contribution to it is welcome.

https://github.com/falcacibar/mysql-routines-collection/blob/master/generate_slug.func.sql

Feel free to post any suggestions, bugs or any issue or contribution on github or here but is better github

Felipe Buccioni
  • 19,109
  • 2
  • 28
  • 28
2

I'm not sure if I would recommend doing this in SQL, but here is a guy that made a function for you called "slugify":

http://nastyhabit.wordpress.com/2008/09/25/mysql-slug-maker-function-aka-the-slugifier/

Brad
  • 159,648
  • 54
  • 349
  • 530
  • I found that and his solution didn't look appealing... maybe I can take some ideas from it. Why don't you suggest it? – Robert Ross Mar 23 '11 at 18:28
  • 2
    I think it would be easier to implement on the application layer of things, in PHP in your case. At least, it would be for me. I'm no SQL whiz. For data formatting (which is pretty much what this is), I like to leave that up to the consumer of the data. You should also do some speed tests, to see which method is more efficient. I think that will determine which method is appropriate for your case. – Brad Mar 23 '11 at 18:31
  • Yea thats what I'm actually doing, trying to find some benchmark numbers and decide from there. – Robert Ross Mar 23 '11 at 18:33
  • Maybe the logic is not in PHP or Wordpress and this is in a mass SQL treatment. It will be awkward to create a PHP line by line routine just for this. – Orden Dec 18 '18 at 10:02
2

I added some lines to the function Robert posted, in order to make sure the slug is always unique.

This goes right before the end of the function, as you can see below. Make sure you indicate the table name without the brackets [].

    SELECT COUNT(*) INTO i FROM [table name goes here] WHERE slug LIKE CONCAT('%',temp_string,'%');
    If i > 0 Then
        Set temp_string = CONCAT(temp_string,'-',i+1);
    End If;

    Return temp_string;
END;;
DELIMITER ;
Eduardo J Garcia
  • 103
  • 1
  • 2
  • 6
1

I have used this code for a long time. Posting here to remember and maybe help someone nowadays

just copy/paste this snippet in your MySQL query tab and run it.

-- suposing your user is root and host is localhost. If not, change root user and localhost value to match yours

CREATE DEFINER=`root`@`localhost` FUNCTION `toSlug`(
    `s` NVARCHAR(500)
)
RETURNS varchar(500) CHARSET utf8
LANGUAGE SQL
DETERMINISTIC
CONTAINS SQL
SQL SECURITY DEFINER
COMMENT ''
RETURN REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(LOWER(TRIM(s)), 
':', ''), ')', ''), '(', ''), ',', ''), '\\', ''), '/', ''), '"', ''), '?', ''),
"'", ''), '&', ''), '!', ''), '.', ''), ' ', '-'), '--', '-'), '--', '-'),'ù','u'),
'ú','u'),'û','u'),'ü','u'),'ý','y'),'ë','e'),'à','a'),'á','a'),'â','a'),'ã','a'), 
'ä','a'),'å','a'),'æ','a'),'ç','c'),'è','e'),'é','e'),'ê','e'),'ë','e'),'ì','i'),
'í','i'),'ě','e'), 'š','s'), 'č','c'),'ř','r'), 'ž','z'), 'î','i'),'ï','i'),'ð','o'),
'ñ','n'),'ò','o'),'ó','o'),'ô','o'),'õ','o'),'ö','o'),'ø','o'),'%', '')

It will create a function you can call like:

UPDATE my_table set my_new_slug_column = toSlug(my_any_column_id_like_to_slug);

It will get my_any_column_id_like_to_slug value and rewrite/copy to my_new_slug_column in table my_table

You can also convert a column with text to its slug like:

UPDATE my_table set my_column = toSlug(my_column);

This case will update my_column itself so e.g. 'Oh my gosh' will be 'oh-my-gosh'

Felipe Lima
  • 443
  • 1
  • 10
  • 19
1

My two cents:

CREATE FUNCTION slugify(str VARCHAR(255))
  RETURNS VARCHAR(255)
  LANGUAGE SQL
  DETERMINISTIC
  NO SQL
  SQL SECURITY INVOKER
BEGIN
    DECLARE slug, allowed_chars VARCHAR(255);
    DECLARE current_char VARCHAR(1);
    DECLARE pos, len INT;

    -- Add here custom replaces
    SET slug = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(LOWER(TRIM(str)), 'ú', 'u'), 'ç', 'c'), 'ğ', 'g'), ' ', '-'), 'é', 'e'), 'è', 'e'), 'ë', 'e'), 'í', 'i'), 'î', 'i'), 'ò', 'o'), 'õ', 'o'), 'ù', 'u'), 'â', 'a'), 'ã', 'a'), 'ö', 'o'), 'ş', 's'), 'ì', 'i'), 'æ', 'ae'), 'à', 'a'), 'ê', 'e'), 'ñ', 'n'), 'ý', 'y'), 'ô', 'o'), 'û', 'u'), 'ï', 'i'), 'ó', 'o'), 'ü', 'u'), 'á', 'a'), 'å', 'a'), 'ä', 'a'), '_', '-');
    SET pos = 1;
    SET len = CHAR_LENGTH(slug);
    SET allowed_chars = 'abcdefghijklmnopqrstuvwxyz0123456789-';

    -- Remove not allowed characters
    WHILE pos <= len DO
        SET current_char = SUBSTRING(slug, pos, 1);
        IF LOCATE(current_char, allowed_chars) = 0 THEN
            SET slug = REPLACE(slug, current_char, '');
        END IF;
        SET pos = pos + 1;
    END WHILE;
    
    -- Squish dashes
    WHILE LOCATE('--', slug) > 0 DO
        SET slug = REPLACE(slug, '--', '-');
    END WHILE;
    
    RETURN slug;
END;
Pioz
  • 6,051
  • 4
  • 48
  • 67
0
DELIMITER $$
CREATE FUNCTION slug(input_string VARCHAR(255)) RETURNS VARCHAR(255)
DETERMINISTIC
BEGIN
    DECLARE slug          VARCHAR(255);
    DECLARE replace_chars VARCHAR(100) DEFAULT 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÝýÿÑñ';
    DECLARE with_chars    VARCHAR(100) DEFAULT 'AAAAAAaaaaaaOOOOOOooooooEEEEeeeeCcIIIIiiiiUUUUuuuuYyyNn';
    DECLARE i             INT          DEFAULT 1;
    DECLARE len           INT;

    -- Remove special characters.
    SET slug = LOWER(input_string);
    SET slug = REGEXP_REPLACE(slug, '[^a-z0-9]+', '-');

    -- Replace accented characters.
    SET len = CHAR_LENGTH(replace_chars);

    WHILE i <= len DO
        SET slug = REPLACE(slug, SUBSTRING(replace_chars, i, 1), SUBSTRING(with_chars, i, 1));
        SET i = i + 1;
    END WHILE;

    -- Remove leading and trailing hyphens.
    SET slug = TRIM(BOTH '-' FROM slug);

    RETURN slug;
END$$
DELIMITER ;
Tyler
  • 161
  • 1
  • 11