32

i was wondering if there is a MYSQL function to decode text with html entities. I have seen some approaches using REPLACE but it looks kinda hard to manage all the entities.

Raul Leaño Martinet
  • 2,035
  • 6
  • 28
  • 44
  • 9
    The fact that you ask this implies that you have an error in your application design. The DB engine should not be concerned with this, and changing the application so that you do not need such a function is the right thing to do. – Tomalak Sep 09 '10 at 17:28
  • 16
    @Tomalak that's simply ridiculous. That you can't think of a scenario where one may need it doen't mean it doesn't exist. The OP hasn't specified what he's using this for, so you can't assume he's doing something wrong. – matteo Dec 10 '12 at 22:09
  • @matteo I did not even say that I can't think of such a scenario. I said that that things are messed up when you find yourself in such a scenario. It's the proverbial "when in a hole, stop digging" situation. That something - *somehow* - might be technically possible is not at all a good reason to do it. Instead of reinforcing an ill-fated approach, my advice was to sit back and get some perspective. – Tomalak Dec 10 '12 at 22:36
  • 5
    ? lots of people have to deal with applications they can't change..... starting from scratch is a luxury – Mike M Sep 18 '17 at 01:06

5 Answers5

33

You can create function like below

    DELIMITER $$ 
    DROP FUNCTION IF EXISTS `HTML_UnEncode`$$ 
    CREATE FUNCTION `HTML_UnEncode`(X VARCHAR(255)) RETURNS VARCHAR(255) CHARSET latin1 DETERMINISTIC
    BEGIN 

    DECLARE TextString VARCHAR(255) ; 
    SET TextString = X ; 

    #quotation mark 
    IF INSTR( X , '"' ) 
    THEN SET TextString = REPLACE(TextString, '"','"') ; 
    END IF ; 

    #apostrophe  
    IF INSTR( X , ''' ) 
    THEN SET TextString = REPLACE(TextString, ''','"') ; 
    END IF ; 

    #ampersand 
    IF INSTR( X , '&' ) 
    THEN SET TextString = REPLACE(TextString, '&','&') ; 
    END IF ; 

    #less-than 
    IF INSTR( X , '<' ) 
    THEN SET TextString = REPLACE(TextString, '&lt;','<') ; 
    END IF ; 

    #greater-than 
    IF INSTR( X , '&gt;' ) 
    THEN SET TextString = REPLACE(TextString, '&gt;','>') ; 
    END IF ; 

    #non-breaking space 
    IF INSTR( X , '&nbsp;' ) 
    THEN SET TextString = REPLACE(TextString, '&nbsp;',' ') ; 
    END IF ; 

    #inverted exclamation mark 
    IF INSTR( X , '&iexcl;' ) 
    THEN SET TextString = REPLACE(TextString, '&iexcl;','¡') ; 
    END IF ; 

    #cent 
    IF INSTR( X , '&cent;' ) 
    THEN SET TextString = REPLACE(TextString, '&cent;','¢') ; 
    END IF ; 

    #pound 
    IF INSTR( X , '&pound;' ) 
    THEN SET TextString = REPLACE(TextString, '&pound;','£') ; 
    END IF ; 

    #currency 
    IF INSTR( X , '&curren;' ) 
    THEN SET TextString = REPLACE(TextString, '&curren;','¤') ; 
    END IF ; 

    #yen 
    IF INSTR( X , '&yen;' ) 
    THEN SET TextString = REPLACE(TextString, '&yen;','¥') ; 
    END IF ; 

    #broken vertical bar 
    IF INSTR( X , '&brvbar;' ) 
    THEN SET TextString = REPLACE(TextString, '&brvbar;','¦') ; 
    END IF ; 

    #section 
    IF INSTR( X , '&sect;' ) 
    THEN SET TextString = REPLACE(TextString, '&sect;','§') ; 
    END IF ; 

    #spacing diaeresis 
    IF INSTR( X , '&uml;' ) 
    THEN SET TextString = REPLACE(TextString, '&uml;','¨') ; 
    END IF ; 

    #copyright 
    IF INSTR( X , '&copy;' ) 
    THEN SET TextString = REPLACE(TextString, '&copy;','©') ; 
    END IF ; 

    #feminine ordinal indicator 
    IF INSTR( X , '&ordf;' ) 
    THEN SET TextString = REPLACE(TextString, '&ordf;','ª') ; 
    END IF ; 

    #angle quotation mark (left) 
    IF INSTR( X , '&laquo;' ) 
    THEN SET TextString = REPLACE(TextString, '&laquo;','«') ; 
    END IF ; 

    #negation 
    IF INSTR( X , '&not;' ) 
    THEN SET TextString = REPLACE(TextString, '&not;','¬') ; 
    END IF ; 

    #soft hyphen 
    IF INSTR( X , '&shy;' ) 
    THEN SET TextString = REPLACE(TextString, '&shy;','­') ; 
    END IF ; 

    #registered trademark 
    IF INSTR( X , '&reg;' ) 
    THEN SET TextString = REPLACE(TextString, '&reg;','®') ; 
    END IF ; 

    #spacing macron 
    IF INSTR( X , '&macr;' ) 
    THEN SET TextString = REPLACE(TextString, '&macr;','¯') ; 
    END IF ; 

    #degree 
    IF INSTR( X , '&deg;' ) 
    THEN SET TextString = REPLACE(TextString, '&deg;','°') ; 
    END IF ; 

    #plus-or-minus  
    IF INSTR( X , '&plusmn;' ) 
    THEN SET TextString = REPLACE(TextString, '&plusmn;','±') ; 
    END IF ; 

    #superscript 2 
    IF INSTR( X , '&sup2;' ) 
    THEN SET TextString = REPLACE(TextString, '&sup2;','²') ; 
    END IF ; 

    #superscript 3 
    IF INSTR( X , '&sup3;' ) 
    THEN SET TextString = REPLACE(TextString, '&sup3;','³') ; 
    END IF ; 

    #spacing acute 
    IF INSTR( X , '&acute;' ) 
    THEN SET TextString = REPLACE(TextString, '&acute;','´') ; 
    END IF ; 

    #micro 
    IF INSTR( X , '&micro;' ) 
    THEN SET TextString = REPLACE(TextString, '&micro;','µ') ; 
    END IF ; 

    #paragraph 
    IF INSTR( X , '&para;' ) 
    THEN SET TextString = REPLACE(TextString, '&para;','¶') ; 
    END IF ; 

    #middle dot 
    IF INSTR( X , '&middot;' ) 
    THEN SET TextString = REPLACE(TextString, '&middot;','·') ; 
    END IF ; 

    #spacing cedilla 
    IF INSTR( X , '&cedil;' ) 
    THEN SET TextString = REPLACE(TextString, '&cedil;','¸') ; 
    END IF ; 

    #superscript 1 
    IF INSTR( X , '&sup1;' ) 
    THEN SET TextString = REPLACE(TextString, '&sup1;','¹') ; 
    END IF ; 

    #masculine ordinal indicator 
    IF INSTR( X , '&ordm;' ) 
    THEN SET TextString = REPLACE(TextString, '&ordm;','º') ; 
    END IF ; 

    #angle quotation mark (right) 
    IF INSTR( X , '&raquo;' ) 
    THEN SET TextString = REPLACE(TextString, '&raquo;','»') ; 
    END IF ; 

    #fraction 1/4 
    IF INSTR( X , '&frac14;' ) 
    THEN SET TextString = REPLACE(TextString, '&frac14;','¼') ; 
    END IF ; 

    #fraction 1/2 
    IF INSTR( X , '&frac12;' ) 
    THEN SET TextString = REPLACE(TextString, '&frac12;','½') ; 
    END IF ; 

    #fraction 3/4 
    IF INSTR( X , '&frac34;' ) 
    THEN SET TextString = REPLACE(TextString, '&frac34;','¾') ; 
    END IF ; 

    #inverted question mark 
    IF INSTR( X , '&iquest;' ) 
    THEN SET TextString = REPLACE(TextString, '&iquest;','¿') ; 
    END IF ; 

    #multiplication 
    IF INSTR( X , '&times;' ) 
    THEN SET TextString = REPLACE(TextString, '&times;','×') ; 
    END IF ; 

    #division 
    IF INSTR( X , '&divide;' ) 
    THEN SET TextString = REPLACE(TextString, '&divide;','÷') ; 
    END IF ; 

    #capital a, grave accent 
    IF INSTR( X , '&Agrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&Agrave;','À') ; 
    END IF ; 

    #capital a, acute accent 
    IF INSTR( X , '&Aacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Aacute;','Á') ; 
    END IF ; 

    #capital a, circumflex accent 
    IF INSTR( X , '&Acirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&Acirc;','Â') ; 
    END IF ; 

    #capital a, tilde 
    IF INSTR( X , '&Atilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&Atilde;','Ã') ; 
    END IF ; 

    #capital a, umlaut mark 
    IF INSTR( X , '&Auml;' ) 
    THEN SET TextString = REPLACE(TextString, '&Auml;','Ä') ; 
    END IF ; 

    #capital a, ring 
    IF INSTR( X , '&Aring;' ) 
    THEN SET TextString = REPLACE(TextString, '&Aring;','Å') ; 
    END IF ; 

    #capital ae 
    IF INSTR( X , '&AElig;' ) 
    THEN SET TextString = REPLACE(TextString, '&AElig;','Æ') ; 
    END IF ; 

    #capital c, cedilla 
    IF INSTR( X , '&Ccedil;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ccedil;','Ç') ; 
    END IF ; 

    #capital e, grave accent 
    IF INSTR( X , '&Egrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&Egrave;','È') ; 
    END IF ; 

    #capital e, acute accent 
    IF INSTR( X , '&Eacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Eacute;','É') ; 
    END IF ; 

    #capital e, circumflex accent 
    IF INSTR( X , '&Ecirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ecirc;','Ê') ; 
    END IF ; 

    #capital e, umlaut mark 
    IF INSTR( X , '&Euml;' ) 
    THEN SET TextString = REPLACE(TextString, '&Euml;','Ë') ; 
    END IF ; 

    #capital i, grave accent 
    IF INSTR( X , '&Igrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&Igrave;','Ì') ; 
    END IF ; 

    #capital i, acute accent 
    IF INSTR( X , '&Iacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Iacute;','Í') ; 
    END IF ; 

    #capital i, circumflex accent 
    IF INSTR( X , '&Icirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&Icirc;','Î') ; 
    END IF ; 

    #capital i, umlaut mark 
    IF INSTR( X , '&Iuml;' ) 
    THEN SET TextString = REPLACE(TextString, '&Iuml;','Ï') ; 
    END IF ; 

    #capital eth, Icelandic 
    IF INSTR( X , '&ETH;' ) 
    THEN SET TextString = REPLACE(TextString, '&ETH;','Ð') ; 
    END IF ; 

    #capital n, tilde 
    IF INSTR( X , '&Ntilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ntilde;','Ñ') ; 
    END IF ; 

    #capital o, grave accent 
    IF INSTR( X , '&Ograve;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ograve;','Ò') ; 
    END IF ; 

    #capital o, acute accent 
    IF INSTR( X , '&Oacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Oacute;','Ó') ; 
    END IF ; 

    #capital o, circumflex accent 
    IF INSTR( X , '&Ocirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ocirc;','Ô') ; 
    END IF ; 

    #capital o, tilde 
    IF INSTR( X , '&Otilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&Otilde;','Õ') ; 
    END IF ; 

    #capital o, umlaut mark 
    IF INSTR( X , '&Ouml;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ouml;','Ö') ; 
    END IF ; 

    #capital o, slash 
    IF INSTR( X , '&Oslash;' ) 
    THEN SET TextString = REPLACE(TextString, '&Oslash;','Ø') ; 
    END IF ; 

    #capital u, grave accent 
    IF INSTR( X , '&Ugrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ugrave;','Ù') ; 
    END IF ; 

    #capital u, acute accent 
    IF INSTR( X , '&Uacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Uacute;','Ú') ; 
    END IF ; 

    #capital u, circumflex accent 
    IF INSTR( X , '&Ucirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&Ucirc;','Û') ; 
    END IF ; 

    #capital u, umlaut mark 
    IF INSTR( X , '&Uuml;' ) 
    THEN SET TextString = REPLACE(TextString, '&Uuml;','Ü') ; 
    END IF ; 

    #capital y, acute accent 
    IF INSTR( X , '&Yacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&Yacute;','Ý') ; 
    END IF ; 

    #capital THORN, Icelandic 
    IF INSTR( X , '&THORN;' ) 
    THEN SET TextString = REPLACE(TextString, '&THORN;','Þ') ; 
    END IF ; 

    #small sharp s, German 
    IF INSTR( X , '&szlig;' ) 
    THEN SET TextString = REPLACE(TextString, '&szlig;','ß') ; 
    END IF ; 

    #small a, grave accent 
    IF INSTR( X , '&agrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&agrave;','à') ; 
    END IF ; 

    #small a, acute accent 
    IF INSTR( X , '&aacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&aacute;','á') ; 
    END IF ; 

    #small a, circumflex accent 
    IF INSTR( X , '&acirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&acirc;','â') ; 
    END IF ; 

    #small a, tilde 
    IF INSTR( X , '&atilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&atilde;','ã') ; 
    END IF ; 

    #small a, umlaut mark 
    IF INSTR( X , '&auml;' ) 
    THEN SET TextString = REPLACE(TextString, '&auml;','ä') ; 
    END IF ; 

    #small a, ring 
    IF INSTR( X , '&aring;' ) 
    THEN SET TextString = REPLACE(TextString, '&aring;','å') ; 
    END IF ; 

    #small ae 
    IF INSTR( X , '&aelig;' ) 
    THEN SET TextString = REPLACE(TextString, '&aelig;','æ') ; 
    END IF ; 

    #small c, cedilla 
    IF INSTR( X , '&ccedil;' ) 
    THEN SET TextString = REPLACE(TextString, '&ccedil;','ç') ; 
    END IF ; 

    #small e, grave accent 
    IF INSTR( X , '&egrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&egrave;','è') ; 
    END IF ; 

    #small e, acute accent 
    IF INSTR( X , '&eacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&eacute;','é') ; 
    END IF ; 

    #small e, circumflex accent 
    IF INSTR( X , '&ecirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&ecirc;','ê') ; 
    END IF ; 

    #small e, umlaut mark 
    IF INSTR( X , '&euml;' ) 
    THEN SET TextString = REPLACE(TextString, '&euml;','ë') ; 
    END IF ; 

    #small i, grave accent 
    IF INSTR( X , '&igrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&igrave;','ì') ; 
    END IF ; 

    #small i, acute accent 
    IF INSTR( X , '&iacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&iacute;','í') ; 
    END IF ; 

    #small i, circumflex accent 
    IF INSTR( X , '&icirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&icirc;','î') ; 
    END IF ; 

    #small i, umlaut mark 
    IF INSTR( X , '&iuml;' ) 
    THEN SET TextString = REPLACE(TextString, '&iuml;','ï') ; 
    END IF ; 

    #small eth, Icelandic 
    IF INSTR( X , '&eth;' ) 
    THEN SET TextString = REPLACE(TextString, '&eth;','ð') ; 
    END IF ; 

    #small n, tilde 
    IF INSTR( X , '&ntilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&ntilde;','ñ') ; 
    END IF ; 

    #small o, grave accent 
    IF INSTR( X , '&ograve;' ) 
    THEN SET TextString = REPLACE(TextString, '&ograve;','ò') ; 
    END IF ; 

    #small o, acute accent 
    IF INSTR( X , '&oacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&oacute;','ó') ; 
    END IF ; 

    #small o, circumflex accent 
    IF INSTR( X , '&ocirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&ocirc;','ô') ; 
    END IF ; 

    #small o, tilde 
    IF INSTR( X , '&otilde;' ) 
    THEN SET TextString = REPLACE(TextString, '&otilde;','õ') ; 
    END IF ; 

    #small o, umlaut mark 
    IF INSTR( X , '&ouml;' ) 
    THEN SET TextString = REPLACE(TextString, '&ouml;','ö') ; 
    END IF ; 

    #small o, slash 
    IF INSTR( X , '&oslash;' ) 
    THEN SET TextString = REPLACE(TextString, '&oslash;','ø') ; 
    END IF ; 

    #small u, grave accent 
    IF INSTR( X , '&ugrave;' ) 
    THEN SET TextString = REPLACE(TextString, '&ugrave;','ù') ; 
    END IF ; 

    #small u, acute accent 
    IF INSTR( X , '&uacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&uacute;','ú') ; 
    END IF ; 

    #small u, circumflex accent 
    IF INSTR( X , '&ucirc;' ) 
    THEN SET TextString = REPLACE(TextString, '&ucirc;','û') ; 
    END IF ; 

    #small u, umlaut mark 
    IF INSTR( X , '&uuml;' ) 
    THEN SET TextString = REPLACE(TextString, '&uuml;','ü') ; 
    END IF ; 

    #small y, acute accent 
    IF INSTR( X , '&yacute;' ) 
    THEN SET TextString = REPLACE(TextString, '&yacute;','ý') ; 
    END IF ; 

    #small thorn, Icelandic 
    IF INSTR( X , '&thorn;' ) 
    THEN SET TextString = REPLACE(TextString, '&thorn;','þ') ; 
    END IF ; 

    #small y, umlaut mark 
    IF INSTR( X , '&yuml;' ) 
    THEN SET TextString = REPLACE(TextString, '&yuml;','ÿ') ; 
    END IF ; 

    RETURN TextString ; 

    END$$ 

    DELIMITER ;

And execute below query :

SELECT HTML_UnEncode('this is a &lt;a&gt;test, nothing more');
Balmipour
  • 2,985
  • 1
  • 24
  • 28
Bhargav
  • 556
  • 4
  • 8
  • 1
    I'm reading entries produced by a (buggy ?) CKeditor, and also had to put ampersand replacement in first position, **also affecting X** so that chars like `"é"`, which were sometimes escaped as `&acute`; i.e. `#ampersand` `IF INSTR( X , '&' )` `THEN SET TextString = REPLACE(TextString, '&','&') ;` `SET X = REPLACE(X, '&','&') ;` `END IF ;` This seems anormal behaviour to me, so I did only fix the typo producing error in your answer, but I wouldn't be much surprised to read I'm not the only one in that case. Hope it helps. – Balmipour Feb 17 '17 at 09:17
  • Also had to add the single quote. `REPLACE(TextString, ''',"'")` This wasn't added in my Edit. (_and I'm not sure consecutive edits are a good practice. is it ? How is it considered by the community ?_) – Balmipour Feb 17 '17 at 09:36
  • missing basic stuff like `™`, `“`, `”` – But those new buttons though.. Oct 30 '17 at 17:59
  • Perfect function, works great! Except, should be used with correct charset! With "utf8" in my case. Define all 3 charsets - for parameter, for return and for declared `TextString` https://imgur.com/a/dQnjiRM – Arnis Juraga Feb 13 '20 at 12:46
  • 1
    Made some more changes and created gist https://gist.github.com/arnisjuraga/c90a85a856ee878130105d0c60a4a181 – Arnis Juraga Feb 13 '20 at 14:09
20

The following function can be used to decode numeric entities like &#12345;

CREATE FUNCTION entity_decode(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
    NO SQL
    DETERMINISTIC
BEGIN

    DECLARE tmp TEXT    CHARSET utf8 DEFAULT txt;
    DECLARE entity  TEXT CHARSET utf8;
    DECLARE pos1    INT DEFAULT 1;
    DECLARE pos2    INT;
    DECLARE codepoint   INT;

    IF txt IS NULL THEN
        RETURN NULL;
    END IF;
    LOOP
        SET pos1 = LOCATE('&#', tmp, pos1);
        IF pos1 = 0 THEN
            RETURN tmp;
        END IF;
        SET pos2 = LOCATE(';', tmp, pos1 + 2);
        IF pos2 > pos1 THEN
            SET entity = SUBSTRING(tmp, pos1, pos2 - pos1 + 1);
            IF entity REGEXP '^&#[[:digit:]]+;$' THEN
                SET codepoint = CAST(SUBSTRING(entity, 3, pos2 - pos1 - 2) AS UNSIGNED);
                IF codepoint > 31 THEN
                    SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
                END IF;
            END IF;
        END IF;
        SET pos1 = pos1 + 1;
    END LOOP;
END
epos_jk
  • 289
  • 2
  • 6
  • 2
    You should add **DELIMITER $$** to the first line in your function – Mohamad Hamouday Nov 21 '17 at 13:35
  • 2
    It's been a very long time now, but for those readers that still stumble upon this like me: Note that some `𞉀` doesn't fit back into UTF-8. A character that matches that pattern but only fits into UTF-16 or UTF-32 is likely to throw an error in CONCAT() about mixed collations. Consider handling or skipping characters that fall outside of your UTF-X scope like thus: `codepoint > 31 AND codepoint < 65535` – A554551N Aug 01 '19 at 13:24
  • 1
    Alternatively to what @A554551N said, if you want those characters to work you could change all of the `utf8`'s to `utf8mb4`. (Could even get rid of the `codepoint > 31` if statement if you've got some other characters like ` ` that you want changed. – abney317 Sep 26 '19 at 23:02
4

This adds support for hex encoded numeric values (which I get from Perl using "use HTML::Entities qw(encode_entities_numeric);".

DELIMITER $$
    CREATE FUNCTION entity_decode(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
    NO SQL
    DETERMINISTIC
    BEGIN

    DECLARE tmp TEXT    CHARSET utf8 DEFAULT txt;
    DECLARE entity  TEXT CHARSET utf8;
    DECLARE pos1    INT DEFAULT 1;
    DECLARE pos2    INT;
    DECLARE codepoint   INT;

    IF txt IS NULL THEN
        RETURN NULL;
    END IF;
    LOOP
        SET pos1 = LOCATE('&#', tmp, pos1);
        IF pos1 = 0 THEN
            RETURN tmp;
        END IF;
        SET pos2 = LOCATE(';', tmp, pos1 + 2);
        IF pos2 > pos1 THEN
            SET entity = SUBSTRING(tmp, pos1, pos2 - pos1 + 1);
            IF entity REGEXP '^&#[[:digit:]]+;$' THEN
                SET codepoint = CAST(SUBSTRING(entity, 3, pos2 - pos1 - 2) AS UNSIGNED);
                IF codepoint > 31 THEN
                    SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
                END IF;
            END IF;
            IF entity REGEXP '^&#x[[:digit:]]+;$' THEN
                SET codepoint = CAST(CONV(SUBSTRING(entity, 4, pos2 - pos1 - 3), 16, 10) AS UNSIGNED);
                IF codepoint > 31 THEN
                    SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
                END IF;
            END IF;
        END IF;
        SET pos1 = pos1 + 1;
    END LOOP;
END$$
DELIMITER ;
Robotometry
  • 428
  • 3
  • 8
  • This works great, but note that the second regex, which matches the hex encoded entities should be '^[[:xdigit:]]+;$' instead of '^[[:digit:]]+;$' (change digit to xdigit) so that it also matches hex digits A-F. – SeanN Jul 30 '18 at 16:58
0

I have created this function:

DELIMITER $$
DROP FUNCTION IF EXISTS DecodeHTML$$

CREATE FUNCTION DecodeHTML(X VARCHAR(255)) RETURNS VARCHAR(255) CHARSET utf8 DETERMINISTIC
BEGIN

DECLARE TextString VARCHAR(255);
DECLARE word VARCHAR(255);

SET TextString = X;
SET word = '&aacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'á');
END IF;

SET word = '&eacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'é') ;
END IF;

SET word = '&iacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'í') ;
END IF;

SET word = '&oacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ó') ;
END IF;

SET word = '&uacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ú') ;
END IF;

SET word = '&Aacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Á') ;
END IF;

SET word = '&Eacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'É') ;
END IF;

SET word = '&Iacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Í') ;
END IF;

SET word = '&Oacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ó') ;
END IF;

SET word = '&Uacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ú') ;
END IF;

SET word = '&Agrave;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'À');
END IF;

SET word = '&Acirc;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Â');
END IF;

SET word = '&Auml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ä');
END IF;

SET word = '&AElig;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Æ');
END IF;

SET word = '&Egrave;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'È');
END IF;

SET word = '&Ecirc;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ê');
END IF;

SET word = '&Igrave;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ì');
END IF;

SET word = '&ETH;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ð');
END IF;

SET word = '&Ograve;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ò');
END IF;

SET word = '&Ugrave;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ù');
END IF;

SET word = '&Ocirc;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ô');
END IF;

SET word = '&Ouml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ö');
END IF;

SET word = '&Oslash;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ø');
END IF;

SET word = '&Uuml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ü');
END IF;

SET word = '&uuml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ü');
END IF;

SET word = '&yuml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ÿ');
END IF;

SET word = '&yacute;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ý');
END IF;

SET word = '&otilde;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'õ');
END IF;

SET word = '&iuml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ï');
END IF;

SET word = '&ccedil;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ç');
END IF;

SET word = '&euml;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ë');
END IF;

SET word = '&Icirc;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Î');
END IF;

SET word = '&euro;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'€') ;
END IF;

SET word = '&ntilde;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'ñ') ;
END IF;

SET word = '&Ntilde;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'Ñ') ;
END IF;

SET word = '&pound;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'£') ;
END IF;

SET word = '&copy;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'©') ;
END IF;

SET word = '&reg;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'®');
END IF;

SET word = '&nbsp;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,' ');
END IF;

SET word = '&curren;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'¤');
END IF;

SET word = '&brvbar;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'¦');
END IF;

SET word = '&para;';

IF INSTR( X , word ) THEN 
    SET TextString = REPLACE(TextString, word,'¶');
END IF;

RETURN TextString;
END$$
DELIMITER ;
Daniel
  • 214
  • 4
  • 9
-2

Nope, there is no function in mySQL to decode html entities since it is not concerned with html :)

Sarfraz
  • 377,238
  • 77
  • 533
  • 578