I was having difficulty copy-pasting the string from SO into MATLAB: for some reason the "è" showed up as char(65533) (the Unicode replacement character) instead of the correct char(232).
Anyway, I threw together a small conversion utility to convert strings or cellstrings to their Unicode-in-HTML equivalent, to complement horchler's answer:
function html = toHTML(strings)
%TOHTML  Convert strings to their Unicode-in-HTML equivalent.
%
%   html = toHTML(strings) replaces every character in STRINGS by its
%   decimal numeric character reference ('&#N;', where N is the character
%   code) and wraps the result in '<html>' ... '</html>'.
%
%   Input:
%     strings - a char array (single row or multi-row) or a cell array
%               of char arrays (cellstring).
%
%   Output:
%     html    - a char row vector if STRINGS was a char array, otherwise
%               a cell array of the same size as STRINGS with one HTML
%               string per element.
%
%   Newline handling:
%     - Rows of a multi-row char array are joined with '<br>'.
%     - Every line feed (char(10)) in the input becomes '<br>'.
%     - A '<br>' directly in front of the closing '</html>' is removed.
%
%   Errors:
%     toHTML:invalid_input - STRINGS is neither a char array nor a
%                            cellstring.
%
%   Example:
%     toHTML(char(232))     % returns '<html>&#232;</html>'
%     toHTML(['ab';'cd'])   % returns '<html>&#97;&#98;<br>&#99;&#100;</html>'

    %% Initialize

    % Basic IO check
    if ~iscellstr(strings) && ~ischar(strings)
        error(...
            'toHTML:invalid_input',...
            ['Invalid input class: ''%s''.\n',...
            'Supported input types are ''char'' or a ''cell'' containing ''char''.'], class(strings));
    end

    % Provide support for
    % - Single and multiline line char arrays
    % - Cellstrings
    wasChar = ischar(strings);
    if wasChar
        if size(strings,1) > 1
            % Terminate every row with a line feed so that the row
            % boundaries survive the flattening done in encodeChars()
            strings(:, end+1) = char(10);
        end
        strings = {strings};
    end

    %% Convert all strings to their unicode representation in HTML

    % Just for abbreviation
    uf = {'UniformOutput',false};

    % Convert all characters to their HTML unicode representation
    html = cellfun(@encodeChars, strings, uf{:});

    % Include HTML tags
    html = cellfun(@(x) ['<html>' x '</html>'], html, uf{:});

    % Take care of newlining. At this point a line feed only exists in its
    % encoded form '&#10;'; the trailing ';' in the pattern prevents
    % accidental matches inside longer codes such as '&#100;'.
    html = regexprep(html, '&#10;', '<br>');
    html = regexprep(html, '<br></html>$', '</html>');

    % Make output type consistent with input type
    if wasChar
        html = [html{:}];
    end

end

% Encode one char array as a sequence of '&#N;' numeric character references
function encoded = encodeChars(str)

    % Transposing first makes the column-major traversal done by sprintf
    % walk the original array row by row.
    codes = double(str.');

    if isempty(codes)
        % sprintf() with an empty argument would still print the literal
        % parts of the format ('&#;'), so handle empty input explicitly.
        encoded = '';
    else
        encoded = sprintf('&#%d;', codes);
    end

end
I'm currently submitting this to the FEX as well. If anyone knows whether such a thing exists already, please let me know.