1

Does Excel VBA provide any sort of method, function, or API for Punycode or converting to and from Unicode/Ascii for Internationalized Domain Names?

I was able to find a JavaScript version (below) which seems to work for IDNA2008 Standards. I am still able to use this in VBA, but I think this is a little clunky...

Another method I've tried is to web-scrape the responses from an online Punycode converter (http://www.unicode.org/cldr/utility/idna.jsp?a=), but I'm really not crazy about this, as websites are sometimes unavailable and it's not very efficient...

Since the different web browsers (Internet Explorer, Firefox, and Chrome) utilize different standards, I need to be able to convert to and from Punycode using all standards (IDNA2003, UTS46, IDNA2008) so that I can produce/capture all possible variations.

For example using the domain "schüloß.de" I should be able to produce 2 different variations:

Unicode:

schüloss.de <- IDNA2003
schüloß.de  <- IDNA2008

ASCII:

xn--schloss-p2a.de <- IDNA2003
xn--schlo-pqa4r.de <- IDNA2008

I'm not aware of any web object or built in functionality to help with Punycode...

Would someone have a reliable and efficient method to perform Punycode conversions for all standards? Any help would be greatly appreciated.

Option Explicit
'Code loosely based on http://stackoverflow.com/questions/5773683/excel-vba-parsed-json-object-loop
'Requires an early-bound reference, set via Tools->References->
'MSScriptControl; Microsoft Script Control 1.0;  {0E59F1D2-1FBE-11D0-8FF2-00A0D10038BC}; C:\Windows\SysWOW64\msscript.ocx
'Cached script engine instance. It is created on demand by the ScriptEngine
'property and released (set to Nothing) by the conversion functions after each call.
Private moScriptEngine As ScriptControl

' Returns the module's shared ScriptControl, creating it and selecting the
' JScript language the first time it is requested (or after it was released).
Private Property Get ScriptEngine()
    ' Fast path: an engine already exists, hand it back unchanged.
    If Not moScriptEngine Is Nothing Then
        Set ScriptEngine = moScriptEngine
        Exit Property
    End If

    ' Slow path: build a fresh engine configured for JScript.
    Set moScriptEngine = New ScriptControl
    With moScriptEngine
        .Language = "JScript"
        '.AllowUI = True
    End With
    Set ScriptEngine = moScriptEngine
End Property

' Returns the JavaScript source of punycode.js (the embedded script reports
' 'version': '1.4.1') as a single string, ready to be loaded into the script engine.
' Taken from: https://github.com/bestiejs/punycode.js/
'
' Once loaded, the script assigns an object named "punycode" on the root object
' (the "this" passed to the wrapper function). It offers:
'   punycode.toASCII(s)   Unicode  -> Punycode   e.g. ScriptEngine.Eval("punycode.toASCII('UNICODE')") = PUNYCODE
'   punycode.toUnicode(s) Punycode -> Unicode    e.g. ScriptEngine.Eval("punycode.toUnicode('PUNYCODE')") = UNICODE
'   punycode.encode / punycode.decode / punycode.ucs2 (lower-level helpers)
'
' toASCII splits the input on '@' and on the label separators, and prefixes
' 'xn--' to every label that contains a character outside \x20-\x7E.
' toUnicode decodes every label that starts with lower-case 'xn--'.
' NOTE(review): the script contains no character mapping or validation step
' (no case folding, no "ß" -> "ss"), so each label is encoded exactly as given.
'
' The whole script is ONE VBA string literal spread over line continuations,
' so comments cannot be placed between the lines below.
Private Function IDNA2008_JScript() As String
    IDNA2008_JScript = ";(function(root) { var freeExports = typeof exports == 'object' && exports && !exports.nodeType && exports; var freeModule = typeof module == 'object' && module && !module.nodeType && module; var freeGlobal = typeof global == 'object' && global; " & _
    "if ( freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal ) { root = freeGlobal;} var punycode, maxInt = 2147483647, base = 36, tMin = 1, tMax = 26, skew = 38, damp = 700, initialBias = 72, initialN = 128, " & _
    "delimiter = '-', regexPunycode = /^xn--/, regexNonASCII = /[^\x20-\x7E]/, regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, errors = { 'overflow': 'Overflow: input needs wider integers to process', 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', 'invalid-input': 'Invalid input' }, " & _
    "baseMinusTMin = base - tMin, floor = Math.floor, stringFromCharCode = String.fromCharCode, key; " & _
    "function error(type) { throw new RangeError(errors[type]); } function map(array, fn) { var length = array.length; var result = []; while (length--) { result[length] = fn(array[length]); } return result; } " & _
    "function mapDomain(string, fn) { var parts = string.split('@'); var result = ''; if (parts.length > 1) { result = parts[0] + '@'; string = parts[1]; } string = string.replace(regexSeparators, '\x2E'); var labels = string.split('.'); var encoded = map(labels, fn).join('.'); return result + encoded; } " & _
    "function ucs2decode(string) { var output = [], counter = 0, length = string.length, value, extra; while (counter < length) { value = string.charCodeAt(counter++); if (value >= 0xD800 && value <= 0xDBFF && counter < length) { extra = string.charCodeAt(counter++); if ((extra & 0xFC00) == 0xDC00) { " & _
    "output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); } else { output.push(value); counter--; } } else { output.push(value); } } return output; } function ucs2encode(array) { return map(array, function(value) { var output = ''; if (value > 0xFFFF) { " & _
    "value -= 0x10000; output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); value = 0xDC00 | value & 0x3FF; } output += stringFromCharCode(value); return output; }).join(''); } function basicToDigit(codePoint) { if (codePoint - 48 < 10) { return codePoint - 22; } " & _
    "if (codePoint - 65 < 26) { return codePoint - 65; } if (codePoint - 97 < 26) { return codePoint - 97; } return base; } function digitToBasic(digit, flag) { return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); } function adapt(delta, numPoints, firstTime) { var k = 0; delta = firstTime " & _
    "? floor(delta / damp) : delta >> 1; delta += floor(delta / numPoints); for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { delta = floor(delta / baseMinusTMin); } return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); } function decode(input) { " & _
    "var output = [], inputLength = input.length, out, i = 0, n = initialN, bias = initialBias, basic, j, index, oldi, w, k, digit, t, baseMinusT; basic = input.lastIndexOf(delimiter); if (basic < 0) { basic = 0; } for (j = 0; j < basic; ++j) { if (input.charCodeAt(j) >= 0x80) { error('not-basic'); } " & _
    "output.push(input.charCodeAt(j)); } for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { for (oldi = i, w = 1, k = base; /* no condition */; k += base) { if (index >= inputLength) { error('invalid-input'); } digit = basicToDigit(input.charCodeAt(index++)); " & _
    "if (digit >= base || digit > floor((maxInt - i) / w)) { error('overflow'); } i += digit * w; t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); if (digit < t) { break; } baseMinusT = base - t; if (w > floor(maxInt / baseMinusT)) { error('overflow'); } w *= baseMinusT; } " & _
    "out = output.length + 1; bias = adapt(i - oldi, out, oldi == 0); if (floor(i / out) > maxInt - n) { error('overflow'); } n += floor(i / out); i %= out; output.splice(i++, 0, n); } return ucs2encode(output); } function encode(input) { var n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, " & _
    "currentValue, output = [], inputLength, handledCPCountPlusOne, baseMinusT, qMinusT; input = ucs2decode(input); inputLength = input.length; n = initialN; delta = 0; bias = initialBias; for (j = 0; j < inputLength; ++j) { currentValue = input[j]; if (currentValue < 0x80) { " & _
    "output.push(stringFromCharCode(currentValue)); } } handledCPCount = basicLength = output.length; if (basicLength) { output.push(delimiter); } while (handledCPCount < inputLength) { for (m = maxInt, j = 0; j < inputLength; ++j) { currentValue = input[j]; " & _
    "if (currentValue >= n && currentValue < m) { m = currentValue; } } handledCPCountPlusOne = handledCPCount + 1; if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { error('overflow'); } delta += (m - n) * handledCPCountPlusOne; n = m; for (j = 0; j < inputLength; ++j) { " & _
    "currentValue = input[j]; if (currentValue < n && ++delta > maxInt) { error('overflow'); } if (currentValue == n) { for (q = delta, k = base; /* no condition */; k += base) { t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); if (q < t) { break; } qMinusT = q - t; baseMinusT = base - t; " & _
    "output.push( stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) ); q = floor(qMinusT / baseMinusT); } output.push(stringFromCharCode(digitToBasic(q, 0))); bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); delta = 0; ++handledCPCount; } } " & _
    "++delta; ++n; } return output.join(''); } function toUnicode(input) { return mapDomain(input, function(string) { return regexPunycode.test(string) ? decode(string.slice(4).toLowerCase()) : string; }); } function toASCII(input) { return mapDomain(input, function(string) { " & _
    "return regexNonASCII.test(string) ? 'xn--' + encode(string) : string; }); } punycode = { 'version': '1.4.1', 'ucs2': { 'decode': ucs2decode, 'encode': ucs2encode }, 'decode': decode, 'encode': encode, 'toASCII': toASCII, 'toUnicode': toUnicode }; if ( typeof define == 'function' && " & _
    "typeof define.amd == 'object' && define.amd ) { define('punycode', function() { return punycode; }); } else if (freeExports && freeModule) { if (module.exports == freeExports) { freeModule.exports = punycode; } else { for (key in punycode) { punycode.hasOwnProperty(key) " & _
    "&& (freeExports[key] = punycode[key]); } } } else { root.punycode = punycode; } }(this));"
End Function

' Converts a Unicode domain name (or e-mail address) to its Punycode/ASCII form
' by calling punycode.toASCII in the embedded punycode.js script.
'
' Parameters:
'   strINPUT - the Unicode text to convert, e.g. "schüloß.de".
' Returns:
'   The converted text (Variant holding a String), or vbNullString when the
'   script raises an error (conversion is best-effort; errors are swallowed).
' Side effects:
'   Releases the cached script engine (moScriptEngine) after the call.
Public Function IDNA2008_toPUNY(ByVal strINPUT As String)
    ScriptEngine.AddCode IDNA2008_JScript
    ' Expose the library call as a named global function so the input can be
    ' handed over as a real argument through ScriptControl.Run. Building the
    ' call as source text ("...toASCII('" & strINPUT & "')") breaks on inputs
    ' containing ', \ or line breaks and lets the input inject script code.
    ScriptEngine.AddCode "function IDNA2008_toPUNY_js(s) { return punycode.toASCII(s); }"
    On Error Resume Next
    IDNA2008_toPUNY = ScriptEngine.Run("IDNA2008_toPUNY_js", strINPUT)
    On Error GoTo 0
    Set moScriptEngine = Nothing
    ' A failed call leaves the return value Empty; normalise it to "".
    If IsEmpty(IDNA2008_toPUNY) Then IDNA2008_toPUNY = vbNullString
End Function

' Converts a Punycode/ASCII domain name (or e-mail address) back to Unicode
' by calling punycode.toUnicode in the embedded punycode.js script.
'
' Parameters:
'   strINPUT - the ASCII text to convert, e.g. "xn--schlo-pqa4r.de".
' Returns:
'   The converted text (Variant holding a String), or vbNullString when the
'   script raises an error (conversion is best-effort; errors are swallowed).
' Side effects:
'   Releases the cached script engine (moScriptEngine) after the call.
Public Function IDNA2008_toUNICODE(ByVal strINPUT As String)
    ScriptEngine.AddCode IDNA2008_JScript
    ' Expose the library call as a named global function so the input can be
    ' handed over as a real argument through ScriptControl.Run. Building the
    ' call as source text ("...toUnicode('" & strINPUT & "')") breaks on inputs
    ' containing ', \ or line breaks and lets the input inject script code.
    ScriptEngine.AddCode "function IDNA2008_toUNICODE_js(s) { return punycode.toUnicode(s); }"
    On Error Resume Next
    IDNA2008_toUNICODE = ScriptEngine.Run("IDNA2008_toUNICODE_js", strINPUT)
    On Error GoTo 0
    Set moScriptEngine = Nothing
    ' A failed call leaves the return value Empty; normalise it to "".
    If IsEmpty(IDNA2008_toUNICODE) Then IDNA2008_toUNICODE = vbNullString
End Function

' Manual smoke test: converts one sample domain in each direction and writes
' both results to the Immediate window.
Sub TestPunyCode()
    Dim vResult As Variant

    ' Unicode -> Punycode
    vResult = IDNA2008_toPUNY("schüloß.de")
    Debug.Print vResult

    ' Punycode -> Unicode
    vResult = IDNA2008_toUNICODE("xn--schlo-pqa4r.de")
    Debug.Print vResult
End Sub

Additional Info:
Using MS Office 2013 (32 bit) with Windows 7.

The solution cannot attempt to communicate with the Unicode domains. I'm not against using the WinHttp object, if that's even possible, but it must not send, or attempt to send, any outgoing network traffic to the domain, for security reasons...

Unfortunately this is one of those questions that require a VBA solution without the need to install external software.

Again, any help is greatly appreciated!

B Hart
  • 1,108
  • 11
  • 20
  • Would the [StrConv](https://msdn.microsoft.com/en-us/library/office/gg264628.aspx) function provide sufficient functionality, or do you need more than what it has to offer? – IInspectable May 02 '16 at 08:54
  • @IInspectable - Thank you but the StrConv function does not perform Punycode conversions. ("schüloß.de" <> "xn--schlo-pqa4r.de"). – B Hart May 02 '16 at 09:01

0 Answers0