0

As I understand $sce in Angular, the default behavior is to:

AngularJS will automatically run security checks on [sensitive HTML] (sanitizations, whitelists, depending on context).

Also, as I understand it, whitelisting (via $sceDelegateProvider.resourceUrlWhitelist) prevents sanitization.

Basically, I've got an app where trustAsResourceUrl is globally enabled (see accepted answer). Now, I want to exclude one controller from this global trust. In other words, I want this content, bound via ng-bind-html, to display as plain text with none of its styling:

<span style="font-family: Arial; font-size: 24px;"> A wizard is never late hardcode.</span>

I may need to create a filter to manually remove any HTML, but I hate to do that just to get back to the default angular behavior. Parsing HTML with regex is ugly and fragile. What do you suggest?

This code is close to what I want, but for whatever reason the style is removed, so it fails to properly reproduce the problem. I'm guessing the whitelist failed, so it's still sanitized.

// Application module: its name matches ng-app in the markup, and ngSanitize is its only dependency.
var mainMod = angular.module('MainApp', ['ngSanitize']);

// MainCtrl publishes two HTML strings on its scope; the template binds both with ng-bind-html.
mainMod.controller('MainCtrl', ['$scope', function ($scope) {
  angular.extend($scope, {
    // Paragraph containing a link.
    text: '<p>Hello! <a href="#">Link</a></p>',
    // Span carrying an inline style attribute.
    htmlContent: '<span style="font-family: Arial; font-size: 24px;"> A wizard is never late.</span>'
  });
}]);

// Configure $sce resource-URL trust for the application.
// The block has to be registered on the 'MainApp' module itself: calling
// angular.module('MainCtrl', []) creates a brand-new, unrelated module that
// ng-app never loads, so a config block attached to it never runs.
mainMod.config(['$sceDelegateProvider', function ($sceDelegateProvider) {
  // NOTE: these lists decide which URLs may be loaded as resources
  // (templates, iframes, ...). They do not switch off the HTML sanitization
  // that ng-bind-html applies.
  $sceDelegateProvider.resourceUrlWhitelist([
    // Allow same origin resource loads.
    'self',
    // Allow loading from our assets domain.  Notice the difference between * and **.
    'https://codepen.io/**'
  ]);

  // The blacklist overrides the whitelist so the open redirect here is blocked.  I don't want to blocklist the whole resource.  Just this one controller.
  // The empty string is a placeholder pattern; replace it with real URL patterns.
  $sceDelegateProvider.resourceUrlBlacklist([
    ''
  ]);
}]);
/* Padding around the page content. */
body {
  padding: 20px;
}

/* Left offset for elements with the "search" class. */
.search {
  margin-left: 10px;
}
<!-- AngularJS core plus the angular-sanitize script. -->
<script src="https://code.angularjs.org/1.5.11/angular.min.js"></script>
<script src="https://code.angularjs.org/1.5.11/angular-sanitize.min.js"></script>

<body ng-app="MainApp">
  <div ng-controller="MainCtrl">
    <!-- Scope strings bound as HTML. -->
    <div ng-bind-html="htmlContent"></div>
    <div ng-bind-html="text"></div>

    <!-- Static span with an inline style, written directly in the markup. -->
    <span style="font-family: Arial; font-size: 24px;"> A wizard is never late hardcode.  In real app this is what I get.  </span>
  </div>
</body>

Angular 1.5.11

P.Brian.Mackey
  • 43,228
  • 68
  • 238
  • 348

2 Answers

1

Filter to get Text from HTML:

mainMod.filter('get_text', ['$sce', function ($sce) {
    // Characters that must be entity-encoded so the result renders as literal text.
    var HTML_ESCAPES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };

    /**
     * Reduce an HTML string to its text content for display via ng-bind-html.
     *
     * @param {string} text HTML markup to strip.
     * @returns {*} Entity-escaped text marked as trusted HTML, or '' when
     *     `text` is null or undefined.
     */
    return function (text) {
        // Without this guard DOMParser stringifies the value and the view
        // shows the word "undefined" or "null".
        if (text === null || text === undefined) {
            return '';
        }

        var doc = new DOMParser().parseFromString(text, 'text/html');
        var textVal = doc.documentElement.textContent;

        // textContent decodes entities, so input such as
        // "&lt;img src=x onerror=...&gt;" comes back as live markup. Re-escape
        // it before trusting it; otherwise trustAsHtml would let that markup
        // bypass sanitization.
        var escaped = textVal.replace(/[&<>"']/g, function (ch) {
            return HTML_ESCAPES[ch];
        });
        return $sce.trustAsHtml(escaped);
    };
}]);

And in HTML:

 <div ng-bind-html="htmlContent | get_text"></div>

// Application module: its name matches ng-app in the markup, and ngSanitize is its only dependency.
var mainMod = angular.module('MainApp', ['ngSanitize']);
    
mainMod.filter('get_text', ['$sce', function ($sce) {
    // Characters that must be entity-encoded so the result renders as literal text.
    var HTML_ESCAPES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };

    /**
     * Reduce an HTML string to its text content for display via ng-bind-html.
     *
     * @param {string} text HTML markup to strip.
     * @returns {*} Entity-escaped text marked as trusted HTML, or '' when
     *     `text` is null or undefined.
     */
    return function (text) {
        // Without this guard DOMParser stringifies the value and the view
        // shows the word "undefined" or "null".
        if (text === null || text === undefined) {
            return '';
        }

        var doc = new DOMParser().parseFromString(text, 'text/html');
        var textVal = doc.documentElement.textContent;

        // textContent decodes entities, so input such as
        // "&lt;img src=x onerror=...&gt;" comes back as live markup. Re-escape
        // it before trusting it; otherwise trustAsHtml would let that markup
        // bypass sanitization.
        var escaped = textVal.replace(/[&<>"']/g, function (ch) {
            return HTML_ESCAPES[ch];
        });
        return $sce.trustAsHtml(escaped);
    };
}]);

// MainCtrl publishes two HTML strings on its scope; the template binds both with ng-bind-html.
mainMod.controller('MainCtrl', ['$scope', function ($scope) {
  angular.extend($scope, {
    // Paragraph containing a link.
    text: '<p>Hello! <a href="#">Link</a></p>',
    // Span carrying an inline style attribute.
    htmlContent: '<span style="font-family: Arial; font-size: 24px;"> A wizard is never late.</span>'
  });
}]);

// Configure $sce resource-URL trust for the application.
// The block has to be registered on the 'MainApp' module itself: calling
// angular.module('MainCtrl', []) creates a brand-new, unrelated module that
// ng-app never loads, so a config block attached to it never runs.
mainMod.config(['$sceDelegateProvider', function ($sceDelegateProvider) {
  // NOTE: these lists decide which URLs may be loaded as resources
  // (templates, iframes, ...). They do not switch off the HTML sanitization
  // that ng-bind-html applies.
  $sceDelegateProvider.resourceUrlWhitelist([
    // Allow same origin resource loads.
    'self',
    // Allow loading from our assets domain.  Notice the difference between * and **.
    'https://codepen.io/**'
  ]);

  // The blacklist overrides the whitelist so the open redirect here is blocked.  I don't want to blocklist the whole resource.  Just this one controller.
  // The empty string is a placeholder pattern; replace it with real URL patterns.
  $sceDelegateProvider.resourceUrlBlacklist([
    ''
  ]);
}]);
/* Padding around the page content. */
body {
  padding: 20px;
}

/* Left offset for elements with the "search" class. */
.search {
  margin-left: 10px;
}
<!-- AngularJS core plus the angular-sanitize script. -->
<script src="https://code.angularjs.org/1.5.11/angular.min.js"></script>
<script src="https://code.angularjs.org/1.5.11/angular-sanitize.min.js"></script>

<body ng-app="MainApp">
  <div ng-controller="MainCtrl">
    <!-- htmlContent is piped through the get_text filter before being bound. -->
    <div ng-bind-html="htmlContent | get_text"></div>
    <div ng-bind-html="text"></div>

    <!-- Static span with an inline style, written directly in the markup. -->
    <span style="font-family: Arial; font-size: 24px;"> A wizard is never late hardcode.  In real app this is what I get.  </span>
  </div>
</body>
Ved
  • 11,837
  • 5
  • 42
  • 60
  • Note that as of 9/12/2017 support for `DOMParser` is poor: https://caniuse.com/#search=domparser. My JS testing framework is imploding on it. And the alternative `xmldom.DOMParser` gives different results from its browser counterpart, particularly when dealing with peer elements `

    visible content

    peer not displayed
    `
    – P.Brian.Mackey Sep 12 '17 at 20:17
  • For out of browser compatibility and testing I used the shim: `require('xmldom').DOMParser;`. I imagine this could also be used for backwards compatibility. Keep in mind the above issue. – P.Brian.Mackey Sep 14 '17 at 13:50
0

If you need more backwards compatibility and maybe flexibility with malformed HTML, then the following filter also works:

// The filter has no dependencies. The original annotation array began with an
// empty string, which asks the injector for a service named '' and fails with
// an unknown-provider error.
mainMod.filter('get_text', [function () {
    /**
     * A more forgiving HTML to text conversion: wrap the content in a div
     * and read back its text.
     *
     * @param {string} text HTML markup, possibly malformed.
     * @returns {string} The text content, or '' when `text` is null or undefined.
     */
    return function (text) {
        // Assigning undefined to innerHTML would produce the text "undefined".
        if (text === null || text === undefined) {
            return '';
        }

        // Parse inside a separate, inert document so that markup such as
        // <img onerror=...> cannot load resources or fire handlers, which can
        // happen when innerHTML is set on an element of the live document.
        var inertDoc = document.implementation.createHTMLDocument('');
        var wrapper = inertDoc.createElement('div');
        wrapper.innerHTML = text;
        return wrapper.textContent; // no need to trustAsHtml because always plain text
    };
}]);
P.Brian.Mackey
  • 43,228
  • 68
  • 238
  • 348