3

I am trying to get a value from inside a <div>. The value I want is "Unidos do Jaçanã", which appears in this HTML:

<div class="event-data">
                <h1 class="event__title ng-binding">
                    Unidos do Jaçanã
                    <!--<a href="{{event.singleUrl}}" target="_blank"><i class="fa fa-external-link"></i></a>-->
                    <span class="event__subtitle ng-binding">Carnaval de Rua 2016</span>
                </h1>


                <!-- ngRepeat: occs in event.occurrences --><!-- ngIf: occs.inPeriod --><div class="event__occurrences ng-scope" ng-repeat="occs in event.occurrences" ng-if="occs.inPeriod">
                    <div class="event__venue">
                        <a href="http://spcultura.prefeitura.sp.gov.br/espaco/2141/" class="ng-binding">Rua Antônio César Neto, 347</a>
                    </div>
                    <div class="event__time ng-binding">Dia 6 de fevereiro de 2016 às 14:00</div><br>
                     <div class="event__time ng-binding">bloco de rua do tradicional bairro do jacana</div>
                    <!--a href="#" class="js-more-occurrences"><i class="fa fa-plus-circle"></i></a-->
                </div><!-- end ngIf: occs.inPeriod --><!-- end ngRepeat: occs in event.occurrences -->

               <!-- <div class="event__languages" style="margin: -10px 0 10px 0">
                    <h4 class="event__languages--title">{{event.terms.linguagem.length == 1 ? 'Linguagem' : 'Linguagens'}}:</h4> {{event.terms.linguagem.join(', ')}}
                </div> -->
                <span class="event__classification ng-binding">Livre</span>

                <div class="event__price ng-binding">
                    <span class="fa-stack">
                        <i class="fa fa-circle fa-stack-2x"></i>
                        <i class="fa fa-usd fa-stack-1x fa-inverse"></i>
                    </span>
                    gratuito
                </div>

                <!-- ngIf: event.traducaoLibras == 'Sim' && event.descricaoSonora == 'Sim' -->
                <!-- ngIf: event.traducaoLibras == 'Sim' && event.descricaoSonora != 'Sim' -->
                <!-- ngIf: event.traducaoLibras != 'Sim' && event.descricaoSonora == 'Sim' -->

               <!--                <div ng-if="event.project.name">
                    <h4>projeto:</h4>
                    <a href="{{event.project.singleUrl}}">{{event.project.name}}</a>
                </div>
 <div ng-if="event.owner.name">
                    <h4>publicado por:</h4>
                    <a href="{{event.owner.singleUrl}}">{{event.owner.name}}</a>
                </div> -->
                <a href="http://spcultura.prefeitura.sp.gov.br/evento/23693/" target="_blank" class="event__info">Mais informações</a>
            </div>

To do that, I am using the cheerio module (a jQuery-like library for Node.js), with this code:

var cheerio = require('cheerio');
var request = require('request');


// Fetch the blocos listing page and log the inner HTML of every
// <h1 class="event__title"> element found in the response body.
// Only the markup returned by this HTTP request is parsed.
request({
    method: 'GET',
    url: 'http://carnavalderua.prefeitura.sp.gov.br/eventos/blocos/'
}, function(err, response, html) {
    // Bail out early on a transport-level failure.
    if (err) return console.error(err);

    // Declare `$` locally: the bare assignment `$ = ...` created an
    // implicit global and throws a ReferenceError in strict mode.
    var $ = cheerio.load(html);

    // `.each()` is the iteration method. `.filter()` is meant for
    // narrowing a selection, and its return value was being discarded.
    $('h1.event__title').each(function() {
        var title = $(this).html();
        console.log(title);
    });
});

And I am getting this log:

{{event.name}}
                    <!--<a href="{{event.singleUrl}}" target="_blank"><i class="fa fa-external-link"></i></a>-->
                    <span class="event__subtitle">{{event.subTitle}}</span>

I think that the value I am looking for is inside {{event.name}}. How can I possibly access this value?

Thanks in advance.

Mr Lister
  • 45,515
  • 15
  • 108
  • 150
Paulo Hen
  • 43
  • 5
  • Can you include the html in the post directly instead of using an image? – Tah Jan 30 '16 at 19:34
  • Why do you use `.filter( function() )`? Try `text()` or `contents()` instead. – Vlad Zhukov Jan 30 '16 at 21:05
  • I tried it, but I get the same logs... – Paulo Hen Jan 30 '16 at 21:46
  • This is an AngularJS page that no longer exists. But basically the content is injected into the page dynamically, after the page loads. So your HTTP request is returning a blank template, not the populated tree you see in the dev tools. You'll generally want to use a browser automation library like Puppeteer for this. See the canonical thread: [How can I scrape pages with dynamic content using node.js?](https://stackoverflow.com/questions/28739098/how-can-i-scrape-pages-with-dynamic-content-using-node-js) – ggorlen Sep 01 '23 at 18:31

1 Answers1

-1

You can try a regex.
// Browser DOM lookup: querySelector returns the first element matching the selector.
var fullTitle = document.querySelector('h1.event__title')
The selector would give you the first title in the document that matches

and then
// Capture the text that precedes the first tag, without surrounding whitespace.
// The lazy `[^<]*?` stops at the first `<`; a greedy `.*` would run up to the
// last `<` on the line when the heading markup is written on a single line.
var titleStriped = fullTitle.innerHTML.match(/^\s*([^<]*?)\s*</)[1]
would match the current html you're getting and give you "Unidos do Jaçanã"

You can also use a jQuery selector, the .html() method and the same regex if you're more comfortable with jQuery

Note: you may need to modify your regex if you need to match different results

estherz
  • 152
  • 1
  • 7