+1 for using Developer Tools. That tool/skill will serve you well.
You should seriously consider using the API. But you can use httr
and rvest
together for this (and I verified it's not against the site rules):
library(rvest)
library(httr)
library(tidyverse)
We'll get the page first since we need to scrape the popup menu data:
pg <- read_html("https://whalewisdom.com/filer/blue-harbour-group-lp#/tabholdings_tab_link")
qtr_nodes <- html_nodes(pg, "select[id='quarter_one'] option")
data_frame(
qtr = html_text(qtr_nodes),
value = html_attr(qtr_nodes, "value")
) %>%
filter(!grepl("ubscri", qtr)) -> qtrs
qtrs
## # A tibble: 10 x 2
## qtr value
## <chr> <chr>
## 1 Current Combined 13F/13D/G -1
## 2 Q3 2017 13F Filings 67
## 3 Q2 2017 13F Filings 66
## 4 Q1 2017 13F Filings 65
## 5 Q4 2016 13F Filings 64
## 6 Q3 2016 13F Filings 63
## 7 Q2 2016 13F Filings 62
## 8 Q1 2016 13F Filings 61
## 9 Q4 2015 13F Filings 60
## 10 Q3 2015 13F Filings 59
^^ is a translation table from pretty name to the popup value. The value is necessary for submitting the XHR request that happens behind the scenes.
Let's make a function to simulate that XHR request:
get_qtr <- function(qtr) {
GET(
url = "https://whalewisdom.com/filer/holdings",
httr::add_headers(
Host = "whalewisdom.com",
`User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:58.0) Gecko/20100101 Firefox/58.0",
Accept = "application/json, text/javascript, */*; q=0.01",
`Accept-Language` = "en-US,en;q=0.5",
Referer = "https://whalewisdom.com/filer/blue-harbour-group-lp",
`X-Requested-With` = "XMLHttpRequest",
Connection = "keep-alive"
),
query = list(
q1 = qtr,
id = "384", type_filter = "1,2,3,4", symbol = "",
change_filter = "1,2,3,4,5", minimum_ranking = "", minimum_shares = "",
is_etf = "0", sc = "true", `_search` = "false", rows = "25",
page = "1", sidx = "current_ranking", sord = "asc"
)
) -> res
stop_for_status(res)
res <- content(res)
map_df(res$rows, ~map(.x, ~ifelse(is.null(.x), NA, .x)))
}
We're just passing in value
to the qtr
parameter, but you could add params for the other bits, too.
Now, use the translation table above to get a randomly chosen set of data:
qtr_65 <- get_qtr(65)
glimpse(qtr_65)
## Observations: 19
## Variables: 24
## $ id <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## $ name <chr> "Investors Bancorp Inc", "Xilinx, Inc", "BWX TECHNOLOGIES INC", "AGCO Corp", "A...
## $ symbol <chr> "ISBC", "XLNX", "BWXT", "AGCO", "AVT", "WBMD", "AKAM", "RDC", "FFIV", "ADNT", "...
## $ permalink <chr> "isbc", "xlnx", "bwxt", "agco", "avt", "wbmd", "akam", "rdc", "ffiv", "adnt", "...
## $ security_type <chr> "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "SH", "...
## $ stock_id <int> 5284, 930, 7803, 600, 375, 838, 3527, 3658, 26, 198034, 5045, 72934, 812, 4116,...
## $ source_date <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
## $ source_type <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
## $ sector <chr> "FINANCE", "INFORMATION TECHNOLOGY", "INDUSTRIALS", "INDUSTRIALS", "INFORMATION...
## $ industry <chr> "TRUSTS & THRIFTS", "SEMICONDUCTORS", "ELECTRICAL EQUIPMENT", "MACHINERY", "ELE...
## $ current_shares <int> 29582428, 6058693, 5287927, 3813700, 4363874, 3361336, 2855493, 10542812, 93835...
## $ previous_shares <int> 29582428, 7514437, 10561086, 6835700, 5415074, 1795914, 2474193, 10542812, 8599...
## $ shares_change <int> 0, -1455744, -5273159, -3022000, -1051200, 1565422, 381300, 0, 78373, 1675570, ...
## $ position_change_type <chr> NA, "reduction", "reduction", "reduction", "reduction", "addition", "addition",...
## $ percent_shares_change <chr> "0.0", "-19.3726", "-49.9301", "-44.2091", "-19.4125", "87.1658", "15.4111", "0...
## $ current_ranking <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 999999, 999999, 999999
## $ previous_ranking <int> 3, 1, 2, 4, 5, 11, 7, 6, 8, 999999, 10, 9, 999999, 12, 14, 13, 15, 16, 17
## $ current_percent_of_portfolio <dbl> 15.3264, 12.6366, 9.0686, 8.2689, 7.1946, 6.3798, 6.1419, 5.9180, 4.8200, 4.387...
## $ previous_percent_of_portfolio <dbl> 13.7987, 15.1687, 14.0194, 13.2249, 8.6205, 2.9767, 5.5165, 6.6592, 4.1615, NA,...
## $ current_mv <chr> "425395000.0", "350738000.0", "251705000.0", "229508000.0", "199691000.0", "177...
## $ previous_mv <chr> "412675000.0", "453647000.0", "419275000.0", "395514000.0", "257812000.0", "890...
## $ percent_ownership <chr> "26.3298285", "2.4338473", "5.3287767", "4.5501506", "3.3856140", "8.6721677", ...
## $ quarter_first_owned <chr> "Q1 2014", "Q1 2015", "Q4 2013", "Q2 2014", "Q2 2015", "Q4 2016", "Q3 2016", "Q...
## $ quarter_id_owned <int> 53, 57, 52, 54, 58, 64, 63, 53, 61, 65, 47, 63, 65, 64, 64, 64, 64, 60, 64
I have no idea if ^^ is what's in the CSV since I'm not registering for an account, but you can verify and hopefully modify.