I'm trying to scrape a website using select
crate in rust. Here is site structure:
example.com/category-list/
example.com/cat/programming-questions/
qtitle = "How to become a programmer"
qid = 2
example.com/cat/networking-questions/
qtitle = "New question"
qid = 3
qtitle = "Other question"
qid = 4
Scraper code:
#[derive(Serialize, Deserialize, Debug)]
pub struct Question {
q_title: Vec<String>,
q_id: Vec<String>,
q_link: Vec<String>,
}
let mut questions_vector= Vec::new();
for response in reponse_list.iter() {
let mut q_title: Vec<String> = Vec::new();
Document::from(resp.as_str())
.select(Class("qTitle"))
.for_each(|f| q_title.push(f.to_string()));
let mut q_id: Vec<String> = Vec::new();
Document::from(resp.as_str())
.select(Class("qid"))
.for_each(|f| q_id.push(f.to_string()));
let mut q_link: Vec<String> = Vec::new();
Document::from(resp.as_str())
.select(Name("a"))
.filter_map(|f| f.attr("href"))
.for_each(|f| q_link.push(f.to_string()));
let question = Question {
q_title: q_title,
q_id: q_id,
q_link: q_link,
};
questions_vector.push(question);
}
println!("{}", serde_json::to_string(&questions_vector).unwrap());
Output of my code:
{
"q_title": [
"How to become a programmer",
],
"q_id": [
"2",
],
"q_link": [
"https://example.com/q/How-to-become-a-programmer"
]
},
{
"q_title": [
"New question",
"Other question"
],
"q_id": [
"3","4"
],
"q_link": [
"https://example.com/q/New-question",
"https://example.com/q/Other-question"
]
}
Purposed output:
{
"q_title": [
"How to become a programmer",
],
"q_id": [
"2",
],
"q_link": [
"https://example.com/q/How-to-become-a-programmer"
]
},
{
"q_title": [
"New question",
],
"q_id": [
"3",
],
"q_link": [
"https://example.com/q/New-question",
]
},
{
"q_title": [
"Other question"
],
"q_id": [
"4",
],
"q_link": [
"https://example.com/q/Other-question"
]
},
I need to refactor my code to achieve purposed output