trying to update a column of null values with instagramIds, this is my current approach but the console app just keeps running and doesn't update any values in the database.
public static async Task<InstagramUser> ScrapeInstagram(string url)
{
using (var client = new HttpClient())
{
var response = await client.GetAsync(url);
if (response.IsSuccessStatusCode)
{
// create html document
var htmlBody = await response.Content.ReadAsStringAsync();
var htmlDocument = new HtmlDocument();
htmlDocument.LoadHtml(htmlBody);
// select script tags
var scripts = htmlDocument.DocumentNode.SelectNodes("/html/body/script");
// preprocess result
var uselessString = "window._sharedData = ";
var scriptInnerText = scripts[0].InnerText
.Substring(uselessString.Length)
.Replace(";", "");
// serialize objects and fetch the user data
dynamic jsonStuff = JObject.Parse(scriptInnerText);
dynamic userProfile = jsonStuff["entry_data"]["ProfilePage"][0]["graphql"]["user"];
List<String> columnData = new List<String>();
//Update database query
string connectionString = @"Server=myProject-dev-db.cothtpanmcn7.ap-southeast-2.rds.amazonaws.com;Database=Projectdb;User Id=testadmin;Password=U8gs7vb7C7yvakXf;MultipleActiveResultSets=true;Trusted_Connection=False;";
using (SqlConnection con = new SqlConnection(connectionString))
{
//get null values from database
string query = "Select * from ApplicationUser where InstagramId is null";
using (SqlCommand command = new SqlCommand(query, con))
{
command.Connection.Open();
using (SqlDataReader reader = command.ExecuteReader())
{
while (reader.Read())
{
columnData.Add(reader.GetString(0));
}
}
}
for (int index = 0; index < columnData.Count(); index++)
{
//get username and scrape info
var instagramInfo = new InstagramUser
{
Id = userProfile.id,
};
columnData.Add(instagramInfo.ToString());
}
SqlCommand cmd = new SqlCommand("Update ApplicationUser Set InstagramId = '" + columnData + "'" + "where InstagramUsername = '" + userprofile.username + "'", con);
cmd.Connection.Open();
cmd.ExecuteNonQuery();
}
// create an InstagramUser
var instagramUser = new InstagramUser
{
FullName = userProfile.full_name,
FollowerCount = userProfile.edge_followed_by.count,
FollowingCount = userProfile.edge_follow.count,
Id = userProfile.id,
url = url
};
return instagramUser;
}
else
{
throw new Exception($"Something wrong happened {response.StatusCode} - {response.ReasonPhrase} - {response.RequestMessage}");
}
}
}
The current output:
{"FullName":null,"FollowerCount":0,"FollowingCount":0,"Id":"6978701146","url":null}
{"FullName":null,"FollowerCount":0,"FollowingCount":0,"Id":"6978701146","url":null}
{"FullName":null,"FollowerCount":0,"FollowingCount":0,"Id":"6978701146","url":null}
{"FullName":null,"FollowerCount":0,"FollowingCount":0,"Id":"6978701146","url":null}
{"FullName":null,"FollowerCount":0,"FollowingCount":0,"Id":"6978701146","url":null}
My current approach is to create a list, add all instagramIDs which are null to that list. From there I add all instagramIds to that list after scraping Instagram for their usernames e.g. https://www.instagram.com/therock/?__a=1
then I update the column InstagramUsername with their instagram Ids