i've made a Program that goes a big chunk of text from a calendar website, searches for tags of events and puts them into a text file. I've added a function so you can do the query many days in the past, adn I wanted to add a search function, so you can search through the tags. I wanted to use a simple String.Contains(), but the output tags are Regex Matches, so I need to convert them.
My Code:
using System;
using System.Linq;
using System.Diagnostics;
using System.Threading;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace ESEL_Scraper
{
class Program
{
static void Main(string[] args)
{
int queryResults = 0;
Stopwatch stopWatch = new Stopwatch();
stopWatch.Start();
Console.WriteLine("How many days do you want to go back?");
String userInput = Console.ReadLine();
Console.WriteLine("What do you want to search for?");
String userInput2 = Console.ReadLine();
int result = Int32.Parse(userInput);
for(int i = 0; i < result; i++) {
DateTime somePreviousDay = DateTime.Now.Date.AddDays(-i);
String somePreviousDayString = somePreviousDay.ToString("dd.MM.yyyy");
Console.WriteLine(somePreviousDayString);
WebClient client = new WebClient();
client.Encoding = System.Text.Encoding.UTF8;
string site = client.DownloadString($"https://esel.at/api/termine/data?date={somePreviousDayString}&selection=false");
String tags = "\"tags\":\"";
String endTags = "\",";
Regex regex = new Regex($"{tags}(.*?){endTags}");
MatchCollection matches = regex.Matches(site);
foreach (Match match in matches)
{
if(match.Contains(userInput2)) {
using (System.IO.StreamWriter file =
new System.IO.StreamWriter(@"eselTags.txt", true))
{
file.WriteLine(match);
queryResults++;
}
}
}
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine($"Time taken: " + elapsedTime);
Console.WriteLine($"Found tags: " + queryResults);
}
}
}
}
The big chunk of text: https://esel.at/api/termine/data?date=05.09.2020&selection=false
I hope someone can help me.