0

I've made a program that goes through a big chunk of text from a calendar website, searches for the tags of events and puts them into a text file. I've added a function so you can run the query for many days into the past, and I wanted to add a search function, so you can search through the tags. I wanted to use a simple String.Contains(), but the output tags are Regex Matches, so I need to convert them.

My Code:

using System;
using System.Linq;
using System.Diagnostics;
using System.Threading;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace ESEL_Scraper
{
    /// <summary>
    /// Console scraper for the esel.at event calendar: downloads the listing for
    /// each of the last N days, extracts every "tags" entry with a regular
    /// expression and appends the entries containing the search term to eselTags.txt.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Asks for the number of days to go back and a search term, then scans
        /// one day per iteration (today first) and finally prints the elapsed
        /// time and the number of matching tag entries.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int queryResults = 0;
            Stopwatch stopWatch = new Stopwatch();
            stopWatch.Start();
            Console.WriteLine("How many days do you want to go back?");
            String userInput = Console.ReadLine();
            Console.WriteLine("What do you want to search for?");
            // ReadLine returns null at end of input; fall back to the empty
            // string, which is contained in every tag list.
            String userInput2 = Console.ReadLine() ?? String.Empty;

            // TryParse instead of Parse so a typo does not end in an unhandled exception.
            int result;
            if (!Int32.TryParse(userInput, out result))
            {
                Console.WriteLine("Please enter a whole number of days.");
                return;
            }

            // Loop-invariant: the pattern captures (group 1) the text between
            // "tags":" and the next ", in the raw response.
            String tags = "\"tags\":\"";
            String endTags = "\",";
            Regex regex = new Regex($"{tags}(.*?){endTags}");

            // One client for all requests, disposed when the loop is done.
            using (WebClient client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;

                for (int i = 0; i < result; i++)
                {
                    DateTime somePreviousDay = DateTime.Now.Date.AddDays(-i);
                    // Invariant culture: the value goes into a URL, so it must not
                    // depend on the user's regional calendar settings.
                    String somePreviousDayString = somePreviousDay.ToString(
                        "dd.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture);
                    Console.WriteLine(somePreviousDayString);

                    string site = client.DownloadString($"https://esel.at/api/termine/data?date={somePreviousDayString}&selection=false");

                    // Match has no Contains method; the captured tag text is a
                    // plain string (Groups[1].Value), so search that instead.
                    // The full match text is what gets written, as before.
                    string[] hits = regex.Matches(site)
                        .Cast<Match>()
                        .Where(match => match.Groups[1].Value.Contains(userInput2))
                        .Select(match => match.Value)
                        .ToArray();

                    if (hits.Length > 0)
                    {
                        // Append all hits of the day in one go instead of
                        // re-opening the file for every single match.
                        File.AppendAllLines(@"eselTags.txt", hits);
                        queryResults += hits.Length;
                    }
                }
            }

            // Stop and report once, after all days were processed.
            stopWatch.Stop();
            TimeSpan ts = stopWatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
                ts.Hours, ts.Minutes, ts.Seconds,
                ts.Milliseconds / 10);
            Console.WriteLine("Time taken: " + elapsedTime);
            Console.WriteLine("Found tags: " + queryResults);
        }
    }
}

The big chunk of text: https://esel.at/api/termine/data?date=05.09.2020&selection=false

I hope someone can help me.

af2111
  • 301
  • 3
  • 12
  • You may use `match.Value.Contains(userInput2)`. Or better, incorporate `userInput2` into the regex pattern itself. **Edit:** I just checked the link and it looks like the "big chunk of text" is JSON, so use a [JSON parser](https://stackoverflow.com/q/6620165/8967612) instead. – 41686d6564 stands w. Palestine Sep 05 '20 at 13:25

1 Answers1

0
 // Literal text that opens a tags value in the raw text: "tags":"
 String tags = "\"tags\":\"";
 // Literal text that closes the value: ",
 String endTags = "\",";
 // Lazy group 1 captures everything between the two delimiters.
 Regex regex = new Regex($"{tags}(.*?){endTags}");
 // Every occurrence of the pattern in `site`.
 MatchCollection matches = regex.Matches(site);

The above code of yours only fetches the string between `"tags":"` and `",` — i.e., the value of the tags property in the JSON string. So each match contains only the substring taken from tags.

You can create a class, deserialize the big chunk of text into that class, and then, with the help of LINQ, search through the tags and write the entire object into the file.

As an alternative solution, I have modified your code a bit. Please check below:

// Asks for a number of days and a search term, downloads the esel.at listing
// for each day (today first), parses it as JSON and appends every "termine"
// entry whose "tags" text contains the search term to eselTags.txt,
// one compact JSON object per line.
int queryResults = 0;
Stopwatch stopWatch = new Stopwatch();
stopWatch.Start();
Console.WriteLine("How many days do you want to go back?");
String userInput = Console.ReadLine();
Console.WriteLine("What do you want to search for?");
// ReadLine returns null at end of input; the empty string matches every entry.
String userInput2 = Console.ReadLine() ?? String.Empty;

// TryParse instead of Parse so a typo does not end in an unhandled exception.
int result;
if (!Int32.TryParse(userInput, out result))
{
    Console.WriteLine("Please enter a whole number of days.");
    return;
}

// One client for all requests, disposed when the loop is done.
using (WebClient client = new WebClient())
{
    client.Encoding = System.Text.Encoding.UTF8;

    for (int i = 0; i < result; i++)
    {
        DateTime somePreviousDay = DateTime.Now.Date.AddDays(-i);
        // Invariant culture: the value goes into a URL, so it must not
        // depend on the user's regional calendar settings.
        String somePreviousDayString = somePreviousDay.ToString(
            "dd.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture);
        Console.WriteLine(somePreviousDayString);

        string site = client.DownloadString($"https://esel.at/api/termine/data?date={somePreviousDayString}&selection=false");

        var siteData = JToken.Parse(site);
        // A response without a "termine" array simply has nothing to search.
        var termineList = siteData["termine"] as JArray;
        if (termineList == null)
        {
            continue;
        }

        // The explicit string conversion yields null for a missing or null
        // "tags" value, so such entries are skipped instead of throwing.
        var linesToWrite = termineList
            .Where(x =>
            {
                string entryTags = (string)x.SelectToken("tags");
                return entryTags != null && entryTags.Contains(userInput2);
            })
            // Compact serialization keeps each result on a single line.
            .Select(searchResult => JsonConvert.SerializeObject(searchResult))
            .ToArray();

        if (linesToWrite.Length > 0)
        {
            // Append all hits of the day in one go instead of re-opening
            // the file for every single result.
            System.IO.File.AppendAllLines(@"eselTags.txt", linesToWrite);
            queryResults += linesToWrite.Length;
        }
    }
}

// Stop and report once, after all days were processed.
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
    ts.Hours, ts.Minutes, ts.Seconds,
    ts.Milliseconds / 10);
Console.WriteLine("Time taken: " + elapsedTime);
Console.WriteLine("Found tags: " + queryResults);
Purushothaman
  • 519
  • 4
  • 16