12

I am trying to parse dates in RFC1123 format (Thu, 21 Jan 2010 17:47:00 EST).

Here is what I tried, but neither attempt worked:

// Attempt 1: let DateTime.Parse work out the format on its own.
DateTime Date = DateTime.Parse(dt);
// Attempt 2: exact parse with the standard "r" (RFC1123) format specifier.
// NOTE(review): per the accepted answer, "r" expects the zone to be GMT, so an
// input ending in "EST" does not match.
DateTime Date = DateTime.ParseExact(dt, "r", null);
halfer
  • 19,824
  • 17
  • 99
  • 186
Moon
  • 33,439
  • 20
  • 81
  • 132

2 Answers

12

Have you tried something like:

// Parses an RFC1123-style timestamp whose zone abbreviation is known in advance.
// The zone text is only matched, not applied: the resulting DateTime carries the
// wall-clock time from the string with no offset conversion.
string dateString, format;
DateTime result;
CultureInfo provider = CultureInfo.InvariantCulture;

dateString = "Thu, 21 Jan 2010 17:47:00 EST";
// "HH" is the 24-hour specifier; "hh" is the 12-hour one and rejects an hour of 17.
// The zone is wrapped in single quotes so it is always treated as literal text
// rather than being scanned for format specifiers.
format = "ddd, dd MMM yyyy HH:mm:ss 'EST'";

// Throws FormatException if dateString does not match the format exactly.
result = DateTime.ParseExact(dateString, format, provider);
Console.WriteLine("{0} converts to {1}.", dateString, result.ToString());

I haven't tested it yet (will in a few moments)... but I believe that will do it for you.

Edit: It seems that the problem is that RFC1123 states that the timezone should always be GMT... which is why r or R did not work as a format for you. The problem is the EST. The pattern above accounts for EST, but it is static so if you have any other timezone you might be in trouble. The best solution would be to go with the RFC1123 standard and go to GMT and it should solve your problem. If you can't, let me know I might have a solution.

Edit 2: This is not a complete solution, but it isolates the timezone and still allows you to parse the date. The code doesn't know the offset of the timezone it is presented with, but you can throw any timezone abbreviation at it and it will parse the time. If you want to convert to GMT and then use r or R, you can take the result of the regex match, look it up in a table (to see what the time offset is for that timezone abbreviation), then convert the time to GMT and parse from there. That would be a good solution but a little more work. Here's the code:

// Parses an RFC1123-style timestamp with an arbitrary alphabetic zone abbreviation.
// The abbreviation is extracted with a regex and embedded in the format string as a
// literal, so the zone is matched but NOT applied: the resulting DateTime holds the
// wall-clock time from the string without any offset conversion.
string dateString, format, pattern, tz;
DateTime result;
CultureInfo provider = CultureInfo.InvariantCulture;
pattern = @"[a-zA-Z]+, [0-9]+ [a-zA-Z]+ [0-9]+ [0-9]+:[0-9]+:[0-9]+ (?<timezone>[a-zA-Z]+)";
dateString = "Thu, 21 Jan 2010 17:47:00 EST";

Regex findTz = new Regex(pattern, RegexOptions.Compiled);

// Check Success before reading the group: Match.Result throws on a failed match.
Match tzMatch = findTz.Match(dateString);
tz = tzMatch.Success ? tzMatch.Groups["timezone"].Value : string.Empty;

// Quote the abbreviation so its letters are taken literally. Unquoted, letters such
// as 'M' (GMT, MST), 'H' (HST) or 'K' (HKT) are custom format specifiers and make the
// parse fail. The regex only admits letters, so tz cannot contain a quote itself.
format = "ddd, dd MMM yyyy HH:mm:ss '" + tz + "'";

// TryParseExact reports failure through its return value instead of an exception.
result = DateTime.MinValue;
if (tzMatch.Success && DateTime.TryParseExact(dateString, format, provider, DateTimeStyles.None, out result))
{
    Console.WriteLine("Timezone format is: {0}", format);
    Console.WriteLine("{0} converts to {1}.", dateString, result.ToString());
}
else
{
    Console.WriteLine("{0} is not in the correct format.", dateString);
}

// Keep the console window open until the user presses Enter.
Console.ReadLine();

Here is a list of UTC offsets for you if you would like to turn this into a timezone converter:

Timezone Abbreviations with UTC offsets

Tim C
  • 1,934
  • 12
  • 25
  • It makes sense. But the problem is that I have no control over the format of the date as it comes from an external source. Any ways to convert non-GMT timezone to GMT based and then applying "r" or "R" ?? – Moon Apr 05 '10 at 19:53
  • I wish I had a better answer than what I am about to tell you, but... abbreviations are not recognized. I have a solution, but it's not fun or elegant. I'll post it and cross my fingers that someone else can help you better than me! I am so sorry. – Tim C Apr 05 '10 at 20:17
  • It's not perfect but much better! :) I appreciate your help, I really do :) – Moon Apr 05 '10 at 21:22
  • you may use TryParseExact() instead of a try-catch with ParseExact(). – IgorK Feb 17 '12 at 12:06
0

I reached this question looking for a solution to the same problem. It's really incredible that in 12 years no solution has been added to the runtime.

This is my solution:

    /// <summary>
    /// Regular expression matching a date with a numeric offset, such as
    /// "Thu, 15 Feb 2018 00:36:36.405 +0000 (UTC)".
    /// Named groups: dow, day, month3, year, hour, minutes, seconds,
    /// milliseconds (optional, includes the leading dot) and tzshift (sign plus digits).
    /// The pattern is not anchored, so trailing text such as "(UTC)" is ignored.
    /// </summary>
    private static readonly Regex reMimeDateTzshift = new Regex(@"(?<dow>[A-Za-z]{3}),?\s+(?<day>\d{1,2})\s(?<month3>[A-Za-z]{3})\s(?<year>\d{4})\s(?<hour>\d+):(?<minutes>\d+):(?<seconds>\d+)(?<milliseconds>\.\d+)?\s(?<tzshift>[+\-]\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    /// <summary>
    /// Regular expression matching a date with a zone abbreviation, such as
    /// "Wed, 24 Oct 2012 16:37:27 GMT".
    /// Same groups as the numeric-offset pattern, except that the zone is captured
    /// in "timezone" as 3 or 4 letters (case-insensitive).
    /// NOTE(review): the 3-4 letter limit means "UT" and the single-letter military
    /// zones listed in the timezones table can never be captured by this pattern.
    /// </summary>
    private static readonly Regex reMimeDateTimezone = new Regex(@"(?<dow>[A-Za-z]{3}),?\s+(?<day>\d{1,2})\s(?<month3>[A-Za-z]{3})\s(?<year>\d{4})\s(?<hour>\d+):(?<minutes>\d+):(?<seconds>\d+)(?<milliseconds>\.\d+)?\s(?<timezone>[A-Z]{3,4})", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    /// <summary>
    /// Regular expression matching a month-first date with the year last, such as
    /// "Fri Aug 18 00:42 PDT 2006".
    /// Named groups: dow, month3, day, hour, minutes, timezone (3 or 4 letters) and year;
    /// this layout has no seconds component.
    /// </summary>
    private static readonly Regex reMimeDateAmerican = new Regex(@"(?<dow>[A-Za-z]{3}),?\s+(?<month3>[A-Za-z]{3})\s(?<day>\d{1,2})\s(?<hour>\d+):(?<minutes>\d+)\s(?<timezone>[A-Z]{3,4})\s(?<year>\d{4})", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    /// <summary>
    /// Maps a timezone abbreviation to its UTC offset in "+HHmm" / "-HHmm" form.
    /// Lookups ignore case because the date patterns that capture the abbreviation
    /// are matched case-insensitively; a case-sensitive table would miss inputs such
    /// as "est" or "CHST".
    /// </summary>
    /// <remarks>
    /// NOTE(review): several abbreviations are ambiguous in practice (for example IST,
    /// CST and AST are used by more than one region); this table encodes a single
    /// interpretation for each - confirm it suits the data being parsed.
    /// </remarks>
    private static readonly Dictionary<string, string> timezones = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) {
            // https://www.rfc-editor.org/rfc/rfc822
            // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
            { "NZDT", "+1300" },
            { "NZST", "+1200" },
            { "AEDT", "+1100" },
            { "ACDT", "+1030" },
            { "AEST", "+1000" }, { "ChST", "+1000" },
            { "ACST", "+0930" },
            { "WIT", "+0900" }, { "KST", "+0900" }, { "JST", "+0900" },
            { "HKT", "+0800" }, { "WITA", "+0800" }, { "AWST", "+0800" },
            { "WIB", "+0700" },
            { "PKT", "+0500" },
            { "EAT", "+0300" }, { "IDT", "+0300" }, { "MSK", "+0300" }, { "EEST", "+0300" },
            { "CAT", "+0200" }, { "EET", "+0200" }, { "IST", "+0200" }, { "CEST", "+0200" }, { "MEST", "+0200" }, { "SAST", "+0200" },
            { "WAT", "+0100" }, { "CET", "+0100" }, { "MET", "+0100" }, { "WEST", "+0100" },
            { "UT" , "+0000" }, { "UTC", "+0000" }, { "GMT", "+0000" }, { "WET", "+0000" },
            { "EDT", "-0400" }, { "AST", "-0400" },
            { "EST", "-0500" }, { "CDT", "-0500" },
            { "CST", "-0600" }, { "MDT", "-0600" },
            { "MST", "-0700" }, { "PDT", "-0700" },
            { "PST", "-0800" }, { "AKDT", "-0800" },
            { "HDT", "-0900" }, { "AKST", "-0900" },
            { "HST", "-1000" },
            { "SST", "-1100" },
            // Note: rfc822 got the signs backwards for the military
            // timezones so some sending clients may mistakenly use the
            // wrong values.
            // The letter J is intentionally absent from the military list.
            { "A", "+0100" }, { "B", "+0200" }, { "C", "+0300" },
            { "D", "+0400" }, { "E", "+0500" }, { "F", "+0600" },
            { "G", "+0700" }, { "H", "+0800" }, { "I", "+0900" },
            { "K", "+1000" }, { "L", "+1100" }, { "M", "+1200" },
            { "N", "-0100" }, { "O", "-0200" }, { "P", "-0300" },
            { "Q", "-0400" }, { "R", "-0500" }, { "S", "-0600" },
            { "T", "-0700" }, { "U", "-0800" }, { "V", "-0900" },
            { "W", "-1000" }, { "X", "-1100" }, { "Y", "-1200" },
            { "Z", "+0000" }
        };
    /// <summary>
    /// Converts a mail-style date string to a <see cref="DateTime"/>, trying
    /// progressively more lenient strategies until one succeeds.
    /// </summary>
    /// <remarks>
    /// Strategies, in order:
    /// <list type="number">
    /// <item><description>Direct parse with the invariant culture, e.g. "Wed, 04 Jan 2006 07:58:08 -0800".</description></item>
    /// <item><description>Numeric offset with trailing noise, e.g. "Wed, 04 Jan 2006 07:58:08 -0800 (PST)" or "Thu, 15 Feb 2018 00:36:36.405 +0000 (UTC)".</description></item>
    /// <item><description>Zone abbreviation, e.g. "Wed, 24 Oct 2012 16:37:27 GMT (envelope-from dboutet@businessv.com)".</description></item>
    /// <item><description>Month-first layout with the year last, e.g. "Fri Aug 18 00:42 PDT 2006".</description></item>
    /// </list>
    /// For the regex-based strategies the captured parts are rebuilt into a canonical
    /// string and parsed again. Fractional seconds are matched but not carried over, and
    /// an abbreviation missing from the timezones table is treated as "+0000".
    /// Every parse uses <see cref="DateTimeStyles.AdjustToUniversal"/>.
    /// </remarks>
    /// <param name="date">Text to convert; may be null or empty.</param>
    /// <returns>
    /// The parsed value, or <see cref="DateTime.MinValue"/> (after logging an error) when
    /// the text is null or matches none of the supported layouts.
    /// </returns>
    public static DateTime ConvertToDateTime(string date) {
        DateTime parsed;

        // Regex.Match throws on null, so null skips straight to the failure path to
        // honour the "returns the smallest possible date on error" contract.
        if (date != null) {
            // Optimistic: try directly.
            // Valid for "dd MMM yyyy hh:mm:ss zzzz" and "ddd, dd MMM yyyy hh:mm:ss zzzz" formats.
            if (DateTime.TryParse(date, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal | DateTimeStyles.AllowWhiteSpaces, out parsed)) {
                return parsed;
            }

            // Try: "Wed, 04 Jan 2006 07:58:08 -0800"
            Match match = reMimeDateTzshift.Match(date);
            if (match.Success && TryParseMimeDateParts(match, match.Groups["seconds"].Value, match.Groups["tzshift"].Value, out parsed)) {
                return parsed;
            }

            // Try: "Wed, 04 Jan 2006 07:58:08 GMT"
            match = reMimeDateTimezone.Match(date);
            if (match.Success && TryParseMimeDateParts(match, match.Groups["seconds"].Value, ResolveTimezoneShift(match), out parsed)) {
                return parsed;
            }

            // Try: "Fri Aug 18 00:42 PDT 2006" (no seconds in this layout, so use 00)
            match = reMimeDateAmerican.Match(date);
            if (match.Success && TryParseMimeDateParts(match, "00", ResolveTimezoneShift(match), out parsed)) {
                return parsed;
            }
        }

        _loggerError(_logger, $"Date format not recognised: '{date}'", null);
        return DateTime.MinValue;
    }

    /// <summary>
    /// Rebuilds the captured date parts as "dow, day month year hour:minutes:seconds shift"
    /// and parses that text with the invariant culture.
    /// </summary>
    private static bool TryParseMimeDateParts(Match match, string seconds, string tzShift, out DateTime result) {
        string cleanDateTime = $"{match.Groups["dow"].Value}, {match.Groups["day"].Value} {match.Groups["month3"].Value} {match.Groups["year"].Value} {match.Groups["hour"].Value}:{match.Groups["minutes"].Value}:{seconds} {tzShift}";
        return DateTime.TryParse(cleanDateTime, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal | DateTimeStyles.AllowWhiteSpaces, out result);
    }

    /// <summary>
    /// Returns the UTC offset for the match's "timezone" group, or "+0000" when the
    /// abbreviation is not in the timezones table.
    /// </summary>
    private static string ResolveTimezoneShift(Match match) {
        return timezones.TryGetValue(match.Groups["timezone"].Value, out string tzShift) ? tzShift : "+0000";
    }
Sourcerer
  • 1,891
  • 1
  • 19
  • 32