I am trying to extract text from a word document with the following format and inserting the data into a SQL database.
Word Document
Name of House: Aasleagh Lodge
Townland: Srahatloe
Near: Killary Harbour, Leenane
Status/Public Access: maintained, private fishing lodge
Date Built: 1838-1850, burnt 1923, rebuilt 1928
Source Code
var wordApp = new Microsoft.Office.Interop.Word.Application();
var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
var txt = wordDoc.Content.Text;
var regex = new Regex(@"(Name of House\: )(.+?)[\r\n]");
var allMatches = regex.Matches(txt);
foreach (Match match in allMatches)
{
var nameValue = match.Groups[2].Value;
var townValue = match.Groups[2].Value;
SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString());
SqlCommand com = new SqlCommand();
com.CommandText = "INSERT INTO Houses (Name, Townland) VALUES (@name, @town)";
com.Parameters.Add("@name", SqlDbType.NVarChar).SqlValue = nameValue;
com.Parameters.Add("@town", SqlDbType.NVarChar).SqlValue = townValue;
com.Connection = con;
con.Open();
com.ExecuteNonQuery();
con.Close();
}
This works perfectly the only thing is how would I write the code to insert the other fields of text for example this line
var regex = new Regex(@"(Name of House\: )(.+?)[\r\n]");
Inserts the name of the house in this case "Aasleagh Lodge" but how would I write this line to insert the townland?
I tried replacing "Townland" in the regex with the field name I require but I end up with singular records each only holding one different column value.
Is there a way I could insert the data at the same time maybe by using a list or something so this would not occur.
New Source Code
var wordApp = new Microsoft.Office.Interop.Word.Application();
var wordDoc = wordApp.Documents.Open(@"C:\Users\mhoban\Documents\Book.docx");
var txt = wordDoc.Content.Text;
using (var sr = new StringReader(txt))
{
var s = string.Empty;
var nameValue = new StringBuilder();
var townValue = new StringBuilder();
while ((s = sr.ReadLine()) != null)
{
if (s.StartsWith("Name of House"))
{
nameValue.Append(s.Split(new[] { ':' })[1].Trim());
}
else if (s.StartsWith("Townland"))
{
townValue.Append(s.Split(new[] { ':' })[1].Trim());
}
if (nameValue.Length > 0 && townValue.Length > 0)
{
SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["ConnectionString"].ToString());
SqlCommand com = new SqlCommand();
com.CommandText = "INSERT INTO Houses (Name, Townland) VALUES (@name, @town)";
com.CommandText = "INSERT INTO Houses (Name) VALUES (@name)";
com.Parameters.Add("@name", SqlDbType.NVarChar).SqlValue = nameValue;
com.Parameters.Add("@town", SqlDbType.NVarChar).SqlValue = townValue;
com.Connection = con;
con.Open();
com.ExecuteNonQuery();
con.Close();
nameValue.Clear(); townValue.Clear();
}
}
}
Database Fields
[Id] NCHAR (10) NULL,
[Name] NVARCHAR (MAX) NULL,
[Townland] NVARCHAR (MAX) NULL,
[Near] NVARCHAR (MAX) NULL,
[Status] NVARCHAR (MAX) NULL,
[Built] NVARCHAR (MAX) NULL,
[Description] NVARCHAR (MAX) NULL,
[Families] NVARCHAR (MAX) NULL,
[Images] IMAGE NULL