Here's what I eventually ended up doing. My co-worker & I came up with a query that would give us a list of Event_ID's that had duplicate data (we actually used Access 2010's query builder and MySQL-ified it). Bear in mind this is a complete solution where the original question didn't have as much detail as far as linked tables. If you've got questions about this, feel free to ask & I'll try to help:
SELECT `Events_new`.`Event_ID`
FROM Events_new
GROUP BY `Events_new`.`PCBID`, `Events_new`.`EventType_ID`, `Events_new`.`DateTime`, `Events_new`.`User`
HAVING (((COUNT(`Events_new`.`PCBID`)) > 1) AND ((COUNT(`Events_new`.`User`)) > 1) AND ((COUNT(`Events_new`.`DateTime`)) > 1))
From this I processed each Event_ID
to remove the duplicates in an iterative manner. Basically I had to delete all the child rows starting from the last lowest table so that I didn't run afoul of foreign key restraints.
This chunk of code was written in LinqPAD as C# statements: (sbCommonFunctions is an inhouse DLL designed to make most (but not all as you'll see) database functions be handled the same way or easier)
sbCommonFunctions.Database testDB = new sbCommonFunctions.Database();
testDB.Connect("production", "database", "user", "password");
List<string> listEventIDs = new List<string>();
List<string> listEventDetailIDs = new List<string>();
List<string> listTestInformationIDs = new List<string>();
List<string> listTestStepIDs = new List<string>();
List<string> listMeasurementIDs = new List<string>();
string dtQuery = (String.Format(@"SELECT `Events_new`.`Event_ID`
FROM Events_new
GROUP BY `Events_new`.`PCBID`,
`Events_new`.`EventType_ID`,
`Events_new`.`DateTime`,
`Events_new`.`User`
HAVING (((COUNT(`Events_new`.`PCBID`)) > 1)
AND ((COUNT(`Events_new`.`User`)) > 1)
AND ((COUNT(`Events_new`.`DateTime`)) > 1))"));
int iterations = 0;
DataTable dtEventIDs = getDT(dtQuery, testDB);
while (dtEventIDs.Rows.Count > 0)
{
Console.WriteLine(dtEventIDs.Rows.Count);
Console.WriteLine(iterations);
iterations++;
foreach(DataRowView eventID in dtEventIDs.DefaultView)
{
listEventIDs.Add(eventID.Row[0].ToString());
DataTable dtEventDetails = testDB.QueryDatabase(String.Format(
"SELECT * FROM EventDetails_new WHERE Event_ID = {0}",
eventID.Row[0]));
foreach(DataRowView drvEventDetail in dtEventDetails.DefaultView)
{
listEventDetailIDs.Add(drvEventDetail.Row[0].ToString());
}
DataTable dtTestInformation = testDB.QueryDatabase(String.Format(
@"SELECT TestInformation_ID
FROM TestInformation_new
WHERE Event_ID = {0}",
eventID.Row[0]));
foreach(DataRowView drvTest in dtTestInformation.DefaultView)
{
listTestInformationIDs.Add(drvTest.Row[0].ToString());
DataTable dtTestSteps = testDB.QueryDatabase(String.Format(
@"SELECT TestSteps_ID
FROM TestSteps_new
WHERE TestInformation_TestInformation_ID = {0}",
drvTest.Row[0]));
foreach(DataRowView drvTestStep in dtTestSteps.DefaultView)
{
listTestStepIDs.Add(drvTestStep.Row[0].ToString());
DataTable dtMeasurements = testDB.QueryDatabase(String.Format(
@"SELECT Measurements_ID
FROM Measurements_new
WHERE TestSteps_TestSteps_ID = {0}",
drvTestStep.Row[0]));
foreach(DataRowView drvMeasurements in dtMeasurements.DefaultView)
{
listMeasurementIDs.Add(drvMeasurements.Row[0].ToString());
}
}
}
}
testDB.Disconnect();
string mysqlConnection =
"server=server;\ndatabase=database;\npassword=password;\nUser ID=user;";
MySqlConnection connection = new MySqlConnection(mysqlConnection);
connection.Open();
//start unwinding the duplicates from the lowest level upward
whackDuplicates(listMeasurementIDs, "measurements_new", "Measurements_ID", connection);
whackDuplicates(listTestStepIDs, "teststeps_new", "TestSteps_ID", connection);
whackDuplicates(listTestInformationIDs, "testinformation_new", "testInformation_ID", connection);
whackDuplicates(listEventDetailIDs, "eventdetails_new", "eventdetails_ID", connection);
whackDuplicates(listEventIDs, "events_new", "event_ID", connection);
connection.Close();
//update iterator from inside the clause in case there are more duplicates.
dtEventIDs = getDT(dtQuery, testDB); }
}//goofy curly brace to allow LinqPAD to deal with inline classes
public void whackDuplicates(List<string> listOfIDs,
string table,
string pkID,
MySqlConnection connection)
{
foreach(string ID in listOfIDs)
{
MySqlCommand command = connection.CreateCommand();
command.CommandText = String.Format(
"DELETE FROM " + table + " WHERE " + pkID + " = {0}", ID);
command.ExecuteNonQuery();
}
}
public DataTable getDT(string query, sbCommonFunctions.Database db)
{
return db.QueryDatabase(query);
//}/*this is deliberate, LinqPAD has a weird way of dealing with inline
classes and the last one can't have a closing curly brace (and the
first one has to have an extra opening curly brace above it, go figure)
*/
Basically this is a giant while loop, and the clause iterator is updated from inside the clause until the number of Event_ID's drops to zero (it takes 5 iterations, some of the data has as many as six duplicates).