1

I have a very large list of strings containing GUIDs and a 100,000+ record table in a database with an Entity Framework model in my app.

What' is the most efficient approach to finding all records in a particular dataset where the GUID List is not present?

The following is performing very slowly:

var list= new List<string> { "1", "2", "3" };

return (from t1 in db.Items
where (!list.Contains(t1.GUID))
Nick
  • 5,844
  • 11
  • 52
  • 98
  • 1
    Getting good performance on a query like that would be tough. Consider mapping to a stored procedure with a table-valued parameter (that would be your list of GUIDs). [Here is a question that discusses this at length, with good examples](http://stackoverflow.com/q/8157345/335858). – Sergey Kalinichenko Feb 06 '13 at 11:27

2 Answers2

1

When I have a lot of parameters (several hundreds or more) to a query I use bulk insert to temporary table and then join it from main table.

My code looks something like this:

    private static DataTable FillDataTable(IEnumerable<int> keys) {
        var dataTable = new DataTable("Stage");
        dataTable.Locale = CultureInfo.CurrentCulture;
        dataTable.Columns.Add("Key", typeof(int));

        foreach (var key in keys) {
            var row = dataTable.NewRow();
            row[0] = key;
            dataTable.Rows.Add(row);
        }

        return dataTable;
    }

    private static void CreateStageTable(SqlConnection connection, string tableName, DataTable dataTable) {
        var sql = new StringBuilder();
        sql.AppendLine("CREATE TABLE {StageTableName} ( ");
        sql.AppendLine("       Key INT NOT NULL ");
        sql.AppendLine(") ");

        sql.Replace("{StageTableName}", SqlUtilities.QuoteName(tableName));

        using (var command = connection.CreateCommand()) {
            command.CommandText = sql.ToString();
            command.CommandType = CommandType.Text;
            command.ExecuteNonQuery();
        }

        using (var bulkcopy = new SqlBulkCopy(connection)) {
            bulkcopy.DestinationTableName = tableName;
            bulkcopy.WriteToServer(dataTable);
        }
    }


 public void DoQuery(IEnumerable<int> keys) {
      var dataTable = FillDataTable(keys);

       using (var connection = new SqlConnection(_connectionString)) {
            connection.Open();

            CreateStageTable(connection, "#Stage", dataTable);

            string sql =   "SELECT x " +
                           "FROM tbl " +
                           "     LEFT JOIN {StageTableName} AS Stage " +
                           "          ON x.Key = Stage.Key "
                           "WHERE Stage.Key IS NULL";

            ...
        }
 }
adrianm
  • 14,468
  • 5
  • 55
  • 102
1

Don't use a List, use a HashSet<string>, this will give you O(1) lookup ups instead of O(n).

Steve
  • 8,469
  • 1
  • 26
  • 37