1

I have an extremely large data sheet which I need to remove duplicates from a column, however there is different data in corresponding columns, that I need to keep

Example                              
Record  Incident  Person
  1        101       A
  2        201       D    
  3        301       X
  1        102       C
  4        401       K
  1        101       A
  2        202       F
  1        101       W
  4        401       S

I need to become:

Record  Incident  Person
  1        101       A, W
  2        201       D    
  3        301       X
  1        102       C
  4        401       K, S
  2        202       F

The person column can be separated by a column or be in an additional row, I'm not fussy.

So far I have altered the following macro to work by moving each duplicate to a new worksheet which I have then merged back to the original sheet.

Sub macro()
Dim aIds As Variant
Application.ScreenUpdating = False
ActiveSheet.Copy after:=Worksheets(Worksheets.Count)
ActiveSheet.Name = "Temp"
Set origSh = ActiveSheet
Worksheets.Add
Set myRng = Range(origSh.Range("A2"), origSh.Range("A" & Rows.Count).End(xlUp))
myRng.Copy Destination:=ActiveSheet.Range("A1")
ActiveSheet.Range("A:A").RemoveDuplicates Columns:=1, Header:=xlNo
aIds = WorksheetFunction.Index(WorksheetFunction.Transpose(Range(Range("A1"), Range("A" & 
Rows.Count).End(xlUp)).Value), 1, 0)
Application.DisplayAlerts = False
ActiveSheet.Delete
Application.DisplayAlerts = True
Res = 0
For Each Item In aIds
Res = WorksheetFunction.Max(Res, WorksheetFunction.CountIf(myRng, Item))
Next
For Idx = 1 To Res
Worksheets.Add after:=Worksheets(Worksheets.Count)
ActiveSheet.Name = "Result" & Idx
origSh.Cells(1, "A").EntireRow.Copy ActiveSheet.Range("A1")
For Each Item In aIds
    Res1 = 0
    On Error Resume Next
    Res1 = WorksheetFunction.Match(Item, myRng, 0)
    On Error GoTo 0
    If Res1 Then
        origSh.Cells(Res1 + 1, "A").EntireRow.Copy ActiveSheet.Range("A" & Rows.Count).End(xlUp).Offset(1)
        origSh.Cells(Res1 + 1, "A").EntireRow.Delete
    End If
Next

Next
Application.DisplayAlerts = False
origSh.Delete
Application.DisplayAlerts = True
Application.ScreenUpdating = True
End Sub

I'm sure it would be possible to avoid this additional step however I have undertaken a new project with no macros training and am struggling to work it out on my own.

Pᴇʜ
  • 56,719
  • 10
  • 49
  • 73
Kate
  • 11
  • 5

1 Answers1

0

The Dictionary with String Manipulation

Adjust the values in the constants section as you see fit.

Option Explicit

Sub writeUnique()

    ' Constants
    Const srcSheet As String = "Sheet1"  ' Source Worksheet Name
    Const tgtSheet As String = "Sheet2"  ' Target Worksheet Name
    Const FirstRow As Long = 2           ' Worksheet First Row (of values)
    Const srcCols As String = "A:C"      ' Source Columns Address
    Const tgtCell As String = "A2"       ' Target First Cell Address

    ' Other Variables
    Dim rng As Range        ' Last Non-Empty Cell in Source Columns,
                            ' Source Range (All Data in Source Columns)
    Dim dict As Object      ' Dictionary (Object)
    Dim Key As Variant      ' Dictionary Key (For Each Control Variable)
    Dim Source As Variant   ' Source Array
    Dim Curr As Variant     ' Current Value in Unique Column
    Dim First As Variant    ' First Column Array
    Dim Target As Variant   ' Target Array
    Dim i As Long           ' Source Array Rows Counter
    Dim k As Long           ' First Column Array or Target Array Rows Counter

    ' Read from Source Sheet and write to Source Array.
    With ThisWorkbook.Worksheets(srcSheet)
        Set rng = .Columns(srcCols) _
          .Find("*", , xlFormulas, , xlByRows, xlPrevious)
        If rng Is Nothing Then Exit Sub
        If rng.Row < FirstRow Then Exit Sub
        Set rng = .Columns(srcCols).Resize(rng.Row - FirstRow + 1) _
          .Offset(FirstRow - 1)
    End With
    Source = rng: Set rng = Nothing ' All data is in Source Array.

    ' Write from Source Array to the Dictionary and to First Column Array.
    Set dict = CreateObject("Scripting.Dictionary")
    ReDim First(1 To UBound(Source)) ' The Dictionary can only hold 2 values.
    For i = 1 To UBound(Source)
        Curr = Source(i, 2)
        If Curr <> 0 Then
            If Not dict.exists(Curr) Then
                dict(Curr) = Source(i, 3)
                k = k + 1
                First(k) = Source(i, 1)
            Else
                dict(Curr) = dict(Curr) & ", " & Source(i, 3)
            End If
        End If
    Next i
    ReDim Preserve First(1 To k)

    ' Write values from First Array and the Dictionary to Target Array.
    ReDim Target(1 To k, 1 To UBound(Source, 2))
    Erase Source ' All data is in First Array and the Dictionary.
    k = 0
    For Each Key In dict.Keys
        k = k + 1
        Target(k, 1) = First(k)
        Target(k, 2) = Key
        Target(k, 3) = dict(Key)
    Next

    ' Clear contents and write to Target Range.
    With ThisWorkbook.Worksheets(tgtSheet).Range(tgtCell)
      ' Clear contents in whole columns below Target First Cell.
      .Resize(.Parent.Rows.Count - .Row + 1, UBound(Target, 2)).ClearContents
      ' Write from Target Array to Target Range.
      .Resize(UBound(Target), UBound(Target, 2)) = Target
    End With

End Sub
VBasic2008
  • 44,888
  • 5
  • 17
  • 28