-3

I am looking the for the correct syntax to use a text function to find strings that contain underscores. I have a very large document that has numerous tables names with underscores. They are in the format abc_def or abc_def_ghi or abc_def_ghi_jkl etc.

I have tried various combinations and cannot get anything to work as I expect it to. Below is my latest iteration.

Sub ExtractTablesToNewDocument()
'=========================
'Macro created 2008 by Lene Fredborg, DocTools - www.thedoctools.com
'THIS MACRO IS COPYRIGHT. YOU ARE WELCOME TO USE THE MACRO BUT YOU MUST KEEP THE LINE ABOVE.
'YOU ARE NOT ALLOWED TO PUBLISH THE MACRO AS YOUR OWN, IN WHOLE OR IN PART.
'=========================
'The macro creates a new document,
'finds all words consisting of 3 or more uppercase letters
'in the active document and inserts the words
'in column 1 of a 3-column table in the new document
'Each acronym is added only once
'Use column 2 for definitions
'Page number of first occurrence is added by the macro in column 3

'Minor adjustments are made to the styles used
'You may need to change the style settings and table layout to fit your needs
'=========================

Dim oDoc_Source As Document
Dim oDoc_Target As Document
Dim strListSep As String
Dim strAcronym As String
Dim oTable As Table
Dim oRange As Range
Dim n As Long
Dim strAllFound As String
Dim Title As String
Dim Msg As String

Title = "Extract Acronyms to New Document"

'Show msg - stop if user does not click Yes
Msg = "This macro finds all words consisting of 3 or more " & _
    "uppercase letters and extracts the words to a table " & _
    "in a new document where you can add definitions." & vbCr & vbCr & _
    "Do you want to continue?"

If MsgBox(Msg, vbYesNo + vbQuestion, Title) <> vbYes Then
    Exit Sub
End If

Application.ScreenUpdating = False

'Find the list separator from international settings
'May be a comma or semicolon depending on the country
strListSep = Application.International(wdListSeparator)

'Start a string to be used for storing names of acronyms found
strAllFound = "#"

Set oDoc_Source = ActiveDocument

'Create new document for acronyms
Set oDoc_Target = Documents.Add

With oDoc_Target
    'Make sure document is empty
    .Range = ""

    'Insert info in header - change date format as you wish
    .PageSetup.TopMargin = CentimetersToPoints(3)
    .Sections(1).Headers(wdHeaderFooterPrimary).Range.Text = _
        "Acronyms extracted from: " & oDoc_Source.FullName & vbCr & _
        "Created by: " & Application.UserName & vbCr & _
        "Creation date: " & Format(Date, "MMMM d, yyyy")

    'Adjust the Normal style and Header style
    With .Styles(wdStyleNormal)
        .Font.Name = "Arial"
        .Font.Size = 10
        .ParagraphFormat.LeftIndent = 0
        .ParagraphFormat.SpaceAfter = 6
    End With

    With .Styles(wdStyleHeader)
        .Font.Size = 8
        .ParagraphFormat.SpaceAfter = 0
    End With

    'Insert a table with room for acronym and definition
    Set oTable = .Tables.Add(Range:=.Range, NumRows:=2, NumColumns:=3)
    With oTable
        'Format the table a bit
        'Insert headings
        .Range.Style = wdStyleNormal
        .AllowAutoFit = False

        .Cell(1, 1).Range.Text = "Acronym"
        .Cell(1, 2).Range.Text = "Definition"
        .Cell(1, 3).Range.Text = "Page"
        'Set row as heading row
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Font.Bold = True
        .PreferredWidthType = wdPreferredWidthPercent
        .Columns(1).PreferredWidth = 20
        .Columns(2).PreferredWidth = 70
        .Columns(3).PreferredWidth = 10
    End With
End With

With oDoc_Source
    Set oRange = .Range

    n = 1 'used to count below

    With oRange.Find
        'Use wildcard search to find strings consisting of 3 or more uppercase letters
        'Set the search conditions
        'NOTE: If you want to find acronyms with e.g. 2 or more letters,
        'change 3 to 2 in the line below
        .Text = "<*>[_]<*>"
        .Forward = True
        .Wrap = wdFindStop
        .Format = False
        .MatchCase = False
        .MatchWildcards = True

        'Perform the search
        Do While .Execute
            'Continue while found
            strAcronym = oRange
            'Insert in target doc

            'If strAcronym is already in strAllFound, do not add again
            If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then
                'Add new row in table from second acronym
                If n > 1 Then oTable.Rows.Add
                'Was not found before
                strAllFound = strAllFound & strAcronym & "#"

                'Insert in column 1 in oTable
                'Compensate for heading row
                With oTable
                    .Cell(n + 1, 1).Range.Text = strAcronym
                    'Insert page number in column 3
                    .Cell(n + 1, 3).Range.Text = oRange.Information(wdActiveEndPageNumber)
                End With

                n = n + 1
            End If
        Loop
    End With
End With

'Sort the acronyms alphabetically - skip if only 1 found
If n > 2 Then
    With Selection
        .Sort ExcludeHeader:=True, FieldNumber:="Column 1", SortFieldType _
            :=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending

        'Go to start of document
        .HomeKey (wdStory)
    End With
End If

Application.ScreenUpdating = True

'If no acronyms found, show msg and close new document without saving
'Else keep open
If n = 1 Then
    Msg = "No acronyms found."
    oDoc_Target.Close savechanges:=wdDoNotSaveChanges
Else
    Msg = "Finished extracting " & n - 1 & " acronymn(s) to a new document."
End If

MsgBox Msg, vbOKOnly, Title

'Clean up
Set oRange = Nothing
Set oDoc_Source = Nothing
Set oDoc_Target = Nothing
Set oTable = Nothing

End Sub
dwirony
  • 5,487
  • 3
  • 21
  • 43
samtau
  • 3
  • 3
  • could you please show a bit more of your code? I'm not sure what you are trying. Are you using regex, or just iterate through the words and compare, or...? – vacip Dec 31 '18 at 15:19
  • The problem is that [Word treats underscores as word boundaries](https://stackoverflow.com/q/21146470/11683), so the `<` and `>` won't help. If you need to capture entire words with underscores as opposed to the underscores themselves, [use a proper regex](https://stackoverflow.com/q/25102372/11683). – GSerg Dec 31 '18 at 15:42
  • As a side note - nice copyright note at the top - does that really have any validity??? – dwirony Dec 31 '18 at 17:55
  • Revise your search to find the first '_' then extend the found range forward and backwards to find the bracketing space or whitespace characters. The range can be extended by making a copy of the found range using .duplicate and then movestartuntil and moveenduntil. You will need to select a suitable set of characters to account for things like commas, full stops etc. – freeflow Dec 31 '18 at 18:45
  • Although a wildcard search for ([_A-Z]{1,}) might do the job better. – freeflow Dec 31 '18 at 18:49
  • @Freeflow below is my output from the wild card search for If lY_mtv_iv_inp changes, the test is being deleted, and (either the test is not for a GRADE, or the lm_me_are is missing), and the test is not ORIG, then raise an error. Acronym Definition Page _ 1 GRADE 1 I 1 ORIG 1 Y_ 1 I am looking for an output of Acronym Definition Page lY_mtv_iv_inp 1 lm_me_are 1 – samtau Dec 31 '18 at 20:07
  • Sorry i cannot get my table to format in my comment – samtau Dec 31 '18 at 20:21
  • @dwirony Yes the copyright notice is valid. Indeed, any code you or I write is subject to copyright protection even without such a notice. – macropod Dec 31 '18 at 21:11

3 Answers3

1

Try:

.Text = "[! ]@_[! ]{1,}"

This will find strings containing however many underscores there might be, including where those strings start or end with an underscore.

macropod
  • 12,757
  • 2
  • 9
  • 21
  • I updated the code to your suggestion and created a document with the simple test commented below and word gets hung up and stops responding any suggestions? – samtau Jan 02 '19 at 14:59
  • If lY_mtv_iv_inp changes, the test is being deleted, and (either the test is not for a GRADE, or the lm_me_are is missing), and the test is not ORIG, then raise an error. – samtau Jan 02 '19 at 14:59
  • @samtau Perhaps you could explain what you're trying to do - the macro was originally written for extracting acronyms, but the name you've given it and the desired Find expression suggest you're trying to do something rather different. – macropod Jan 02 '19 at 21:21
  • I am trying to extract table acronyms from a large 50+ page technical document. all Table acronyms have underscores in their names. Ignore the initial macro message as i didn't modify that yet. Two comments above is an example excerpt from my document. "lY_mtv_iv_inp" is a table Acronym using underscores. So in the example above i would like to extract "lY_mtv_iv_inp" and "lm_me_are" as the table acronyms. Hope this helps – samtau Jan 03 '19 at 13:27
0

Try:

Sub AcronymLister()
Application.ScreenUpdating = False
Dim StrTmp As String, StrAcronyms As String, i As Long, j As Long, k As Long, Rng As Range, Tbl As Table
StrAcronyms = "Acronym" & vbTab & "Page" & vbCr
With ActiveDocument
  With .Range
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .MatchWildcards = True
      .Wrap = wdFindStop
      .Text = "[! ^13^t^11]@_[! ^13^t^11]{1,}"
      .Replacement.Text = ""
      .Execute
    End With
    Do While .Find.Found = True
      If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
      End If
      If Len(.Text) = 0 Then .End = .Paragraphs(1).Range.Next.Start
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
  End With
  With .Range
    Set Rng = .Characters.Last
    With Rng
      If .Characters.First.Previous <> vbCr Then .InsertAfter vbCr
      .InsertAfter Chr(12)
      .Collapse wdCollapseEnd
      .Style = "Normal"
      .Text = StrAcronyms
      Set Tbl = .ConvertToTable(Separator:=vbTab, NumRows:=.Paragraphs.Count, NumColumns:=2)
      With Tbl
        .Columns.AutoFit
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Style = "Strong"
        .Rows.Alignment = wdAlignRowCenter
      End With
      .Collapse wdCollapseStart
    End With
  End With
End With
Set Rng = Nothing: Set Tbl = Nothing
Application.ScreenUpdating = True
End Sub

Note: The above code puts the acronym table at the end of the current document. I've modified the Find expression to exclude tabs, paragraph breaks & line breaks.

macropod
  • 12,757
  • 2
  • 9
  • 21
  • Thank you for all your time and help it is greatly appreciated. The code works great for a text only file but gets stuck in the loop code below when the file has an object or a footer. Any thoughts? Again Thank You for your time and support. I created a test file but have no way to provide access to it. Do While .Find.Found = True If (InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0) Then StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr End If .Collapse wdCollapseEnd .Find.Execute Loop – samtau Jan 04 '19 at 15:51
  • Sorry i have spent almost an hour now trying to get the code to format correctly in the comment above. I stink...... – samtau Jan 04 '19 at 16:54
  • The code I posted works through the document body only; it doesn't look in headers, footers or textboxes. But then, neither does your code in post #1. So I don't understand your 'The code works great for a text only file but gets stuck in the loop code below when the file has an object or a footer' comment. FWIW, you can't post formatted code in comments. – macropod Jan 04 '19 at 20:26
  • Sorry i was not clear. I dont want to search objects or footers. Everything works great with a document that only has text. If i add an object into that same document the code then gets stuck in a loop. Is there anyway to upload/share the test file i created? – samtau Jan 04 '19 at 21:29
  • You could upload the file to, say, OneDrive, and post a link here. – macropod Jan 04 '19 at 22:13
  • I have uploaded two files. "Macro Test V1 Objects" This file the macro gets stuck in the code loop in the above comment. "Macro Test V1 No Objects" This file the does not have any objects and the code works great. [No Object]https://drive.google.com/open?id=19kv_ToNfYSTqOeatxapDs2q-va2cipxw [Object]https://drive.google.com/open?id=1AAPC2SFGIEM0DCX4qmH5hJIQlXJvysYZ – samtau Jan 07 '19 at 14:47
  • @samtau Code revised. Try it now. – macropod Jan 07 '19 at 21:47
  • I cannot seem to find where the code changed. Did the changes save? – samtau Jan 08 '19 at 13:50
  • I added an If test - which seems to have disappeared. I've re-added it - and it's still there. – macropod Jan 08 '19 at 20:53
  • I tested string length code you added but it is still getting stuck in a loop with the test file at. Do While .Find.Found = True – samtau Jan 08 '19 at 21:23
  • @samtau I forgot to add the Else part of the added If test. Fixed. – macropod Jan 09 '19 at 00:34
  • I retested the code and it still appears to be stuck in the loop. – samtau Jan 09 '19 at 01:00
  • Not when I run it with your sample documents – macropod Jan 09 '19 at 02:44
  • were you able to get it to work in the "Macro Test V1 Objects" file? For me it gets stuck in the "If InStr(.Text, "_") > 0 Then" loop. I added some code in an answer above to address zero length strings and then it worked. Do you think it could be some local setting on my end that is causing it to get stuck in the loop? – samtau Jan 09 '19 at 03:23
  • Try it with the latest revision. – macropod Jan 09 '19 at 06:20
0
Sub AcronymLister()
Application.ScreenUpdating = False
Dim StrTmp As String, StrAcronyms As String, i As Long, j As Long, k As Long, Rng As Range, Tbl As Table
StrAcronyms = "Acronym" & vbTab & "Page" & vbCr
With ActiveDocument
  With .Range
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .MatchWildcards = True
      .Wrap = wdFindStop
      .Text = "[! ^13^t^11]@_[! ^13^t^11]{1,}"
      .Replacement.Text = ""
      .Execute
    End With
    Do While .Find.Found = True
      If InStr(.Text, "_") = 0 Then
        If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
        Else
          .End = .Paragraphs(1).Range.Next.Start
        End If
      End If
     If InStr(.Text, "_") > 0 Then
        If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
        Else
          .End = .Paragraphs(1).Range.Next.Start
        End If
      End If
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
  End With
  With .Range
    Set Rng = .Characters.Last
    With Rng
      If .Characters.First.Previous <> vbCr Then .InsertAfter vbCr
      .InsertAfter Chr(12)
      .Collapse wdCollapseEnd
      .Style = "Normal"
      .Text = StrAcronyms
      Set Tbl = .ConvertToTable(Separator:=vbTab, NumRows:=.Paragraphs.Count, NumColumns:=2)
      With Tbl
        .Columns.AutoFit
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Style = "Strong"
        .Rows.Alignment = wdAlignRowCenter
      End With
      .Collapse wdCollapseStart
    End With
  End With
End With
Set Rng = Nothing: Set Tbl = Nothing
Application.ScreenUpdating = True
End Sub
@macropod i added a loop to handle zero length strings and the code appears to be working. I know this is not the most efficient method, do you have any suggestions for improvement? Thank you again for walking me through this i really appreciate all your help and knowledge.
samtau
  • 3
  • 3