I have a list of thousands of chemical formulae in a spreadsheet, and I want to count the number of times each chemical element appears in each chemical formula. Some examples are given here:
- CH3NO3
- CSe2
- C2Cl2
- C2Cl2O2
- C4H6COOH
- (C6H5)2P(CH2)6P(C6H5)2
I have found some code by @PEH (Extract numbers from chemical formula) that works really well. However, it becomes very slow when extracting thousands of values. So, I have created an array version (see below), and managed to speed things up with some input from another user (How to speed up extracting numbers from chemical formula). It works and really speeds things up. However, I also need it to find multiple elements within parentheses (the code below does not currently work for chemical formula no. 6 above - should be 30 C, 32 H, 2 P). I am hoping someone might be able to suggest a way to build on the regex approach below to achieve this. There was some code in the original ChemRegex to do this (https://stackoverflow.com/a/46091904/17194644) but I could not make it work in the sub - it gives this error if I try to include it in the sub:
Private RegEx As RegExp
Function CountElements(ChemFormulaRange As Variant, ElementRange As Variant) As Variant
'define variables
Dim RetValRange() As Long
Dim RetVal As Long
Dim ChemFormula As String
Dim npoints As Long
Dim i As Long
Dim mpoints As Long
Dim j As Long
' Connvert input ranges to variant arrays
If TypeName(ChemFormulaRange) = "Range" Then ChemFormulaRange = ChemFormulaRange.Value
If TypeName(ElementRange) = "Range" Then ElementRange = ElementRange.Value
'parameter
npoints = UBound(ChemFormulaRange, 1) - LBound(ChemFormulaRange, 1) + 1
mpoints = UBound(ElementRange, 2) - LBound(ElementRange, 2) + 1
'dimension array
ReDim RetValRange(1 To npoints, 1 To mpoints)
If RegEx Is Nothing Then
Set RegEx = New RegExp
' apply the properties
End If
'calculate all values
For j = 1 To mpoints
Element = ElementRange(1, j)
For i = 1 To npoints
RetVal = 0
ChemFormula = ChemFormulaRange(i, 1)
Call ChemRegex(ChemFormula, Element, RetVal, RegEx)
RetValRange(i, j) = RetVal
Next i
Next j
'output answer
CountElements = RetValRange
End Function
Private Sub ChemRegex(ChemFormula, Element, RetVal, RegEx)
'ChemRegex created by PEH (CC BY-SA 4.0) https://stackoverflow.com/a/46091904/17194644
With RegEx
.Global = True
.MultiLine = True
.IgnoreCase = False
End With
'first pattern matches every element once
RegEx.Pattern = "([A][cglmrstu]|[B][aehikr]?|[C][adeflmnorsu]?|[D][bsy]|[E][rsu]|[F][elmr]?|[G][ade]|[H][efgos]?|[I][nr]?|[K][r]?|[L][airuv]|[M][cdgnot]|[N][abdehiop]?|[O][gs]?|[P][abdmortu]?|[R][abefghnu]|[S][bcegimnr]?|[T][abcehilms]|[U]|[V]|[W]|[X][e]|[Y][b]?|[Z][nr])([0-9]*)"
Dim Matches As MatchCollection
Set Matches = RegEx.Execute(ChemFormula)
Dim m As Match
For Each m In Matches
If m.SubMatches(0) = Element Then
RetVal = RetVal + IIf(Not m.SubMatches(1) = vbNullString, m.SubMatches(1), 1)
End If
Next m
End Sub