I am new to Python, and my professor assumes that everyone understands the code he posts, but I am having trouble using the function searchnames to find a certain pattern in an HTML file. This is the code he posted, which he claims does the job. I have no idea what the symbols in the findall call mean.
def searchnames(cont):
    """Extract (rank, boy, boy_count, girl, girl_count) records from *cont*.

    The pattern describes plain text laid out as::

        <rank> <boyname><boycount>
         <girlname>
        <girlcount>

    where each count is either plain digits or digits with one comma
    ("18,899").  Markup such as ``<td>1</td> <td>Jacob</td>`` does not match,
    because the pattern requires whitespace directly after the rank digits.

    Returns a list of 5-tuples of strings, one per match; the list is empty
    when nothing matches.  If *cont* is not a string, the message
    "couldn't find child info" is printed and an empty list is returned.
    """
    # Pattern pieces, in order:
    #   (\d+)           rank: one or more digits
    #   \s              exactly one whitespace character
    #   ([^\W\d_]+)     boy's name: letters only.  A plain \w+ here would also
    #                   consume digits and steal the front of the count that
    #                   follows with no separator ("Jacob18,899" -> "Jacob1").
    #   (\d+,\d+|\d+)   boy's count, with or without a thousands comma
    #   \n\s            newline, then one whitespace character
    #   (\w+)           girl's name (terminated by the newline that follows)
    #   \n              newline
    #   (\d+,\d+|\d+)   girl's count
    pattern = r'(\d+)\s([^\W\d_]+)(\d+,\d+|\d+)\n\s(\w+)\n(\d+,\d+|\d+)'
    try:
        return re.findall(pattern, cont)
    except TypeError:
        # re.findall rejects non-string input; keep the best-effort contract.
        print("couldn't find child info")
        return []
Here, cont is the contents of an HTML file that contains this:
<head><title>Popular Baby Names</title>
<meta name="dc.language" scheme="ISO639-2" content="eng">
<meta name="dc.creator" content="OACT">
<meta name="lead_content_manager" content="JeffK">
<meta name="coder" content="JeffK">
<meta name="dc.date.reviewed" scheme="ISO8601" content="2006-03-10">
<link rel="stylesheet" href="../OACT/templatefiles/master.css" type="text/css" media="screen">
<link rel="stylesheet" href="../OACT/templatefiles/print.css" type="text/css" media="print">
</head>
<body bgcolor="#ffffff" text="#000000" topmargin="1" leftmargin="0">
<table width="100%" border="0" cellspacing="0" cellpadding="4">
<tbody>
<tr>
<td class="sstop" valign="bottom" align="left" width="25%">
Social Security Online
</td>
<td valign="bottom" class="titletext">
<!-- sitetitle -->Popular Baby Names
</td>
</tr>
<tr bgcolor="#333366"><td colspan="2" height="1"></td></tr>
<tr>
<td class="graystars" width="25%" valign="top">
<a href="../OACT/babynames/">Popular Baby Names</a></td>
<td valign="top">
<a href="http://www.ssa.gov/"><img src="/templateimages/tinylogo.gif"
width="52" height="47" align="left"
alt="SSA logo: link to Social Security home page" border="0"></a>
<h1>Popular Names by Birth Year</h1>September 11, 2014</td>
</tr>
</tbody></table>
<script type="text/javascript" src="../OACT/babynames/chkinput.js"></script>
<table width="100%" border="0" cellspacing="0" cellpadding="4" summary="formatting">
<tr valign="top">
<td width="25%" class="greycell">
<a href="../OACT/babynames/background.html">Background information</a>
<p><br />
Select another <label for="yob">year of birth</label>?<br />
<form name="popnames" method="post" action="/cgi-bin/popularnames.cgi"
onSubmit="return submitIt();">
<input type="text" name="year" id="yob" size="4" value="2012"><input type="hidden" name="top" value="25"><input type="hidden" name="number" value="">
<input type="submit" value=" Go "></form>
</td>
<td><p align="center"><table width="$tablewidth" border="1" bordercolor="#aaabbb" cellpadding="2" cellspacing="0" summary="Popularity for top 25">
<caption><h2>Popularity in 2012</h2></caption>
<tr align="center" valign="bottom">
<th scope="col" width="12%" bgcolor="#efefef">Rank</th>
<th scope="col" width="$colwidth" bgcolor="#99ccff">Male name</th>
<th scope="col" bgcolor="pink" width="41%">Female name</th></tr>
<tr align="right">
<td>1</td> <td>Jacob</td> <td>Sophia</td>
</tr>
<tr align="right">
<td>2</td> <td>Mason</td> <td>Emma</td>
</tr>
<tr align="right">
<td>3</td> <td>Ethan</td> <td>Isabella</td>
</tr>
<tr align="right">
<td>4</td> <td>Noah</td> <td>Olivia</td>
</tr>
<tr align="right">
<td>5</td> <td>William</td> <td>Ava</td>
</tr>
<tr align="right">
<td>6</td> <td>Liam</td> <td>Emily</td>
</tr>
<tr align="right">
<td>7</td> <td>Michael</td> <td>Abigail</td>
</tr>
<tr align="right">
<td>8</td> <td>Jayden</td> <td>Mia</td>
</tr>
<tr align="right">
<td>9</td> <td>Alexander</td> <td>Madison</td>
</tr>
<tr align="right">
<td>10</td> <td>Aiden</td> <td>Elizabeth</td>
</tr>
<tr align="right">
<td>11</td> <td>Daniel</td> <td>Chloe</td>
</tr>
<tr align="right">
<td>12</td> <td>Matthew</td> <td>Ella</td>
</tr>
<tr align="right">
<td>13</td> <td>Elijah</td> <td>Avery</td>
</tr>
<tr align="right">
<td>14</td> <td>James</td> <td>Addison</td>
</tr>
<tr align="right">
<td>15</td> <td>Anthony</td> <td>Aubrey</td>
</tr>
<tr align="right">
<td>16</td> <td>Benjamin</td> <td>Lily</td>
</tr>
<tr align="right">
<td>17</td> <td>Joshua</td> <td>Natalie</td>
</tr>
<tr align="right">
<td>18</td> <td>Andrew</td> <td>Sofia</td>
</tr>
<tr align="right">
<td>19</td> <td>Joseph</td> <td>Charlotte</td>
</tr>
<tr align="right">
<td>20</td> <td>David</td> <td>Zoey</td>
</tr>
<tr align="right">
<td>21</td> <td>Jackson</td> <td>Grace</td>
</tr>
<tr align="right">
<td>22</td> <td>Logan</td> <td>Hannah</td>
</tr>
<tr align="right">
<td>23</td> <td>Christopher</td> <td>Amelia</td>
</tr>
<tr align="right">
<td>24</td> <td>Gabriel</td> <td>Harper</td>
</tr>
<tr align="right">
<td>25</td> <td>Samuel</td> <td>Lillian</td>
</tr>
<tr><td colspan="3"><small>Note: Rank 1 is the most popular,
rank 2 is the next most popular, and so forth.
</table></p>
</td></tr></table>
<table class="printhide" width="100%" border="0" cellpadding="1" cellspacing="0">
<tr bgcolor="#333366"><td height="1" colspan="2"></td></tr>
<tr>
<td width="26%" valign="middle"> </td>
<td valign="top" class="seventypercent">
<a href="http://www.ssa.gov/privacy.html">Privacy Policy</a>
| <a href="http://www.ssa.gov/websitepolicies.htm">Website Policies
& Other Important Information</a>
| <a href="http://www.ssa.gov/sitemap.htm">Site Map</a></td>
</tr>
</table>
</body></html>
I cannot understand how to find the rank of a child (the first `<td>` cell in each table row) or the name. Whenever I try to run the program, info is an empty list, and I don't understand why. Any help would be appreciated. This is my whole program:
import re
def searchtitle(cont):
    """Return the "Popularity in YYYY" heading text found in *cont*.

    The pattern is the word "Popularity", one whitespace character, "in",
    one whitespace character, then exactly four digits.  The whole matched
    text is returned (group 0), not just the four-digit year in group 1.

    Returns None, after printing "couldn't find title", when the text is not
    present or *cont* is not a string.
    """
    try:
        title = re.search(r'Popularity\sin\s(\d\d\d\d)', cont)
    except TypeError:
        # re.search rejects non-string input; treat it like "not found".
        title = None

    if title is None:
        print("couldn't find title")
        return None
    return title.group(0)
def searchnames(cont):
    """Extract (rank, boy, boy_count, girl, girl_count) records from *cont*.

    The pattern describes plain text laid out as::

        <rank> <boyname><boycount>
         <girlname>
        <girlcount>

    where each count is either plain digits or digits with one comma
    ("18,899").  Markup such as ``<td>1</td> <td>Jacob</td>`` does not match,
    because the pattern requires whitespace directly after the rank digits.

    Returns a list of 5-tuples of strings, one per match; the list is empty
    when nothing matches.  If *cont* is not a string, the message
    "couldn't find child info" is printed and an empty list is returned.
    """
    # Pattern pieces, in order:
    #   (\d+)           rank: one or more digits
    #   \s              exactly one whitespace character
    #   ([^\W\d_]+)     boy's name: letters only.  A plain \w+ here would also
    #                   consume digits and steal the front of the count that
    #                   follows with no separator ("Jacob18,899" -> "Jacob1").
    #   (\d+,\d+|\d+)   boy's count, with or without a thousands comma
    #   \n\s            newline, then one whitespace character
    #   (\w+)           girl's name (terminated by the newline that follows)
    #   \n              newline
    #   (\d+,\d+|\d+)   girl's count
    pattern = r'(\d+)\s([^\W\d_]+)(\d+,\d+|\d+)\n\s(\w+)\n(\d+,\d+|\d+)'
    try:
        return re.findall(pattern, cont)
    except TypeError:
        # re.findall rejects non-string input; keep the best-effort contract.
        print("couldn't find child info")
        return []
def _split_columns(rows):
    """Transpose 5-tuples of strings into five parallel lists.

    Each row is (rank, boy_name, boy_count, girl_name, girl_count).  Ranks and
    counts are converted to int; commas in the counts ("18,899") are removed
    before conversion.
    """
    ranks, boysnames, boysfreq, girlsnames, girlsfreq = [], [], [], [], []
    for rank, boy, boy_count, girl, girl_count in rows:
        ranks.append(int(rank))
        boysnames.append(boy)
        boysfreq.append(int(boy_count.replace(',', '')))
        girlsnames.append(girl)
        girlsfreq.append(int(girl_count.replace(',', '')))
    return ranks, boysnames, boysfreq, girlsnames, girlsfreq


def main():
    """Read Popular_Baby_Names.html and print its title and parsed columns.

    Prints, in order: the title, the raw match list, then the rank, boys'
    name, boys' count, girls' name and girls' count lists, and finally the
    sum of the first two ranks when at least two rows were found.

    Raises SystemExit (status 1) after printing "file couldn't be found" if
    the file cannot be opened or read.
    """
    try:
        # `with` closes the handle even if read() fails.
        with open('Popular_Baby_Names.html') as handle:
            cont = handle.read()
    except IOError:
        print("file couldn't be found")
        # A bare `SystemExit` expression does nothing; it must be raised.
        raise SystemExit(1)

    ti = searchtitle(cont)
    info = searchnames(cont)
    print(ti)
    print(info)

    # `or []` keeps the loop safe if searchnames reported a failure with None.
    ranks, boysnames, boysfreq, girlsnames, girlsfreq = _split_columns(info or [])
    print(ranks)
    print(boysnames)
    print(boysfreq)
    print(girlsnames)
    print(girlsfreq)
    # With fewer than two matches there is no second rank to add.
    if len(ranks) >= 2:
        print(ranks[0] + ranks[1])


if __name__ == '__main__':
    main()