I was looking for just such a solution, so thanks, marcog. Here's an attempt to port your friend's Python version to PHP (I don't know Python or Perl, so there are probably some mistakes):
/**
 * Guess the English indefinite article ("a" or "an") for a word.
 *
 * The decision is heuristic: it is driven by a few hard-coded prefixes,
 * letter-name tables and regular expressions rather than by a dictionary,
 * so unusual words can still get the wrong article.
 *
 * Rules are tried in order and the first one that applies wins:
 *   1. prefixes that always take "an" (silent "h", "Euler")
 *   2. single letters, judged by how the letter's name is pronounced
 *   3. capitalised abbreviations that are spelled out letter by letter
 *   4. vowel-initial words that nevertheless start with a consonant sound
 *   5. words containing no lowercase letters, judged by their first letter
 *   6. ordinary vowel-initial words, and "y" words with an "i" sound
 *
 * @param string $word The word to find the article for. Surrounding
 *                     whitespace is ignored.
 * @return string Either "a" or "an" (the article only, without the word).
 *                An empty or whitespace-only $word yields "a".
 */
function indefinite_article($word) {
    // Ignore surrounding whitespace so that it cannot defeat the prefix and
    // first-letter tests below.
    $word = trim((string) $word);

    // Nothing to inspect: fall back to the default article instead of reading
    // a string offset that does not exist.
    if ($word === '') {
        return "a";
    }

    // Lowercase version of the word, used by the case-insensitive rules
    $word_lower = strtolower($word);

    // Word beginnings that are always preceded by 'an'
    $an_words = array('euler', 'heir', 'honest', 'hono');
    foreach ($an_words as $an_word) {
        if (strncmp($word_lower, $an_word, strlen($an_word)) === 0) {
            return "an";
        }
    }

    // "hour..." takes 'an', except for "houri..." where the h is sounded
    if (strncmp($word_lower, 'hour', 4) === 0 && strncmp($word_lower, 'houri', 5) !== 0) {
        return "an";
    }

    // Letters whose spoken name starts with a vowel sound ("eff", "aitch", "ell", ...)
    $an_letters = array('a', 'e', 'f', 'h', 'i', 'l', 'm', 'n', 'o', 'r', 's', 'x');

    // A single-letter word is judged by the name of that letter
    if (strlen($word) === 1) {
        return in_array($word_lower, $an_letters, true) ? "an" : "a";
    }

    // Capitalised abbreviations that are likely spelled out and so preceded by
    // 'an' (e.g. "FBI", "MRI"). The pattern must be anchored at the start of the
    // word: unanchored, it would also fire on capitals in the middle of a word
    // and turn "BMW" into "an BMW".
    if (preg_match('/^(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]/', $word)) {
        return "an";
    }

    // Special cases where a word that begins with a vowel is preceded by 'a'
    $regex_array = array('^e[uw]', '^onc?e\b', '^uni([^nmd]|mo)', '^u[bcfhjkqrst][aeiou]');
    foreach ($regex_array as $regex) {
        if (preg_match('/' . $regex . '/', $word_lower)) {
            return "a";
        }
    }

    // Special capital words ("UNESCO", "UKIP", ...)
    if (preg_match('/^U[NK][AIEO]/', $word)) {
        return "a";
    }

    // Originally commented "// Not sure what this does": a word containing no
    // lowercase letters is treated as an initialism that is read out letter by
    // letter, so the article depends on the name of its first letter. This is
    // also what gives "a" for initialisms starting with U ("a USB").
    if ($word === strtoupper($word)) {
        return in_array($word_lower[0], $an_letters, true) ? "an" : "a";
    }

    // Basic rule: words that begin with a vowel are preceded by 'an'
    $vowels = array('a', 'e', 'i', 'o', 'u');
    if (in_array($word_lower[0], $vowels, true)) {
        return "an";
    }

    // Instances where y followed by specific letters is preceded by 'an'
    if (preg_match('/^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)/', $word_lower)) {
        return "an";
    }

    // Default to 'a'
    return "a";
}
There's one bit (below the comment "// Not sure what this does") whose purpose I couldn't work out. If anyone can figure out what it does, I'd be happy to know.