I'm building a web app in PHP and I have to count the words of an uploaded .doc or .docx file. So far I'm using the functions below to count the words, but this code is not working for Greek characters.
For .doc:
/**
 * Estimates the word count of a legacy binary .doc file.
 *
 * This is a byte-level heuristic, not a real parser: the raw file is split on
 * carriage returns (0x0D), every chunk that contains a NUL byte is discarded
 * as binary data, the remaining chunks are joined with spaces, reduced to a
 * fixed set of ASCII characters and counted with str_word_count().
 *
 * NOTE(review): because chunks containing NUL bytes are dropped and all
 * non-ASCII bytes are removed, text that Word stores as UTF-16 (which is
 * presumably how Greek and other non-Latin text is stored) is not counted by
 * this heuristic. Counting such text needs a real .doc parser, or a
 * conversion to .docx before counting.
 *
 * @param string $file Path to the .doc file.
 * @return int|false Estimated word count, or false when the file cannot be read.
 */
public static function docWordCount($file){
    // Read the file in one call: no handle is left open, and an empty file
    // yields an empty string instead of an invalid zero-length fread().
    if (!is_file($file) || !is_readable($file)) {
        return false;
    }
    $raw = file_get_contents($file);
    if ($raw === false) {
        return false;
    }

    $outtext = "";
    foreach (explode(chr(0x0D), $raw) as $chunk) {
        // Skip empty chunks and chunks holding NUL bytes (treated as binary).
        if ($chunk === "" || strpos($chunk, chr(0x00)) !== false) {
            continue;
        }
        $outtext .= $chunk." ";
    }

    // Keep only ASCII letters, digits, whitespace and a little punctuation.
    $outtext = preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
    return str_word_count((string) $outtext);
}
and for .docx:
/**
 * Counts the words in the main body of a .docx file.
 *
 * Reads word/document.xml from the archive, turns paragraph ends, line
 * breaks and tabs into whitespace, strips the XML markup, decodes XML
 * entities and counts words with a Unicode-aware pattern, so UTF-8 text in
 * non-Latin scripts (for example Greek) is counted.
 *
 * A word is a letter followed by any run of letters, combining marks,
 * apostrophes and hyphens. As with str_word_count(), digits are not treated
 * as word characters.
 *
 * @param string $file Path to the .docx file.
 * @return int|false Word count (0 when the archive has no word/document.xml),
 *                   or false when the file cannot be opened as a zip archive.
 */
public static function docxWordCount($file){
    $zip = new \ZipArchive();
    // open() returns true on success and an integer error code otherwise.
    if ($zip->open($file) !== true) {
        return false;
    }
    $content = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($content === false || $content === '') {
        return 0;
    }

    // Make sure text on either side of a paragraph end, line break or tab
    // does not merge into a single word once the tags are stripped.
    $spaced = preg_replace('/<\/w:p>|<w:(?:br|tab|cr)\b[^>]*\/>/', ' ', $content);
    if ($spaced === null) {
        $spaced = $content;
    }

    // Strip tags first, then decode entities, so that escaped markup in the
    // text (e.g. "&lt;b&gt;") is kept as text and "&amp;" is not counted as "amp".
    $text = html_entity_decode(strip_tags($spaced), ENT_QUOTES | ENT_XML1, 'UTF-8');

    $count = preg_match_all('/\p{L}[\p{L}\p{M}\'\x{2019}\-]*/u', $text);
    if ($count === false) {
        // The /u pattern fails on invalid UTF-8; fall back to the byte-based count.
        return str_word_count($text);
    }
    return $count;
}