First of all, I can't stress enough; fix your indentation - it will make life easier for everyone.
Secondly, the answer depends a lot on the actual bottlenecks you are encountering:
- Regular expressions are very slow, especially when they're in a loop.
- Databases tend to either work well for WRITES or for READS but not BOTH: try decreasing the amount of queries beforehand.
- It stands to reason that the less PHP code in your loop, the faster it will work. Consider decreasing conditions (for instance).
- For the record, your code is not safe against mysql injection: filter $_POST before hand [*]
- [*] speaking of which, it's faster to access a variable than the index of an array, like $_POST.
- You can simulate multithreading by having your main program split the huge CSV file into a smaller one and run each file into a different process.
common.php
class FileLineFinder {
protected $handle, $length, $curpos;
public function __construct($file){
$handle = fopen($file, 'r');
$length = strlen(PHP_EOL);
}
public function next_line(){
while(!feof($this->handle)){
$b = fread($this->handle, $this->length);
$this->curpos += $this->length;
if ($b == PHP_EOL) return $this->curpos;
}
return false;
}
public function skip_lines($count){
for($i = 0; $i < $count; $i++)
$this->next_line();
}
public function __destruct(){
fclose($this->handle);
}
}
function exec_async($cmd, $outfile, $pidfile){
exec(sprintf("%s > %s 2>&1 & echo $! >> %s", $cmd, $outfile, $pidfile));
}
main.php
require('common.php');
$maxlines = 200; // maximum lines subtask will be processing at a time
$note = $_POST['note'];
$file = $_FILES['csv']['tmp_name'];
$outdir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'out' . DIRECTORY_SEPARATOR;
//make sure our output directory exists
if(!is_dir($outdir))
if(!mkdir($outdir, 0755, true))
die('Cannot create output directory: '.$outdir);
// run a task for each chunk of lines in the csv file
$i = 0; $pos = 0;
$l = new FileLineFinder($file);
do {
$i++;
exec_async(
'php -f sub.php -- '.$pos.' '.$maxlines.' '.escapeshellarg($file).' '.escapeshellarg($note),
$outdir.'proc'.$i.'.log',
$outdir.'proc'.$i.'.pid'
);
$l->skip_lines($maxlines);
} while($pos = $l->next_line());
// wait for each task to finish
do {
$tasks = count(glob($outdir.'proc*.pid'));
echo 'Remaining Tasks: '.$tasks.PHP_EOL;
} while ($tasks > 0);
echo 'Finished!'.PHP_EOL;
sub.php
require('common.php');
$start = (int)$argv[1];
$count = (int)$argv[2];
$file = $argv[3];
$note = mysql_real_escape_string($argv[4]);
$lines = 0;
$handle = fopen($file, 'r');
fseek($handle, $start, SEEK_SET);
$expression = "/^[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,3})$/";
mysql_query('SET NAMES utf8');
//loop through the csv file and insert into database
do {
$lines++;
if ($data[0]) {
if (preg_match($expression, $data[0])) {
$query = mysql_query('SELECT * FROM `postfix`.`recipient_access` where recipient="'.$data[0].'"');
$fetch = mysql_fetch_array($query);
if($fetch['recipient'] != $data[0]){
$query = mysql_query('INSERT INTO `postfix`.`recipient_access`(`recipient`, `note`) VALUES("'.$data[0].'","'.$note.'")');
}
}
}
} while (($data = fgetcsv($handle, 1000, ',', '\'')) && ($lines < $count));
Credits