0

So far I have made a website in which search features is there. There are different extensions of files(docx,doc,pdf etc..) which user can uploaded in the database. So I need to search the file by its content. By I'm not able to search it properly. I have made two features 1. search by name of file 2. search by phrase

Search by name is working perfectly but there is problem with the search by phrase part.I'm able to convert these files into a text file. But i don't know why I'm not able to search in that file.So, can anyone tell me where I'm wrong or provide me another solution for this.

Here is the code....

homepage.php

<form method="post" action="search1.php" class="container 50%" id="searchform">
                            <input type="text" name="name" placeholder="Enter the terms you wish to search for" />
                                <input type="submit" name="submit" value="Search" class="fit special" />
<input type="radio" id="name" name="search" value="name" class="fit special" />
                                <input type="radio" id="phrase" name="search" value="phrase" class="fit special" />                         
                        </form>

search1.php

    <?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>

<?php
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";
mysql_connect($dbhost,$dbuser,$dbpass) or die('cannot connect to the server'); 
mysql_select_db($dbname) or die('database selection problem');
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
      <table class="alt">
        <thead>
            <tr>
                <th>File Name</th>
                <th>View</th>
            </tr>
        </thead>    
<?php 
     if(isset($_POST['submit'])){ 
      $name=$_POST['name']; 
      if($name!=NULL)
      {
      if (!empty($_POST['search'])) {
        if ($_POST['search']=="phrase") { //search by phrase
      $searchthis = $name;
      $matches = array();

        $query  = "SELECT file from ada ";
        $query .= "UNION ";
        $query .= "SELECT file from cdr ";
        $query .= "UNION ";
        $query .= "SELECT file from others ";
        $query .= "UNION ";
        $query .= "SELECT file from pdr ";
        $query .= "UNION ";
        $query .= "SELECT file from rr ";
        $query .= "UNION ";
        $query .= "SELECT file from sdd ";
        $query .= "UNION ";
        $query .= "SELECT file from tbl_uploads ";

        $result = mysql_query($query);
        $new_file = fopen("sample.txt","w") or die("Unable to open file!!");

        while($row=mysql_fetch_array($result))
         {
            $filepath = getcwd() . "\uploads\\".$row['file'];
            $path = str_replace('//', '\\', $filepath);
            $Obj = new DocxConversion($path);
            $Text= $Obj->convertToText();
            fwrite($new_file,$Text);
            echo $new_file."<br/>";
            $handle = fopen($new_file, "r");
             if ($handle)
             {
                while (!feof($handle))
                {
                     $buffer = fgets($handle);
                     if(strpos($buffer, $searchthis) !== FALSE)
                     {
                         $matches[] = $row['file'];
                         break;
                     }

                }
                    fclose($handle);
              }
         }
       $matches = array_filter($matches);

        if (!empty($matches)) 
        {
               foreach($matches as $row)
                {
                ?>
                <tr>
                <td><?php echo $row ?></td>
                <td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td>
                </tr>
                <?php
                }
        }
        else
        {
            //echo " Phrase not found!!!";
            ?>
            <script>
                alert('Phrase not Found');
                window.location.href='homepage.php';
            </script>
            <?php
        }
      }
     else{                              //search by name
          $array = array(
        "db1" => "ada",
        "db2" => "cdr",
        "db3" => "others",
        "db4" => "pdr",
        "db5" => "rr",
        "db6" => "sdd",
        "db7" => "tbl_uploads",
        );

      //connect  to the database 
      $db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database  because:'.mysql_error()); 

      //-select  the database to use 
      $mydb=mysql_select_db("dbtuts"); 
      $no_of_access = false;
      while ($db_name = current($array)) 
      {  

      //-query  the database table 
      $sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";

      //-run  the query against the mysql query function 
      $result=mysql_query($sql); 
      $num_rows = mysql_num_rows($result);
      if($num_rows > 0)
      {
      //-create  while loop and loop through result set 
      $no_of_access = true;
      while($row=mysql_fetch_array($result))
        {
        ?>
        <tr>
        <td><?php echo $row['file'] ?></td>
        <td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td>
        </tr>
        <?php
        }
      }
      else 
        {
            if(!$no_of_access && $db_name == "tbl_uploads")
            //echo "<p> Result not found!!<p>";
            {
            ?>
            <script>
                alert('Result Not Found!!');
                window.location.href='homepage.php';
            </script>
            <?php
            }
        }
        next($array);
      }

     }    
     }
     else
          { 
            //echo  "<p>Please select an option</p>"; 
            ?>
            <script>
                alert('Please Select an option');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    } 
      else
          { 
            //echo  "<p>Please enter a search query</p>"; 
            ?>
            <script>
                alert('Please enter a search query');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    }
?> 
</table>
</div>
</section>  
</body> 
</html>

The above code searches by name of the file perfectly but there is some problem with the by phrase part.

class.php

  <?php require_once("/includes/pdf.php"); ?>
<?php
class DocxConversion{
    private $filename;

    public function __construct($filePath) {
        $this->filename = $filePath;
    }

/************************doc file************************************/
    private function read_doc() {
        $fileHandle = fopen($this->filename, "r");
        $line = @fread($fileHandle, filesize($this->filename));   
        $lines = explode(chr(0x0D),$line);
        $outtext = "";
        foreach($lines as $thisline)
          {
            $pos = strpos($thisline, chr(0x00));
            if (($pos !== FALSE)||(strlen($thisline)==0))
              {
              } else {
                $outtext .= $thisline." ";
              }
          }
         $outtext = preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
        return $outtext;
    }

/************************docx file************************************/ 
    private function read_docx(){

        $striped_content = '';
        $content = '';

        $zip = zip_open($this->filename);

        if (!$zip || is_numeric($zip)) return false;

        while ($zip_entry = zip_read($zip)) {

            if (zip_entry_open($zip, $zip_entry) == FALSE) continue;

            if (zip_entry_name($zip_entry) != "word/document.xml") continue;

            $content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));

            zip_entry_close($zip_entry);
        }// end while

        zip_close($zip);

        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);
        $striped_content = strip_tags($content);

        return $striped_content;
    }

/************************PDF file************************************/  
    private function read_pdf(){
             $a=new PDF2Text();
             $a->setFilename($this->filename);
             $a->decodePDF();
             echo $a->output();

    }

 /************************excel sheet************************************/

function xlsx_to_text($input_file){
    $xml_filename = "xl/sharedStrings.xml"; //content file name
    $zip_handle = new ZipArchive;
    $output_text = "";
    if(true === $zip_handle->open($input_file)){
        if(($xml_index = $zip_handle->locateName($xml_filename)) !== false){
            $xml_datas = $zip_handle->getFromIndex($xml_index);
            $xml_handle = new DOMDocument();
            $xml_handle->loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
            $output_text = strip_tags($xml_handle->saveXML());
        }else{
            $output_text .="";
        }
        $zip_handle->close();
    }else{
    $output_text .="";
    }
    return $output_text;
}

/*************************power point files*****************************/
function pptx_to_text($input_file){
    $zip_handle = new ZipArchive;
    $output_text = "";
    if(true === $zip_handle->open($input_file)){
        $slide_number = 1; //loop through slide files
        while(($xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml")) !== false){
            $xml_datas = $zip_handle->getFromIndex($xml_index);
            $xml_handle = new DOMDocument();
            $xml_handle->loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
            $output_text .= strip_tags($xml_handle->saveXML());
            $slide_number++;
        }
        if($slide_number == 1){
            $output_text .="";
        }
        $zip_handle->close();
    }else{
    $output_text .="";
    }
    return $output_text;
}


    public function convertToText() {

        if(isset($this->filename) && !file_exists($this->filename)) {
            return "File Not exists";
        }

        $fileArray = pathinfo($this->filename);
        $file_ext  = $fileArray['extension'];
        if($file_ext == "doc" || $file_ext == "docx" || $file_ext == "xlsx" || $file_ext == "pptx" || $file_ext == "pdf")
        {
            if($file_ext == "doc") {
                return $this->read_doc($this->filename);
            } elseif($file_ext == "docx") {
                return $this->read_docx($this->filename);
            } elseif($file_ext == "xlsx") {
                return $this->xlsx_to_text($this->filename);
            }elseif($file_ext == "pptx") {
                return $this->pptx_to_text($this->filename);
            }elseif($file_ext == "pdf") {
                return $this->read_pdf($this->filename);
            }
        } else {
            return "Invalid File Type";
        }
    }

}

?>

the above code class.php converts the doc,docx,xlsx,pdf to text.

pdf.php http://pastebin.com/dvwySU1a this class converts a pdf file to a text file.

2 Answers2

0

This part is wrong (I think):

        fwrite($new_file,$Text);
        echo $new_file."<br/>";
        $handle = fopen($new_file, "r");

in $new_file you have "file pointer or FALSE" from previous fopen.. also you do not close the txt file (you should call fclose after fwrite if you are going to open it then).

Why dont you just search the string for the phrase.. why do you need to write it to another txt file? You can just search the text like here

Community
  • 1
  • 1
nayana
  • 3,787
  • 3
  • 20
  • 51
  • I have closed the file if you look in search1.php "fclose($handle)" and why i want to write in text file because I don't want to print the text from the file. So I'm writing all the converted text to txt file and then searching in it. I can't search the phrase directly bcz some files are encoded in different format like pdf, docx and xlsx file because these are zipped files. And if you look at the while condition in the search1.php file it also doing the same thing. – AaronStone May 31 '16 at 14:11
  • @AaronStone ok but you cannot fopen($new_file .. fopen expects file name as first argument not file descriptor(if I read the docs correctly).. still I do not understand.. I was not telling anything about printing .. just search the string directly ;) I mean the already converted $Text – nayana May 31 '16 at 14:27
0

Finally i got the solution by myself

search1.php

<?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>

<?php
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";
mysql_connect($dbhost,$dbuser,$dbpass) or die('cannot connect to the server'); 
mysql_select_db($dbname) or die('database selection problem');
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
      <table class="alt">
        <thead>
            <tr>
                <th>File Name</th>
                <th>View</th>
            </tr>
        </thead>    
<?php 
     if(isset($_POST['submit'])){ 
      $name=$_POST['name']; 
      if($name!=NULL)
      {
      if (!empty($_POST['search'])) {
        if ($_POST['search']=="phrase") { //search by phrase
            $searchthis = strtolower($name);
            $matches = array();
            $array = array(
        "db1" => "ada",
        "db2" => "cdr",
        "db3" => "others",
        "db4" => "pdr",
        "db5" => "rr",
        "db6" => "sdd",
        "db7" => "tbl_uploads",
        );


        while ($db_name = current($array)) 
      {  

        $query= "SELECT file FROM $db_name";
        $result = mysql_query($query);

        while($row=mysql_fetch_array($result))
         {
            $filepath = getcwd() . "\uploads\\".$row['file'];
            $path = str_replace('//', '\\', $filepath);
            $Obj = new DocxConversion($path);
            $Text= $Obj->convertToText();
            $new_file = fopen("sample.txt","w") or die("Unable to open file!!");
            fwrite($new_file,strtolower($Text));

            $handle = fopen("sample.txt", "r");

            if ($handle)
             {
                while (!feof($handle))
                {
                     $buffer = fgets($handle);
                     if(strpos($buffer, $searchthis) !== FALSE)
                     {
                         $matches[] = $row['file'];
                         break;
                     }

                }
                    fclose($handle);
              }fclose($new_file);
         }next($array);
      } 
       $matches = array_filter($matches);

        if (!empty($matches)) 
        {
               foreach($matches as $row)
                {
                ?>
                <tr>
                <td><?php echo $row ?></td>
                <td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td>
                </tr>
                <?php
                }
        }
        else
        {
            //echo " Phrase not found!!!";
            ?>
            <script>
                alert('Phrase not Found');
                window.location.href='homepage.php';
            </script>
            <?php
        }

      }
     else{                              //search by name
          $array = array(
        "db1" => "ada",
        "db2" => "cdr",
        "db3" => "others",
        "db4" => "pdr",
        "db5" => "rr",
        "db6" => "sdd",
        "db7" => "tbl_uploads",
        );

      //connect  to the database 
      $db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database  because:'.mysql_error()); 

      //-select  the database to use 
      $mydb=mysql_select_db("dbtuts"); 
      $no_of_access = false;
      while ($db_name = current($array)) 
      {  

      //-query  the database table 
      $sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";

      //-run  the query against the mysql query function 
      $result=mysql_query($sql); 
      $num_rows = mysql_num_rows($result);
      if($num_rows > 0)
      {
      //-create  while loop and loop through result set 
      $no_of_access = true;
      while($row=mysql_fetch_array($result))
        {
        ?>
        <tr>
        <td><?php echo $row['file'] ?></td>
        <td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td>
        </tr>
        <?php
        }
      }
      else 
        {
            if(!$no_of_access && $db_name == "tbl_uploads")
            //echo "<p> Result not found!!<p>";
            {
            ?>
            <script>
                alert('Result Not Found!!');
                window.location.href='homepage.php';
            </script>
            <?php
            }
        }
        next($array);
      }

     }    
     }
     else
          { 
            //echo  "<p>Please select an option</p>"; 
            ?>
            <script>
                alert('Please Select an option');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    } 
      else
          { 
            //echo  "<p>Please enter a search query</p>"; 
            ?>
            <script>
                alert('Please enter a search query');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    }
?> 
</table>
</div>
</section>  
</body> 
</html>