0

I am trying to read 2 files and compare each item in each file to see if they are equal.

use std::fs::File;
use std::io::{BufRead, BufReader};

// Attempt to print every line of file2.txt that equals some line of file1.txt
// by nesting one `lines()` loop inside another.
// NOTE(review): as posted, this snippet does not compile; the notes below
// point at the lines responsible.
fn main() {
    let filename1 = "file1.txt";
    let filename2 = "file2.txt";

    // Open the file in read-only mode (ignoring errors).
    let file = File::open(filename1).unwrap();
    let reader = BufReader::new(file);

    let file2 = File::open(filename2).unwrap();
    let mut reader2 = BufReader::new(file2);

    // Read the file line by line using the lines() iterator from std::io::BufRead.
    for line1 in reader.lines() {

        // NOTE(review): no binding named `line` exists in this snippet; the loop
        // variable is `line1`, so the right-hand side does not resolve.
        let line = line.unwrap(); // Ignore errors.

        // NOTE(review): `lines()` takes the reader by value, so `reader2` is
        // moved here and is not available on the next outer iteration.
        for line2 in reader2.lines() {
            let line2 = line2.unwrap(); // Ignore errors.
            
            // NOTE(review): `line1` is still the un-unwrapped item yielded by
            // `reader.lines()`, while `line2` has already been unwrapped.
            if line2 == line1 {
                println!("{}",line2)
            }

        }
    }
}

However, this doesn't work. How do I apply a loop over a loop with Buffers?

Henri
  • 983
  • 3
  • 11
  • 29
  • 2
    It is not clear to me what you want this to do. Do you want to compare line by line, or find all matching lines? What does "this doesn't work" mean? Is it that the code doesn't compile, or that it doesn't do what you expect? I _think_ what you need is just to read the second file into a hash set (or a vector) and loop over that instead of looping over `reader2.lines()`, as that doesn't reset the file's iterator on each loop. – Michael Anderson Apr 19 '21 at 03:07
  • Does `for (line1, line2) in reader.lines().zip (reader2.lines())` do what you want? – Jmb Apr 19 '21 at 06:51
  • @Jmb I think that's what is wanted here.. Perhaps consider writing an answer instead since your oneliner might not reveal enough of the solution? – Jonas Berlin Apr 19 '21 at 08:02
  • Let me explain. I have 2 files file1.txt ("cars","toys"....) And file2.txt ("watch", "toys",...) I want to compare the 2 files and get all words in common in the 2 files ("toys" in my example). – Henri Apr 19 '21 at 10:05
  • @Henri existing solutions may inspire you if you want to do a diff. For example https://johannh.me/difference.rs/ – Ratah Apr 19 '21 at 13:12

2 Answers

1

Your first problem is a duplicate of this question. TLDR: you need to call by_ref if you want to be able to reuse reader2 after calling its lines method (eg. in the next loop iteration).

With that your code will compile but won't work, because once you have processed the first line of the first file you are at the end of the second file, so the second file will appear empty when processing the subsequent lines. You can fix that by rewinding the second file for each line. The minimal set of changes that will make your code work is:

use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::fs::File;
use std::io::{BufRead, BufReader};

// Prints every line of file2.txt that equals a line of file1.txt, once per
// matching pair, by rescanning file2.txt for each line of file1.txt.
fn main() {
    let outer_path = "file1.txt";
    let inner_path = "file2.txt";

    // Both files are opened read-only up front; failing to open either panics.
    let outer = BufReader::new(File::open(outer_path).unwrap());
    let mut inner = BufReader::new(File::open(inner_path).unwrap());

    // Stream the first file one line at a time; a read error panics.
    for wanted in outer.lines().map(Result::unwrap) {
        // <-- Added: rewind the second file so this pass starts at its first line.
        inner.seek(SeekFrom::Start(0)).unwrap();

        // <-- Added: `by_ref()` lends the reader to `lines()` instead of moving
        // it, so the same reader is still usable on the next outer iteration.
        inner
            .by_ref()
            .lines()
            .map(Result::unwrap)
            .filter(|candidate| *candidate == wanted)
            .for_each(|hit| println!("{}", hit));
    }
}

However, this will be pretty slow, because the whole second file is re-read for every line of the first. You can make it much faster by reading one of the files into a HashSet and checking whether each line of the other file is in the set:

use std::collections::HashSet;
use std::fs::File;
use std::io::{BufRead, BufReader};

// Prints, in file1.txt order, every line of file1.txt that also occurs
// somewhere in file2.txt.
fn main() {
    let probe_path = "file1.txt";
    let index_path = "file2.txt";

    // The first file is opened before the second one is read; it is streamed
    // further down.
    let probe = BufReader::new(File::open(probe_path).unwrap());

    // Load the second file completely into a set so each membership check is
    // a hash lookup. Collecting into a `Result` stops at the first read
    // error, which `unwrap` then turns into a panic.
    let index: HashSet<String> = BufReader::new(File::open(index_path).unwrap())
        .lines()
        .collect::<Result<_, _>>()
        .unwrap();

    // Keep only the lines of the first file that the set knows about.
    probe
        .lines()
        .map(Result::unwrap)
        .filter(|candidate| index.contains(candidate))
        .for_each(|shared| println!("{}", shared));
}

Finally you can also read both files into HashSets and print out the intersection:

use std::collections::HashSet;
use std::fs::File;
use std::io::{BufRead, BufReader};

// Prints each distinct line that occurs in both file1.txt and file2.txt.
// The lines come out in the sets' iteration order, not in file order.
fn main() {
    let first_path = "file1.txt";
    let second_path = "file2.txt";

    // Opens `path` read-only and gathers its lines into a set, panicking if
    // the file cannot be opened or a line cannot be read.
    let load_unique_lines = |path: &str| -> HashSet<String> {
        BufReader::new(File::open(path).unwrap())
            .lines()
            .collect::<Result<_, _>>()
            .unwrap()
    };

    // The first file is loaded in full before the second one is opened.
    let first = load_unique_lines(first_path);
    let second = load_unique_lines(second_path);

    first
        .intersection(&second)
        .for_each(|shared| println!("{}", shared));
}

As a bonus this last solution will remove duplicate lines. OTOH it won't preserve the order of the lines.

Jmb
  • 18,893
  • 2
  • 28
  • 55
-1

Although I found a solution, it is horribly slow. If anyone has a better solution to find items similar in 2 files please let me know.

use std::fs::File;
use std::io::{BufRead, BufReader};



/// Prints every line of `file1.txt` that equals a line of `file2.txt`.
///
/// Both files are loaded with `findvec`, then every pair of lines is compared,
/// so a line is printed once for each equal line found in `file2.txt`.
fn main() {
    let vec2 = findvec("file1.txt".to_string());
    let vec3 = findvec("file2.txt".to_string());

    // Compare by reference: the strings are already owned by the vectors, so
    // cloning them for every comparison only adds allocations.
    for line in &vec2 {
        for line2 in &vec3 {
            if line == line2 {
                println!("{}", line);
            }
        }
    }
}

    /// Reads the file at `filename` and returns its lines in file order.
    ///
    /// Any path-like value accepted by `File::open` may be passed, so callers
    /// that hand over an owned `String` keep working unchanged.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or if reading a line fails.
    fn findvec(filename: impl AsRef<std::path::Path>) -> Vec<String> {
        // Open the file in read-only mode.
        let reader = BufReader::new(File::open(filename).unwrap());

        // `lines()` yields one `io::Result<String>` per line; unwrap each and
        // collect the strings directly rather than pushing in a manual loop.
        reader.lines().map(|line| line.unwrap()).collect()
    }
Henri
  • 983
  • 3
  • 11
  • 29