7

Can someone help me with code? How do I search a text file for a given word and count how many times it is repeated?

For example test.txt:

hi
hola
hey
hi
bye
hoola
hi

And if I want to know how many times the word "Hi" is repeated in test.txt, the program must say "3 times repeated".

I hope you understood what I want, thank you for answers.

13 Answers13

13
/**
 * Counts the whitespace-separated tokens of a text file that equal the
 * given word, ignoring case.
 *
 * @param word the word to look for
 * @param file the text file to scan
 * @return the number of tokens equal to {@code word} (case-insensitive)
 * @throws java.io.UncheckedIOException if the file cannot be opened
 */
public int countWord(String word, File file) {
    int count = 0;
    // try-with-resources releases the file handle even if scanning fails.
    try (Scanner scanner = new Scanner(file)) {
        // hasNext() is the matching guard for next(): hasNextLine() is still
        // true for trailing blanks or an empty last line, where next() would
        // throw NoSuchElementException.
        while (scanner.hasNext()) {
            if (scanner.next().equalsIgnoreCase(word)) {
                count++;
            }
        }
    } catch (java.io.FileNotFoundException e) {
        // Keep the signature free of checked exceptions but preserve the cause.
        throw new java.io.UncheckedIOException(e);
    }
    return count;
}
Udo Klimaschewski
  • 5,150
  • 1
  • 28
  • 41
  • This throws an exception if the file contains a space at the end of a line. – anshulkatta Jun 27 '14 at 13:49
  • 1
    This fails when the word comes directly after a dot, e.g. "Italy is country.Italy is great place .": the dot in front of the second "Italy" makes it part of a larger token (".Italy"), so the count for Italy will be 1. – anshulkatta Jun 27 '14 at 13:55
3
// Tally how often each line of the file occurs: key = line text, value = count.
HashMap<String, Integer> h = new HashMap<String, Integer>();
// try-with-resources closes the reader (and the stream it wraps) even when
// reading fails part-way through.
try (BufferedReader br = new BufferedReader(
        new InputStreamReader(new FileInputStream("d:\\file.txt")))) {
    String n;
    while ((n = br.readLine()) != null) {
        // A missing key means this is the first time the line is seen.
        Integer seen = h.get(n);
        h.put(n, seen == null ? 1 : seen + 1);
    }
}

Now iterate through this map to get the count for each word, using each word as the key to look up its count.

Bhavik Shah
  • 5,125
  • 3
  • 23
  • 40
2

Apache Commons - StringUtils.countMatches()

Andy
  • 1,964
  • 1
  • 15
  • 29
1

Use MultiSet collection from google guava library.

// Tally identical lines of the input in a Guava Multiset.
Multiset<String> wordsMultiset = HashMultiset.create();
// NOTE(review): if fileName is a String, Scanner(String) scans the text of the
// name itself rather than the file's content; pass a File or Path in that
// case - confirm against the caller.
try (Scanner scanner = new Scanner(fileName)) {
    while (scanner.hasNextLine()) {
        wordsMultiset.add(scanner.nextLine());
    }
}
// Iterating the Multiset directly yields its String elements; entrySet()
// yields one entry per distinct element together with its count.
for (Multiset.Entry<String> entry : wordsMultiset.entrySet()) {
    System.out.println("Word : " + entry.getElement() + " count -> " + entry.getCount());
}
Subhrajyoti Majumder
  • 40,646
  • 13
  • 77
  • 103
1
package File1;

import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Reads {@code F:/Line.txt} and prints its content joined onto one line, the
 * number of space-separated words, how often each distinct word occurs
 * (ignoring case), and the number of lines.
 */
public class CountLineWordsDuplicateWords {

    /**
     * Program entry point.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        int counLine = 0;
        StringBuilder joined = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader br = new BufferedReader(new FileReader("F:/Line.txt"))) {
            String s;
            while ((s = br.readLine()) != null) {
                joined.append(s).append(" "); /* Add space */
                counLine++;
            }
            String stringLine = joined.toString();
            System.out.println(stringLine);

            String[] stringArray = stringLine.split(" ");
            System.out.println("The number of Words is " + stringArray.length);

            /* Duplicate String count code */
            // The first 'distinct' slots of stringArray hold the distinct words
            // in order of first appearance; counts[k] is the tally for slot k.
            // Compacting in place is safe because distinct never exceeds the
            // index currently being read, and it avoids shifting the array
            // (the old shift loop read one element past the end).
            int[] counts = new int[stringArray.length];
            int distinct = 0;
            for (int i = 0; i < stringArray.length; i++) {
                String word = stringArray[i];
                int slot = -1;
                for (int k = 0; k < distinct; k++) {
                    if (stringArray[k].equalsIgnoreCase(word)) {
                        slot = k;
                        break;
                    }
                }
                if (slot >= 0) {
                    counts[slot]++;
                } else {
                    stringArray[distinct] = word;
                    counts[distinct] = 1;
                    distinct++;
                }
            }
            for (int k = 0; k < distinct; k++) {
                System.out.println("The " + stringArray[k] + " present " + counts[k] + " times .");
            }
            System.out.println("The number of Line is " + counLine);
            System.out.println();
        } catch (Exception e) {
            e.printStackTrace();
        }
    } // End of main() method
} // End of class CountLineWordsDuplicateWords
1
package somePackage;   
/**
 * Counts the whitespace-separated words of a text file that begin with
 * {@code testWord} (ignoring case) and prints the total.
 *
 * <p>The match is a prefix match: a word counts when its first
 * {@code testWord.length()} characters equal {@code testWord}.
 *
 * @param args not used
 */
public static void main(String[] args) {

    String path = ""; //ADD YOUR PATH HERE
    String fileName = "test2.txt";
    String testWord = "Macbeth"; //CHANGE THIS IF YOU WANT
    int tLen = testWord.length();
    int wordCntr = 0;
    String file = path + fileName;
    // Loop-invariant: lower-case the search word once, not once per line.
    String testWordLower = testWord.toLowerCase();

    // try-with-resources closes the reader even when readLine() throws;
    // previously close() was skipped on any exception.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(file)))) {
        String strLine;
        //Read File Line By Line
        while ((strLine = br.readLine()) != null) {
            // Cheap pre-filter: skip lines that cannot contain testWord at all.
            if (!strLine.toLowerCase().contains(testWordLower)) {
                continue;
            }
            // Split the line into words and test each one's leading characters.
            for (String w : strLine.split("\\s+")) {
                // Only words at least as long as testWord can have it as a prefix.
                if (w.length() >= tLen) {
                    String word = w.substring(0, tLen).trim();
                    if (word.equalsIgnoreCase(testWord)) {
                        wordCntr++;
                    }
                }
            }
        }
        System.out.println("total is: " + wordCntr);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Accribus
  • 150
  • 14
1
/**
 * Small demo: counts how many space-separated tokens of a fixed sentence are
 * exactly "is" and prints that number.
 */
public class Wordcount
{
   /**
    * Prints the number of "is" tokens in the sample sentence.
    *
    * @param args not used
    */
   public static void main(String[] args)
   {
       final String sentence = "hi this is is is line";
       int matches = 0;

       // Case-sensitive comparison against every token.
       for (String token : sentence.split(" "))
       {
           if ("is".equals(token))
           {
               matches++;
           }
       }

       System.out.println(matches);
   }
}
AnV
  • 2,794
  • 3
  • 32
  • 43
Jagan
  • 11
  • 1
  • Hi, welcome to SO. Posting new, updated solutions to old questions is always good, but please try to make those answers as informative and clear as possible. Try adding descriptions to your code and make sure it's formatted correctly. Also, please try to avoid useless comments. – svarog Nov 21 '15 at 12:41
  • How does this answer add value to a 3 years old post? There are other similar answers here. – cassiomolin Nov 21 '15 at 12:52
0

You can read text file line by line. I assume that each line can contain more than one word. For each line, you call:

// Break the line on single spaces, then count every token that equals the
// searched word regardless of case.
String[] words = line.split(" ");
for (String token : words) {
    if (token.equalsIgnoreCase(searhedWord)) {
        count++;
    }
}
dijkstra
  • 1,068
  • 2
  • 16
  • 39
0

try using java.util.Scanner.

/**
 * Counts how many alphabetic words in a text file equal the given word,
 * ignoring case. Every non-letter character acts as a word separator.
 *
 * @param w        the word to look for
 * @param fileName path of the text file to scan
 * @return the number of words equal to {@code w} (case-insensitive)
 * @throws java.io.UncheckedIOException if the file cannot be opened
 */
public int countWords(String w, String fileName) {
    int count = 0;
    // Open the file named by the parameter; try-with-resources closes it.
    try (Scanner scanner = new Scanner(new java.io.File(fileName))) {
        scanner.useDelimiter("[^a-zA-Z]"); // non alphabets act as delimiters
        // Visit every token, not just the first, and guard next() with hasNext().
        while (scanner.hasNext()) {
            if (scanner.next().equalsIgnoreCase(w)) {
                count++;
            }
        }
    } catch (java.io.FileNotFoundException e) {
        // Keep the signature free of checked exceptions but preserve the cause.
        throw new java.io.UncheckedIOException(e);
    }
    return count;
}
Ravindra Bagale
  • 17,226
  • 9
  • 43
  • 70
0

Try it this way with Pattern and Matcher.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads {@code d://My.txt} and prints how many times the word "it" occurs in
 * it, using {@link Pattern} and {@link Matcher}.
 */
public class Dem {

    /**
     * Program entry point.
     *
     * @param args not used
     */
    public static void main(String[] args) {

        try {
            File f = new File("d://My.txt");
            StringBuilder text = new StringBuilder();

            // Collect the whole file. The old loop overwrote its accumulator
            // with each readLine() result, so it ended up null after the loop.
            try (BufferedReader br = new BufferedReader(new FileReader(f))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // Keep a line break so words on adjacent lines stay separate.
                    text.append(line).append('\n');
                }
            }

            int count = 0;
            // Word boundaries make this match the whole word "it" only;
            // the previous pattern "it*" also matched a bare "i".
            Pattern pat = Pattern.compile("\\bit\\b");
            Matcher mat = pat.matcher(text);

            // Each successful find() is one occurrence; calling find() twice
            // per iteration skipped every other match.
            while (mat.find()) {
                count++;
            }

            System.out.println(count);
        } catch (Exception e) {

            e.printStackTrace();
        }
    }

}
Kumar Vivek Mitra
  • 33,294
  • 6
  • 48
  • 75
0
import java.io.*;
import java.util.*;

/**
 * Asks for a search string on standard input and prints how many times it
 * occurs (as a substring, overlapping occurrences included) in
 * {@code c:/file.txt}.
 */
class filedemo
{
    /**
     * Program entry point.
     *
     * @param ar not used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String ar[]) throws Exception
    {
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader br = new BufferedReader(new FileReader("c:/file.txt"))) {
            System.out.println("enter the string which you search");
            Scanner ob = new Scanner(System.in);
            String str = ob.next();

            StringBuilder text = new StringBuilder();
            String str1;
            while ((str1 = br.readLine()) != null) {
                // Keep the line break so the end of one line and the start of
                // the next cannot combine into a false match.
                text.append(str1).append('\n');
            }

            int count = 0;
            // Search onward from just past the previous hit instead of copying
            // the remaining text with substring() on every match.
            int index = text.indexOf(str);
            while (index != -1) {
                count++;
                index = text.indexOf(str, index + 1);
            }

            System.out.println("Number of the occures=" + count);
        }
    }
}
0
package com.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Scanner;

/**
 * Reads a search string from standard input and prints how many
 * space-separated tokens of {@code src/test.txt} contain it.
 */
public  class Test {

    /**
     * Program entry point.
     *
     * @param args not used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args)  throws Exception{

        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader bf = new BufferedReader(new FileReader("src/test.txt"))) {
            Scanner sc = new Scanner(System.in);
            String W = sc.next();
            int count = 0;

            // Test for null before using the line: the former do-while
            // dereferenced the first line unconditionally, so an empty file
            // caused a NullPointerException.
            String line;
            while ((line = bf.readLine()) != null) {
                for (String a : line.split(" ")) {
                    // Substring containment, so "Italy." still counts for "Italy".
                    if (a.contains(W)) {
                        count++;
                    }
                }
            }
            System.out.println(count);
        }
    }

}
anshulkatta
  • 2,044
  • 22
  • 30
0
/**
 * Counts the occurrences of "Hi" in {@code Text} by measuring how much
 * shorter the text becomes once every "Hi" is removed.
 *
 * @return the number of "Hi" occurrences
 */
public int occurrencesOfHi()
{
    final int removedChars = Text.length() - Text.replace("Hi","").length();
    // Each removed occurrence accounts for two characters.
    return removedChars / 2;
}
John V
  • 5
  • 2