import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class Work {

    public static void main(String[] args) throws IOException {
        Map<String, Integer> m1 = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader("error.txt"))) {
            StringBuilder sb = new StringBuilder();
            String line = br.readLine();
            while (line != null) {
                String[] words = line.split(" "); // **This is where I was stuck**
                for (int i = 0; i < words.length; i++) {
                    if (m1.get(words[i]) == null) {
                        m1.put(words[i], 1);
                    } else {
                        m1.put(words[i], m1.get(words[i]) + 1);
                    }
                }
                sb.append(System.lineSeparator());
                line = br.readLine();
            }
        }
        Map<String, Integer> sorted = new TreeMap<>(m1);
        for (String key : sorted.keySet()) {
            System.out.println("Error : " + key + " repeated " + sorted.get(key) + " times.");
        }
    }

}

I have a text file as below and I want to count the duplicate lines. I was stuck at how to split and count them. Can anyone help me?

ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
    2018-10-29T12:01:00Z E! Error in plugin [inputs.openldap]: LDAP Result Code 32 "No Such Object": 
ERROR  [CompactionExecutor:21454] 2018-10-29 12:02:41,906 NoSpamLogger.java:91 - Maximum memory usage reached (125.000MiB), cannot allocate chunk of 1.000MiB
    2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
    2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
    2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
    2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
    2018-09-20 14:08:14.571 [main] ERROR  org.apache.flink.yarn.YarnApplicationMasterRunner  -     -Dlogback.configurationFile=file:logback.xml
  • this might be helpful - https://stackoverflow.com/questions/46796021/nospamlogger-java-maximum-memory-usage-reached-cassandra – Derrick Oct 31 '18 at 07:17
  • If you just want to count duplicate lines, then why do you need to split on words? – Scary Wombat Oct 31 '18 at 07:18
  • Then how do I count duplicate lines? – Pavan Oct 31 '18 at 07:18
  • create a `Map` – Scary Wombat Oct 31 '18 at 07:25
  • For every sentence at index i, check whether it is a duplicate of any sentence at an index in [0, i-1]. If yes, skip it; otherwise count the number of duplicates by comparing it with every sentence at index [i+1, n-1]. Finally, add all such counts. Complexity will be O(n^2 * L), where L is the length of the longest sentence. – thepurpleowl Oct 31 '18 at 07:26
  • Oh, I see you are running it on YARN, based on "YarnApplicationMasterRunner", and you have reached 'Maximum memory usage'? I think you may consider getting rid of the HashMap-to-TreeMap transformation, and also of the appends to the StringBuilder. – Dzmitry Prakapenka Oct 31 '18 at 07:28
  • And as @thepurpleowl specified, there are algorithms that use less memory at the cost of performance. – Dzmitry Prakapenka Oct 31 '18 at 07:29
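
For illustration, a minimal sketch of the quadratic approach described in thepurpleowl's comment above, assuming the lines have already been read into a `List<String>` (the class and method names here are made up):

import java.util.List;

class PairwiseCount {
    // For each line, skip it if it already appeared earlier; otherwise count
    // its later occurrences. Runs in O(n^2 * L), as noted in the comment.
    static void countDuplicates(List<String> lines) {
        int n = lines.size();
        for (int i = 0; i < n; i++) {
            boolean seenEarlier = false;
            for (int j = 0; j < i; j++) {
                if (lines.get(j).equals(lines.get(i))) {
                    seenEarlier = true; // already counted at its first occurrence
                    break;
                }
            }
            if (seenEarlier) {
                continue;
            }
            int count = 1;
            for (int j = i + 1; j < n; j++) {
                if (lines.get(i).equals(lines.get(j))) {
                    count++;
                }
            }
            System.out.println(lines.get(i) + " repeated " + count + " times.");
        }
    }
}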

2 Answers


Please try this if you are using Java 8 and the intent is to count duplicate lines (not words):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class Work {
    public static void main(String[] args) throws IOException {
        Map<String, Long> dupes = Files.lines(Paths.get("/tmp/error.txt"))
                .collect(Collectors.groupingBy(Function.identity(),
                        Collectors.counting()));

        // pretty print: the count, then the first 50 characters of the line
        dupes.forEach((k, v) -> System.out.printf("(%d) times : %s ....%n",
                v, k.substring(0, Math.min(50, k.length()))));
    }
}

output:

(2) times : 2018-09-20 14:08:14.571 [main] ERROR  org.apache.f ....
(8) times :     2018-10-29T12:01:00Z E! Error in plugin [input ....
(9) times : ERROR  [CompactionExecutor:21454] 2018-10-29 12:02 ....
(5) times :     2018-09-20 14:08:14.571 [main] ERROR  org.apac ....
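
If sorted output is wanted, as in the original `TreeMap` attempt, `Collectors.groupingBy` also accepts a map factory. A minimal sketch along the same lines (the class name `SortedWork` is made up; the path is the same as above), which additionally closes the file by holding the stream in try-with-resources:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class SortedWork {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the file handle behind Files.lines
        try (Stream<String> lines = Files.lines(Paths.get("/tmp/error.txt"))) {
            Map<String, Long> dupes = lines.collect(Collectors.groupingBy(
                    Function.identity(), TreeMap::new, Collectors.counting()));
            dupes.forEach((k, v) -> System.out.println("(" + v + ") times : " + k));
        }
    }
}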

A `Map<String, Integer>` can be used, with each line (record) as the key and its count as the value:

Map<String, Integer> countMap = new HashMap<>();

try (BufferedReader br = new BufferedReader(new FileReader("D:\\error.txt"))) {

    String data;
    while ((data = br.readLine()) != null) {
        // the whole line is the key; bump its count
        if (countMap.containsKey(data)) {
            countMap.put(data, countMap.get(data) + 1);
        } else {
            countMap.put(data, 1);
        }
    }

    countMap.forEach((k, v) -> System.out.println(k + " occurs " + v + " times."));

} catch (IOException e) {
    e.printStackTrace();
}
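
On Java 8+, the `containsKey`/`put` pair can be collapsed with `Map.merge`; a small sketch of the same loop, reusing the `countMap` and `br` names from the snippet above:

String data;
while ((data = br.readLine()) != null) {
    // inserts 1 on first sight, otherwise adds 1 to the existing count
    countMap.merge(data, 1, Integer::sum);
}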