2

I'm trying to convert the following sequential code to a multithreaded code but the results don't sound reasonable to me.

package com.net;

import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class Req {

    HttpClient client = HttpClient.newHttpClient();

    private String getResource(String someUrl) {
        String body = "";
        try {
            URI url = new URI(someUrl);
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(url).GET().build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
            body = response.body();
        } catch (URISyntaxException e) {
            System.out.println("URL " + someUrl + "is not valid");
        } catch (IOException | InterruptedException e) {
            System.out.println(e.getMessage());
        }
        return body;
    }


    public static void main(String[] args){

        String[] topIranianSites = {
                "https://www.aparat.com/",
                "http://www.varzesh3.com/",
                "http://namnak.com/",
                "http://www.telewebion.com/",
                "https://divar.ir/",
                "https://www.ninisite.com/",
                "https://www.blogfa.com/",
                "http://www.namasha.com/",
                "http://www.yjc.ir/"
        };

        Req singleThreadReq = new Req();
        float totalElapsedTime = 0F;

        for (String site : topIranianSites){
            long fetchStartTime = System.currentTimeMillis();
            String html = singleThreadReq.getResource(site);
            float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;

            Document doc = Jsoup.parse(html);
            System.out.println("It took " + elapsed + " seconds to fetch " + site + " with title " + doc.title());
            totalElapsedTime += elapsed;
        }

        System.out.println("Total Elapsed Time: " + totalElapsedTime + "\nTotal Number of sites: " + topIranianSites.length);

    }
}

This is the output

WARNING: Using incubator modules: jdk.incubator.httpclient
It took 2.622 seconds to fetch https://www.aparat.com/ with title آپارات - سرویس اشتراک ویدیو
It took 0.455 seconds to fetch http://www.varzesh3.com/ with title 
It took 0.521 seconds to fetch http://namnak.com/ with title نمناک
It took 2.172 seconds to fetch http://www.telewebion.com/ with title تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
General SSLEngine problem
It took 0.229 seconds to fetch https://divar.ir/ with title 
It took 1.769 seconds to fetch https://www.ninisite.com/ with title نی نی سایت | راهنمای بارداری و بچه داری
Received fatal alert: handshake_failure
It took 0.382 seconds to fetch https://www.blogfa.com/ with title 
It took 2.641 seconds to fetch http://www.namasha.com/ with title نماشا - سرویس رایگان اشتراک ویدیو
It took 0.503 seconds to fetch http://www.yjc.ir/ with title 
Total Elapsed Time: 11.294001
Total Number of sites: 9

From the sequential output I guess the correct multithreaded code should take about 2.8 seconds to fetch all 9 sites. But my implementation of multithreaded code takes more

package com.net;

import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class ReqThreaded implements Runnable {

    class Site {
        String url;
        String title;
        float fetchTime;
    }

    private HttpClient client = HttpClient.newHttpClient();

    private Thread[] threadPool;
    private String[] rawSites;
    private Site[] sitesArr;
    private int sitesDone = 0;
    long startTime = System.currentTimeMillis();
    float totalElapsed = 0F;

    public ReqThreaded(String[] sites) {
        threadPool = new Thread[sites.length];
        sitesArr = new Site[sites.length];
        rawSites = sites;

        for (int i = 0; i < sites.length; i++) {
            startThread(i);
        }

        while (sitesDone < sites.length) {
            try {
                Thread.sleep(1000);
                totalElapsed = (float) (System.currentTimeMillis() - startTime) / 1000;
                System.out.print("\rElapsed time: " + totalElapsed + "Sites Done: " + sitesDone);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        System.out.println("\n\nStatistics:\n\n");

        for (Site someSite : sitesArr) {
            System.out.println("URL " + someSite.url + "\nTitle: " + someSite.title + "\nFetch Time: " + someSite.fetchTime + "\n\n");
        }

    }

    private void startThread(int i) {
        if (threadPool[i] == null) {
            threadPool[i] = new Thread(this);
            threadPool[i].start();
        }
    }


    private String getResource(String someUrl) {
        String body = "";
        try {
            URI url = new URI(someUrl);
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(url).GET().build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
            body = response.body();
        } catch (URISyntaxException e) {
            System.out.println("URL " + someUrl + "is not valid");
        } catch (IOException | InterruptedException e) {
            System.out.println(e.getMessage());
        }
        return body;
    }


    @Override
    public void run() {
        Thread thisThread = Thread.currentThread();
        int sitesIndex = 0;

        for (int j = 0; j < threadPool.length; j++) {
            if (thisThread == threadPool[j]) {
                sitesIndex = j;
            }
        }
        long fetchStartTime = System.currentTimeMillis();
        String html = getResource(rawSites[sitesIndex]);
        float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;
        sitesDone++;
        Document doc = Jsoup.parse(html);
        sitesArr[sitesIndex] = new Site();
        sitesArr[sitesIndex].url = rawSites[sitesIndex];
        sitesArr[sitesIndex].title = doc.title();
        sitesArr[sitesIndex].fetchTime = elapsed;
    }

    public static void main(String[] args) {

        String[] topIranianSites = {
                "https://www.aparat.com/",
                "http://www.varzesh3.com/",
                "http://namnak.com/",
                "http://www.telewebion.com/",
                "https://divar.ir/",
                "https://www.ninisite.com/",
                "https://www.blogfa.com/",
                "http://www.namasha.com/",
                "http://www.yjc.ir/"
        };

        new ReqThreaded(topIranianSites);

    }
}

This is the output of multithreaded code. Both Total time and fetch time for each url seems incorrect. I think something is blocking here or some kind of race condition. What's wrong here?

WARNING: Using incubator modules: jdk.incubator.httpclient
General SSLEngine problem
Received fatal alert: handshake_failure
Elapsed time: 7.068Sites Done: 9

Statistics:


URL https://www.aparat.com/
Title: آپارات - سرویس اشتراک ویدیو
Fetch Time: 4.808


URL http://www.varzesh3.com/
Title: 
Fetch Time: 5.904


URL http://namnak.com/
Title: نمناک
Fetch Time: 1.056


URL http://www.telewebion.com/
Title: تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
Fetch Time: 6.569


URL https://divar.ir/
Title: 
Fetch Time: 0.53


URL https://www.ninisite.com/
Title: نی نی سایت | راهنمای بارداری و بچه داری
Fetch Time: 4.287


URL https://www.blogfa.com/
Title: 
Fetch Time: 0.767


URL http://www.namasha.com/
Title: نماشا - سرویس رایگان اشتراک ویدیو
Fetch Time: 4.539


URL http://www.yjc.ir/
Title: 
Fetch Time: 0.836
m.d
  • 169
  • 2
  • 9
  • 2
    First of all [creating Threads in java is expensive](https://stackoverflow.com/questions/5483047/why-is-creating-a-thread-said-to-be-expensive), second `Thread.sleep(1000);` will unnecessarily increase the duration. –  Mar 07 '18 at 12:34
  • 3
    You should use an `ExecutorService` instead. 90% of the code in the question is unnecessary and possibly wrong. – Kayaman Mar 07 '18 at 12:42
  • @devpuh i thought threads are far lighter than processes to work with blocking operations so what's the right way? something like async await that uses coroutines is the best way to handle this kind of job in python. what's the best way to do it in java? – m.d Mar 07 '18 at 12:53
  • @kayman Could you provide me with the right answer? – m.d Mar 07 '18 at 12:55
  • @m.d indeed, java threads are lighter than processes. But each thread requires memory for its stack, usually 0.1 - 1 megabytes. So the right approach is to use threads while you can afford to spend memory for them, then switch to tasks and thread pool. – Alexei Kaigorodov Mar 08 '18 at 08:52
  • @AlexeiKaigorodov I am really eager to see your implementation of multithreaded code. Can you edit your answer to include that? thanks. – m.d Mar 08 '18 at 09:42
  • Possible duplicate of [Java 9 Multithreaded HTTP request](https://stackoverflow.com/questions/49179537/java-9-multithreaded-http-request) – Robert Mar 08 '18 at 18:20
  • Please don't post your question twice. – Robert Mar 08 '18 at 18:21

1 Answers1

0

I fixed some obvious errors in class ReqThreaded like improper synchronization, moved method run() into class Site, and made result printing identical to that in the single-threaded variant. Thread pool was not used, for each request a separate thread was created.

The result is as follows:

Received fatal alert: handshake_failure
It took 0.263 seconds to fetch https://www.blogfa.com/ with title 
General SSLEngine problem
It took 0.491 seconds to fetch https://divar.ir/ with title 
It took 1.02 seconds to fetch http://www.yjc.ir/ with title 
It took 1.056 seconds to fetch http://www.telewebion.com/ with title تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
It took 1.262 seconds to fetch https://www.ninisite.com/ with title نی نی سایت | راهنمای بارداری و بچه داری
It took 1.411 seconds to fetch http://namnak.com/ with title نمناک
It took 1.608 seconds to fetch http://www.varzesh3.com/ with title ورزش سه :: صفحه اصلی
It took 2.221 seconds to fetch http://www.namasha.com/ with title نماشا - سرویس رایگان اشتراک ویدیو
It took 2.247 seconds to fetch https://www.aparat.com/ with title آپارات - سرویس اشتراک ویدیو
Elapsed time: 2.253
Sites Done: 9
Process finished with exit code 0

That is, total time is only slightly bigger than the longest time to get result from a site.

Multithreading works!

Modified code is as follows:

import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.concurrent.CountDownLatch;

public class ReqThreaded {

class Site implements Runnable {
    final String url;
    String title;
    float fetchTime;

    Site(String url) {
        this.url = url;
    }

    @Override
    public void run() {
        long fetchStartTime = System.currentTimeMillis();
        String html = getResource(url);
        float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;
        Document doc = Jsoup.parse(html);
        title = doc.title();
        fetchTime = elapsed;
        System.out.println("It took " + fetchTime + " seconds to fetch " + url + " with title " + title);
        sitesDone.countDown();
    }
}

private HttpClient client = HttpClient.newHttpClient();

private CountDownLatch sitesDone;

public ReqThreaded(String[] sites) throws InterruptedException {
    int siteNumber = sites.length;
    sitesDone = new CountDownLatch(siteNumber);
    long startTime = System.currentTimeMillis();

    for (int i = 0; i < siteNumber; i++) {
        Runnable site = new Site(sites[i]);
        Thread thread = new Thread(site);
        thread.start();
    }

    sitesDone.await();
    float totalElapsed = (float) (System.currentTimeMillis() - startTime) / 1000;
    System.out.print("\rElapsed time: " + totalElapsed + "\nSites Done: " + siteNumber);
}

private String getResource(String someUrl) {
    String body = "";
    try {
        URI url = new URI(someUrl);
        HttpRequest request = HttpRequest.newBuilder()
                .uri(url).GET().build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
        body = response.body();
    } catch (URISyntaxException e) {
        System.out.println("URL " + someUrl + "is not valid");
    } catch (IOException | InterruptedException e) {
        System.out.println(e.getMessage());
    }
    return body;
}

public static void main(String[] args) throws InterruptedException {

    String[] topIranianSites = {
            "https://www.aparat.com/",
            "http://www.varzesh3.com/",
            "http://namnak.com/",
            "http://www.telewebion.com/",
            "https://divar.ir/",
            "https://www.ninisite.com/",
            "https://www.blogfa.com/",
            "http://www.namasha.com/",
            "http://www.yjc.ir/"
    };

    new ReqThreaded(topIranianSites);

}
}

That is:

  • never run the same method of the same object on different threads simultaneously

  • never exchange information between threads via plain variables. Use only specialized facilities. Here I used CountDownLatch, to signal the end of each thread. If I want to return some information to the main thread, I'd use BlockingQueue instead.

Alexei Kaigorodov
  • 13,189
  • 1
  • 21
  • 38
  • Thanks for improving the code but where is the multithreaded code? I am a newbie in java but thought the same. I think the main point of problem in my implementation of multithreaded code is when multiple threads are trying to access the same method of an object. It has some kind of blocking effect i think can you elaborate more on this? – m.d Mar 08 '18 at 09:37
  • when i run your code after the fifth site there is a jump from 1.8 second to more than 3 seconds and it ends in more than 7 seconds why is that?? – m.d Mar 08 '18 at 18:50
  • You cannot say "after the fifth site", because all sites are requested in parallel: requesting starts at the same time, but ends when the site replies. Indeed some sites reply quickly and some with delay. Compare total elapsed time with the longest time for single site. If they are close, then it is ok. – Alexei Kaigorodov Mar 08 '18 at 20:27
  • Thanks alex. With your hints and guidance i have written another implementation of multithreaded code [here](https://stackoverflow.com/questions/49179537/java-9-multithreaded-http-request) which runs so faster in my tests. The trip time range is between 0.2 - 1.3 seconds but in your implementation the trip time is between 0.95 - 11.57 seconds. Since you are an specialist in concurrency would you please take a look at the code there and tell me why there is such a difference in trip code? I can't figure it out. – m.d Mar 09 '18 at 07:51
  • The sites I'm testing against are top worldwide list of [alexa](https://www.alexa.com/topsites) – m.d Mar 09 '18 at 08:01
  • I cannot explain why times differ. I would accetp the answer of teeman12 and utilize thread pool, and then run with `poolsize` from 1 to 10. If times for poolsize=1 are equals to the times of ReqSingle, then the reason is your network connection, otherwise there is some problem in multithreaded implementation. – Alexei Kaigorodov Mar 09 '18 at 10:15
  • Thanks alex good point i will go with the `poolsize` test as you recommanded – m.d Mar 09 '18 at 10:59