4

My application is based on Micronaut and GraalVM (java 11) and does a simple call to http://httpbin.org/get:

@Controller("/api")
class HelloWorld(
    @Client("http://httpbin.org")
    private val httpClient: RxHttpClient
) {
    private val logger = LoggerFactory.getLogger(javaClass)

    @Get("/hello")
    fun hello(): String {
        return "Hello World!"
    }

    @Get("/fb")
    fun fb(): Flowable<String> {
        logger.info("Trying to call FB")
        logger.info("Using url http://httpbin.org/get")
        try {
            return httpClient.retrieve("/get")
                .doOnError { logger.error("Error calling fb api flowable", it) }
                .doFinally { logger.info("Finished calling FB api flowable") }
        } catch (ex: Exception) {
            logger.error("Error calling fb api", ex)
            throw ex
        } finally {
            logger.info("Finished calling fb api")
        }
    }
}

When I build a docker image of the app using this Dockerfile:

FROM maven:3.6.3-jdk-11 as maven
COPY . /home/app
WORKDIR /home/app
RUN mvn package

FROM oracle/graalvm-ce:19.3.1-java11 as graalvm
COPY --from=maven /home/app/target/app-*.jar /home/app/
WORKDIR /home/app
RUN gu install native-image
RUN native-image --no-server --enable-http --enable-https -cp app-*.jar

FROM debian:stretch
EXPOSE 8080
COPY --from=graalvm /home/app/app .
#RUN apt-get update && apt-get -y install strace
ENTRYPOINT ["./app"]

Everything works in my local environment. But when I push the image to Google Cloud Repository, deploy it to Cloud Run and try to access the endpoint /api/fb, it crashes the container with 503 Service Unavailable.

The error in the logs is "The request failed because the HTTP connection to the instance had an error.". When I enable strace, these are the logs:

A 2020-02-07T12:04:27.443115Z [pid    18] <... futex resumed> )       = -1 ETIMEDOUT (Connection timed out) 
A 2020-02-07T12:04:27.443125Z [pid    18] futex(0x3e61dc018a80, FUTEX_WAKE_PRIVATE, 1) = 0 
A 2020-02-07T12:04:27.443357Z [pid    18] futex(0x3e62040009c4, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, 0x3e6204000990, 234) = 1 
A 2020-02-07T12:04:27.443369Z [pid    15] <... futex resumed> )       = 0 
A 2020-02-07T12:04:27.443440Z [pid    18] futex(0x3e61dc018ab4, FUTEX_WAIT_BITSET_PRIVATE, 157, {tv_sec=3624, tv_nsec=391761056}, 0xffffffff <unfinished ...> 
A 2020-02-07T12:04:27.443478Z [pid    15] futex(0x3e6204000990, FUTEX_WAKE_PRIVATE, 1) = 0 
A 2020-02-07T12:04:27.959629Z [pid    20] epoll_wait(17,  <unfinished ...> 
A 2020-02-07T12:04:27.959658Z [pid     8] <... epoll_wait resumed> [{EPOLLIN, {u32=53, u64=53}}], 1024, -1) = 1 
A 2020-02-07T12:04:27.959865Z [pid     8] accept(53, {sa_family=AF_INET6, sin6_port=htons(36294), inet_pton(AF_INET6, "::ffff:169.254.8.129", &sin6_addr), sin6_flowinfo=htonl(0), sin6_scope_id=0}, [28]) = 4 
A 2020-02-07T12:04:27.959884Z [pid     8] fcntl(4, F_GETFL)           = 0x2 (flags O_RDWR) 
A 2020-02-07T12:04:27.959945Z [pid     8] getsockname(4, {sa_family=AF_INET6, sin6_port=htons(8080), inet_pton(AF_INET6, "::ffff:169.254.8.130", &sin6_addr), sin6_flowinfo=htonl(0), sin6_scope_id=0}, [28]) = 0 
A 2020-02-07T12:04:27.960036Z [pid     8] getsockname(4, {sa_family=AF_INET6, sin6_port=htons(8080), inet_pton(AF_INET6, "::ffff:169.254.8.130", &sin6_addr), sin6_flowinfo=htonl(0), sin6_scope_id=0}, [28]) = 0 
A 2020-02-07T12:04:27.960085Z [pid     8] fcntl(4, F_GETFL)           = 0x2 (flags O_RDWR) 
A 2020-02-07T12:04:27.960128Z [pid     8] fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 
A 2020-02-07T12:04:27.960211Z [pid     8] setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 
A 2020-02-07T12:04:27.960322Z [pid     8] getsockopt(4, SOL_SOCKET, SO_SNDBUF, [1048576], [4]) = 0 
A 2020-02-07T12:04:27.960440Z [pid     8] mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x3e60f0e00000 
A 2020-02-07T12:04:27.960564Z [pid     8] mprotect(0x3e60f0e00000, 4096, PROT_NONE) = 0 
A 2020-02-07T12:04:27.960793Z [pid     8] clone(/usr/bin/strace: Process 22 attached 
A 2020-02-07T12:04:27.960875Z  <unfinished ...> 
A 2020-02-07T12:04:27.965711Z [pid    22] set_robust_list(0x3e60f16009e0, 24 <unfinished ...> 
A 2020-02-07T12:04:27.965738Z [pid     8] <... clone resumed> child_stack=0x3e60f15fffb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x3e60f16009d0, tls=0x3e60f1600700, child_tidptr=0x3e60f16009d0) = 22 
A 2020-02-07T12:04:27.965753Z [pid    22] <... set_robust_list resumed> ) = -1 ENOSYS (Function not implemented) 
A 2020-02-07T12:04:27.965761Z [pid     8] epoll_wait(29,  <unfinished ...> 
A 2020-02-07T12:04:27.965770Z [pid    22] sched_getaffinity(22, 32, [0, 1]) = 8 
A 2020-02-07T12:04:27.965783Z [pid    22] futex(0x3b4c0c4, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, 0x3b4c068, 30) = 1 
A 2020-02-07T12:04:27.965790Z [pid     3] <... futex resumed> )       = 0 
A 2020-02-07T12:04:27.965799Z [pid     3] futex(0x3b4c068, FUTEX_WAIT_PRIVATE, 2, NULL <unfinished ...> 
A 2020-02-07T12:04:27.965807Z [pid    22] futex(0x3b4c068, FUTEX_WAKE_PRIVATE, 1) = 1 
A 2020-02-07T12:04:27.965816Z [pid    22] mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> 
A 2020-02-07T12:04:27.965824Z [pid     3] <... futex resumed> )       = 0 
A 2020-02-07T12:04:27.965833Z [pid     3] futex(0x3b4c068, FUTEX_WAKE_PRIVATE, 1 <unfinished ...> 
A 2020-02-07T12:04:27.965842Z [pid    22] <... mmap resumed> )        = 0x3e60f0c00000 
A 2020-02-07T12:04:27.965850Z [pid     3] <... futex resumed> )       = 0 
A 2020-02-07T12:04:27.965858Z [pid     3] futex(0x3b4c0c4, FUTEX_WAIT_PRIVATE, 31, NULL <unfinished ...> 
A 2020-02-07T12:04:27.965866Z [pid    22] munmap(0x3e60f0d00000, 1048576) = 0 
A 2020-02-07T12:04:27.965874Z [pid    22] prctl(PR_SET_NAME, "ntLoopGroup-1-7"...) = 0 
A 2020-02-07T12:04:27.965882Z [pid    22] epoll_wait(20, [], 1024, 0) = 0 
A 2020-02-07T12:04:27.965891Z [pid    22] mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3e60f0a00000 
A 2020-02-07T12:04:27.965903Z [pid    22] munmap(0x3e60f0b00000, 1048576) = 0 
A 2020-02-07T12:04:27.966731Z [pid    22] epoll_ctl(20, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=4}}) = 0 
A 2020-02-07T12:04:27.966836Z [pid    22] epoll_wait(20, [{EPOLLIN, {u32=4, u64=4}}], 1024, 299996) = 1 
A 2020-02-07T12:04:27.967306Z [pid    22] mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3e60f0800000 
A 2020-02-07T12:04:27.967325Z [pid    22] munmap(0x3e60f0900000, 1048576) = 0 
A 2020-02-07T12:04:27.967493Z [pid    22] mmap(NULL, 16781312, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3e60ef600000 
A 2020-02-07T12:04:27.984977Z [pid    22] read(4, "GET /api/fb HTTP/1.1\r\nhost: app"..., 1024) = 864 
A 2020-02-07T12:04:27.985474Z [36m12:04:27.985[0;39m [1;30m[nioEventLoopGroup-1-7][0;39m [34mINFO [0;39m [35mcom.roihunter.app.HelloWorld[0;39m - Trying to call FB 
A 2020-02-07T12:04:27.985581Z [36m12:04:27.985[0;39m [1;30m[nioEventLoopGroup-1-7][0;39m [34mINFO [0;39m [35mcom.roihunter.app.HelloWorld[0;39m - Using url /act_984750788289990/insights 
A 2020-02-07T12:04:27.985645Z [pid    22] write(1, "\33[36m12:04:27.985\33[0;39m \33[1;30m"..., 143) = 143 
A 2020-02-07T12:04:27.985666Z [pid    22] write(1, "\33[36m12:04:27.985\33[0;39m \33[1;30m"..., 165) = 165 
A 2020-02-07T12:04:27.985766Z [36m12:04:27.985[0;39m [1;30m[nioEventLoopGroup-1-7][0;39m [34mINFO [0;39m [35mcom.roihunter.app.HelloWorld[0;39m - Finished calling fb api 
A 2020-02-07T12:04:27.985836Z [pid    22] write(1, "\33[36m12:04:27.985\33[0;39m \33[1;30m"..., 149) = 149 
A 2020-02-07T12:04:27.986363Z [pid    22] write(4, "HTTP/1.1 200 OK\r\ntransfer-encodi"..., 143) = 143 
E 2020-02-07T12:04:27.988626Z GET 503 546 B 31 ms curl/7.66.0 https://app-5phkf6s3jq-ez.a.run.app/api/fb GET 503 546 B 31 ms curl/7.66.0 
A 2020-02-07T12:04:27.989104Z [pid    22] writev(4, [{iov_base="1\r\n", iov_len=3}, {iov_base="[", iov_len=1}, {iov_base="\r\n", iov_len=2}], 3) = 6 
A 2020-02-07T12:04:27.989513Z [pid    22] mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x3e60eec00000 
A 2020-02-07T12:04:27.989534Z [pid    22] mprotect(0x3e60eec00000, 4096, PROT_NONE) = 0 
A 2020-02-07T12:04:27.989732Z [pid    22] clone(/usr/bin/strace: Process 23 attached 
A 2020-02-07T12:04:27.989753Z  <unfinished ...> 
A 2020-02-07T12:04:27.989873Z [pid    23] set_robust_list(0x3e60ef4009e0, 24) = -1 ENOSYS (Function not implemented) 
A 2020-02-07T12:04:27.989884Z [pid    23] sched_getaffinity(23, 32, [0, 1]) = 8 
A 2020-02-07T12:04:27.989964Z [pid    23] futex(0x3b4c0c4, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, 0x3b4c068, 32) = 1 
A 2020-02-07T12:04:27.990286Z [pid    23] mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> 
A 2020-02-07T12:04:27.990303Z [pid    22] <... clone resumed> child_stack=0x3e60ef3fffb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x3e60ef4009d0, tls=0x3e60ef400700, child_tidptr=0x3e60ef4009d0) = 23 
A 2020-02-07T12:04:27.990311Z [pid    23] <... mmap resumed> )        = 0x3e60eea00000 
A 2020-02-07T12:04:27.990320Z [pid     3] <... futex resumed> )       = 0 
A 2020-02-07T12:04:27.990327Z [pid     3] futex(0x3b4c068, FUTEX_WAKE_PRIVATE, 1) = 0 
A 2020-02-07T12:04:27.990335Z [pid     3] futex(0x3b4c0c4, FUTEX_WAIT_PRIVATE, 33, NULL <unfinished ...> 
A 2020-02-07T12:04:27.990345Z [pid    23] munmap(0x3e60eeb00000, 1048576) = 0 
A 2020-02-07T12:04:27.990423Z [pid    22] socket(AF_INET6, SOCK_STREAM, IPPROTO_IP <unfinished ...> 
A 2020-02-07T12:04:27.990435Z [pid    23] prctl(PR_SET_NAME, "ionThreadPool-1"... <unfinished ...> 
A 2020-02-07T12:04:27.990446Z [pid    22] <... socket resumed> )      = 80 
A 2020-02-07T12:04:27.990668Z [pid    22] setsockopt(80, SOL_IPV6, IPV6_V6ONLY, [0], 4) = 0 
A 2020-02-07T12:04:27.990684Z [pid    23] <... prctl resumed> )       = 0 

I tried using a different library to make the call, Java 11 Http client, and it works. But whatever I do, it doesn't work when I use Micronaut Http client, and there is no helpful message.

I also tried using Java 8, a different linux distribution in the docker image (fedora:latest, ubuntu:latest), but it didn't help.

Do you have any idea what could be causing this?

Thanks for you answer in advance.

VaclavDedik
  • 1,960
  • 4
  • 20
  • 27
  • There is an official [Cloud Run sample in the Micronaut repo](https://github.com/micronaut-projects/micronaut-gcp/tree/master/examples/hello-world-cloud-run), can you see if this one runs for you? and if so, try to understand the difference with your code – Steren Feb 07 '20 at 18:25
  • When you try it on your local environment, did you try the Docker image or directly your code? – guillaume blaquiere Feb 07 '20 at 19:37
  • @Steren That application is not calling any external API, like mine does. My "hello world" endpoint works just fine, it crashes when I call the endpoint that sends an Http request. – VaclavDedik Feb 07 '20 at 20:37
  • @guillaumeblaquiere I tried both the Docker image and directly my code, both with graalvm and without. Worked every single time. – VaclavDedik Feb 07 '20 at 20:37
  • @Steren, I also so some strange things when I performed external call in Python. I have to take time to fill a bug. – guillaume blaquiere Feb 07 '20 at 20:45
  • @VaclavDedik, did you see any gvisor message in the Cloud Run logs? – guillaume blaquiere Feb 07 '20 at 20:45
  • Indeed, if you suspect a bug, please provide a minimal example that reproduces and provide all details in an issue at https://cloud.google.com/support/docs/issue-trackers – Steren Feb 08 '20 at 21:03
  • I tried to reproduce your error, but I couldn't. Here is what I made from your description: https://github.com/benjaminkomen/micronaut. The main difference is I didn't use Kotlin, but I don't think that this should matter (I will try to use Kotlin to see if it does). – Benjamin Feb 09 '20 at 15:46
  • @Benjamin Wow, thank you so much for trying that. I tried to deploy your app, but it didn't work. So I tried to disable authentication and it started working! But only because I was suddenly not using curl to send the request, but browser. So what I found out is that it works in browser, but not when I make the request in curl or insomnia etc. Makes no sense, how is that possible? I tried to include the same headers as my browser is sending, but it doesn't help :( – VaclavDedik Feb 10 '20 at 09:40
  • So it looks like it is done by header "accept-encoding: gzip, deflate, br".. so I have to gunzip the output to not crash the server, that's ridiculous :D – VaclavDedik Feb 10 '20 at 09:47
  • Another interesting thing is that if I block the reactive call with blockingFirst like this `httpClient.retrieve(url).blockingFirst()` instead of `httpClient.retrieve(url)` in the controller, it works OK. So this bug is getting stranger and stranger :( – VaclavDedik Feb 10 '20 at 12:29
  • I am now wondering if this is a google cloud run bug or micronaut bug :/ – VaclavDedik Feb 10 '20 at 12:31
  • If I use `httpClient.retrieve(url).firstElement()`, it also works, so there seems to be something wrong with specifically returning Flowable as a response. If I return simply `Flowable.fromIterable(setOf("hello", "world"))`, it also crashes. – VaclavDedik Feb 10 '20 at 14:35
  • @VaclavDedik, I'm glad I could be of help :) I also saw some warnings during that native image complication step, e.g. "Warning: class initialization of class io.micronaut.http.bind.binders.ContinuationArgumentBinder failed with exception java.lang.NoClassDefFoundError: kotlin/TypeCastException. This class will be initialized at run time because option --allow-incomplete-classpath is used for image building. Use the option --initialize-at-run-time=io.micronaut.http.bind.binders.ContinuationArgumentBinder to explicitly request delayed initialization of this class." – Benjamin Feb 10 '20 at 15:17
  • @Benjamin Yeah, I saw that too when using your app. My app doesn't do that, maybe that is because my app uses Kotlin, but who knows :) – VaclavDedik Feb 10 '20 at 15:44

4 Answers4

13

ok. Since this is the only question that comes up whenever someone searches for cloud run 503 error, I would like to share my horrible experience and solution.

I deployed my spring boot app on cloud run and configured pub-sub push subscription trigger for it. I tested it on my local system. Then i tested it on my private GCP project. And then when i deployed it on my organization's GCP account, I started to get 503 error.

After this, below is what i tried.

  1. I tried changing the roles/permissions of the default service account (which was used by the run instance) by giving max permissions, but it was of no use.
  2. Tried changing the concurrency to 1 (as suggested in GCP docs) but no help.
  3. I started comparing each and every config of my run instance with my private GCP run instance, And then i found out that I mistakenly had checked below option which was causing this issue.

enter image description here

So i unchecked it, redeployed my instance, and it worked.

Manish Bansal
  • 2,400
  • 2
  • 21
  • 37
  • can you please mention where can I see the part of connections in GCP as shown in the screenshot? – Abdulkader Khateeb Feb 13 '21 at 09:27
  • 3
    You need to go to cloud run, click on your service, click on edit button on top, and then this page opens. (Button names may be different as i dont remember, but you will be able to find). – Manish Bansal Feb 13 '21 at 09:29
  • I accessed this page and the option is already unchecked, unfortunately, the problem of (503 Service Unavailable) still exists – Abdulkader Khateeb Feb 13 '21 at 09:50
  • 1
    That was a HUGE help. Absolutely the issue and I'm not even using this setup. Thank you!!!! – MTT Apr 30 '21 at 12:14
  • Thank you very much. I've documented this issue in Google's issue tracker here: https://issuetracker.google.com/issues/194314805. Please go upvote it! – Raman Jul 22 '21 at 16:34
4

In my case I simply made a syntax error in my python code inside my Cloud Run instance and I got the 503 Service Unavailable error.

This showed up in the logs. Go to your Cloud Run Service and click "Logs" for more details.

August Kimo
  • 1,503
  • 8
  • 17
  • 2
    Yes! Checking my cloud instance's logs helped me find the problem. I wouldn't have known about it without your comment, thanks. – billiam Jun 20 '22 at 16:10
1

So it looks like the core of the issue is that Cloud Run currently does not support HTTP streaming:

https://cloud.google.com/run/docs/issues#grpc_websocket

And when I return Flowable with Micronauts, it is opening a streamable HTTP connection. So the solution is not to use Flowable (or any other Publisher that is unbounded e.g. Flow, Flux etc) as a response.

VaclavDedik
  • 1,960
  • 4
  • 20
  • 27
0

I had the same problem with Google Cloud Run returning HTTP 503 Service Unavailable, but Manish Bansal's solution haven't worked.

It ended up being a change in the YAML file that the 'docker deploy run' did not updated properly. In my case, it was the "spec.template.spec.containers.ports.containerPort" property.

It is worth take a look at that.