0

I am testing a serial communication protocol based on printable characters only. The setup has a pc connected to an arduino board by USB. The PC USB serial is operated in canonical mode, with no echo, no flow control, 9600 baud. Since a read timeout is requested, pselect is called before the serial read. The arduino board simply echoes back every received character without any processing. The PC OS is Linux Neon with kernel 5.13.0-40-generic.

When lines of a specific length are transmitted from the PC and echoed back by the arduino, they are received correctly except for the final new line that is missing. A further read returns an empty line (the previously missing NL). Lines with different length are transmitted and received correctly, including the trailing NL.

This behavior is fully repeatable and stable. The following code reproduces the problem for a line transmitted with a length of 65 characters (including NL) and received with a length of 64 (NL missing). Other line lengths work fine.

Thanks for any hints.

/* remote serial loop test 20220626 */

#include <errno.h>
#include <fcntl.h> 
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/select.h>
#include <string.h>
#include <termios.h>
#include <time.h>
#include <unistd.h>


/* shortest and longest payload length exercised by the test loop; the
 * '\n' line terminator is transmitted in addition to the payload */
#define TX_MINLEN 63
#define TX_MAXLEN 66
/* maximum number of bytes requested by a single read() */
#define DATA_MAXLEN 128

/* serial device node and termios baud rate constant used for both directions */
#define LINK_DEVICE "/dev/ttyUSB0"
#define LINK_SPEED B9600
/* receive timeout in milliseconds; a negative value waits without timeout */
#define RECEIVE_TIMEOUT 2000



/*
 * Serial loop-back test.
 *
 * Opens LINK_DEVICE, configures it for canonical (line by line) input at
 * LINK_SPEED with 8 data bits, no parity, 1 stop bit, no echo and no flow
 * control, then transmits lines whose payload length goes from TX_MAXLEN
 * down to TX_MINLEN. After each transmission it waits (with timeout) for
 * the remote side to echo the line and compares length and content of the
 * received line against what was sent. Differences are reported on stdout.
 *
 * Returns 0 when the whole sequence was executed (mismatches are only
 * reported, they do not change the exit status), -1 when the device could
 * not be opened, configured or written to.
 */
int main()
{
  int retval;
  int status = 0;

  /* tx pattern: "0123456789ABCDEF" repeated; the last element stays 0 */
  uint8_t tx_data[257] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
  for (int i=16; i < 256; i++) tx_data[i] = tx_data[i & 0xf];
  uint8_t rx_data[257];

  /* serial interface */
  int fd;
  fd_set fdset;
  struct timespec receive_timeout;
  struct timespec *p_receive_timeout = &receive_timeout;
  struct termios tty;

  /* open serial device in blocking mode */
  fd = open(LINK_DEVICE, O_RDWR | O_NOCTTY);
  if (fd < 0) {
    printf("Error opening %s: %s\n",LINK_DEVICE,strerror(errno));
    return -1;
  }

  /* convert the timeout from milliseconds; NULL means wait forever */
  if (RECEIVE_TIMEOUT >= 0) {
    p_receive_timeout->tv_sec = RECEIVE_TIMEOUT / 1000;
    p_receive_timeout->tv_nsec = RECEIVE_TIMEOUT % 1000 * 1000000;
  }
  else {
    p_receive_timeout = NULL;
  }

  /* get termios structure */
  if (tcgetattr(fd, &tty) < 0) {
    printf("Error from tcgetattr: %s\n", strerror(errno));
    close(fd);
    return -1;
  }

  /* set tx and rx baudrate */
  cfsetospeed(&tty, (speed_t)LINK_SPEED);
  cfsetispeed(&tty, (speed_t)LINK_SPEED);

  /* set no modem ctrl, 8 bit, no parity, 1 stop */
  tty.c_cflag |= (CLOCAL | CREAD);    /* ignore modem controls */
  tty.c_cflag &= ~CSIZE;
  tty.c_cflag |= CS8;         /* 8-bit characters */
  tty.c_cflag &= ~PARENB;     /* no parity bit */
  tty.c_cflag &= ~CSTOPB;     /* only need 1 stop bit */
  tty.c_cflag &= ~CRTSCTS;    /* no hardware flowcontrol */

  /* canonical mode: one line at a time (\n is line terminator) */
  tty.c_lflag |= ICANON | ISIG;
  tty.c_lflag &= ~(ECHO | ECHOE | ECHONL | IEXTEN);

  /* input control */
  tty.c_iflag &= ~IGNCR;  /* preserve carriage return */
  tty.c_iflag &= ~INPCK;  /* no parity checking */
  tty.c_iflag &= ~INLCR;  /* no NL to CR translation */
  tty.c_iflag &= ~ICRNL;  /* no CR to NL translation */
  tty.c_iflag &= ~IUCLC;  /* no upper to lower case mapping */
  tty.c_iflag &= ~IMAXBEL;/* no ring bell at rx buffer full */
  tty.c_iflag &= ~(IXON | IXOFF | IXANY);/* no SW flowcontrol */

  /* no output remapping, no char dependent delays */
  tty.c_oflag = 0;

  /* no additional EOL chars, confirm EOF to be 0x04 */
  tty.c_cc[VEOL] = 0x00;
  tty.c_cc[VEOL2] = 0x00;
  tty.c_cc[VEOF] = 0x04;

  /* set changed attributes really */
  if (tcsetattr(fd, TCSANOW, &tty) != 0) {
    printf("Error from tcsetattr: %s\n", strerror(errno));
    close(fd);
    return -1;
  }

  /* wait for serial link hardware to settle, required by arduino reset
   * triggered by serial control lines */
  sleep(2);

  /* empty serial buffers, both tx and rx; a failure here is not fatal
   * but stale data may disturb the first comparison */
  if (tcflush(fd,TCIOFLUSH) != 0) {
    printf("Error from tcflush: %s\n", strerror(errno));
  }

  /* repeat transmit and receive, each time reducing data length by 1 char */
  for (int l=TX_MAXLEN; l > TX_MINLEN - 1; l--) {
    const int txlen = l + 1;    /* payload plus line terminator */

    /* prepare data: set EOL and null terminator for current length */
    tx_data[l] = '\n';
    tx_data[l+1] = 0;

    /* send data; without a complete line on the wire the echo cannot be
     * evaluated, so any write failure ends the test */
    ssize_t sent = write(fd,tx_data,(size_t)txlen);
    if (sent != txlen) {
      if (sent < 0) {
        printf("Error from write: %s\n", strerror(errno));
      }
      else {
        printf("short write: sent %d of %d bytes\n",(int)sent,txlen);
      }
      status = -1;
      break;
    }

    /* pselect overwrites the descriptor set on return (it is emptied on
     * timeout), so it has to be armed again before every call */
    FD_ZERO(&fdset);
    FD_SET(fd,&fdset);

    /* wait for received data or for timeout */
    retval = pselect(fd+1,&fdset,NULL,NULL,p_receive_timeout,NULL);

    /* on error or timeout nothing is readable: a read on the blocking
     * descriptor could hang, so skip to the next line length */
    if (retval < 0) {
      printf("pselect error: %d - %s\n",retval,strerror(errno));
      continue;
    }
    if (retval == 0) {
      printf("serial read timeout\n");
      continue;
    }

    /* a line is available: read it without blocking */
    ssize_t msglen = read(fd,rx_data,DATA_MAXLEN);
    if (msglen < 0) {
      printf("Error from read: %s\n", strerror(errno));
      continue;
    }

    /* terminate the received bytes so they can be printed as a string;
     * msglen <= DATA_MAXLEN which is below the buffer size */
    rx_data[msglen] = 0;

    /* check rx data length */
    if (msglen != txlen) {
      printf("******** RX ERROR: sent %d, received %d\n",txlen,(int)msglen);
    }

    /* check received data, including new line if present; this is done
     * for every received line, not only for those with a wrong length */
    if (memcmp(tx_data,rx_data,(size_t)msglen) != 0) {
      printf("different rx data:|%s|\n",(char *)rx_data);
    }
  }

  close(fd);
  return status;
}


Marcus
  • 159
  • 1
  • 1
  • 8

2 Answers

0

When performing stream-based communication, be it via pipes, serial ports, or TCP sockets, you can never rely on reads to always return a full "unit of transmission" (in this case a line, but could also be a fixed-size block). The reason is that stream-based communication can always be split by any part of the transmission stack (even potentially the sender) into multiple blocks, and there is never a guarantee that a single read will always read a full block.

For example, you could always run into the race condition that your microcontroller is still sending parts of the message when you call read(), so not all characters are read exactly. In your case, that's not what you're seeing, because that would be more of a stochastic phenomenon (that would be worse with an increased interrupt load on the computer) and not so easily reproducible. Instead, because you're talking about the number 64 here, you're running into the static buffer size used in the kernel's tty driver that will only ever return at most 64 bytes at once, regardless of what the specified read size actually is. However, in other cases it could still be that you'll see additional failures by the kernel returning only the first couple of characters of a line in the first read(), and the rest in the second, depending on precise timing details -- you've probably not seen that yet, but it's bound to happen at some point.

The only reliable way to properly implement communication protocols in streaming situations (serial port, pipes, TCP sockets, etc.) is to consider the following:

  • For fixed-size data (e.g. communication units that are always N bytes in size), loop around a read() call until you've read exactly the right number of bytes (reads that follow an incomplete read would obviously ask for fewer bytes than the original read, just to make up the difference)
  • For variable-size data (for example communication units that are separated by a line end character) you have two options: either you read only one character at a time until you reach the end-of-line character (inefficient, uses lots of syscalls), or you keep track of the communication state via a large enough buffer that you constantly fill with read() operations until the buffer contains a line-end character, at which point you remove that line from the buffer (but keep the rest) and process that.

As a complete aside, if you're doing anything with serial communication, I can very much recommend the excellent libserialport library (LGPLv3 license) that makes working with serial ports a lot easier -- and has the benefit of being cross-platform. (Doesn't help with your issue, just thought that I'd mention it.)

chris_se
  • 1,006
  • 1
  • 7
  • Since the reading of the serial port is done in canonical mode, it will block until the whole line (including the NL character) is received, as stated by the termios manual – Marcus Jun 27 '22 at 09:24
  • From the termios manual "Input is made available line by line. An input line is available when one of the line delimiters is typed (NL, EOL, EOL2; or EOF at the start of line). Except in the case of EOF, the line delimiter is included in the buffer returned by read(2)." – Marcus Jun 27 '22 at 09:31
  • Moreover the line buffer size is 4096, far beyond the line sizes used by the test. Again from the termios manual "The maximum line length is 4096 chars (including the terminating newline character); lines longer than 4096 chars are truncated. After 4095 characters, input processing (e.g., ISIG and ECHO* processing) continues, but any input data after 4095 characters up to (but not including) any terminating newline is discarded. This ensures that the terminal can always receive more input until at least one line can be read." – Marcus Jun 27 '22 at 09:33
0

Upgrading from linux kernel version 5.13.0-40-generic to 5.13.0-51-generic solved the problem.

Marcus
  • 159
  • 1
  • 1
  • 8