libmodbus icon indicating copy to clipboard operation
libmodbus copied to clipboard

modbus-rtu.c: replace assert() with errno

Open kuhrusty opened this issue 6 years ago • 11 comments

Hey, the assert() in modbus-rtu.c causes the entire program to abort when the RTU slave ID hasn't been set, which is a bummer.

Here are changes to libmodbus-3.1.6 which seem to do what I want (cause a later modbus_read_registers() to fail with the message "Slave not set"). I did not look very closely at the places where I added return -1 to confirm that it was appropriate, but all tests still pass...

--- src/modbus.h.orig   2019-12-09 12:15:24.000000000 -0800
+++ src/modbus.h        2019-12-09 12:41:49.000000000 -0800
@@ -126,6 +126,7 @@
     MODBUS_EXCEPTION_NOT_DEFINED,
     MODBUS_EXCEPTION_GATEWAY_PATH,
     MODBUS_EXCEPTION_GATEWAY_TARGET,
+    MODBUS_EXCEPTION_NO_SLAVE_SET,
     MODBUS_EXCEPTION_MAX
 };
 
@@ -139,14 +140,15 @@
 #define EMBXMEMPAR (MODBUS_ENOBASE + MODBUS_EXCEPTION_MEMORY_PARITY)
 #define EMBXGPATH  (MODBUS_ENOBASE + MODBUS_EXCEPTION_GATEWAY_PATH)
 #define EMBXGTAR   (MODBUS_ENOBASE + MODBUS_EXCEPTION_GATEWAY_TARGET)
+#define EMBXNSLAVE (MODBUS_ENOBASE + MODBUS_EXCEPTION_NO_SLAVE_SET)
 
 /* Native libmodbus error codes */
-#define EMBBADCRC  (EMBXGTAR + 1)
-#define EMBBADDATA (EMBXGTAR + 2)
-#define EMBBADEXC  (EMBXGTAR + 3)
-#define EMBUNKEXC  (EMBXGTAR + 4)
-#define EMBMDATA   (EMBXGTAR + 5)
-#define EMBBADSLAVE (EMBXGTAR + 6)
+#define EMBBADCRC  (EMBXNSLAVE + 1)
+#define EMBBADDATA (EMBXNSLAVE + 2)
+#define EMBBADEXC  (EMBXNSLAVE + 3)
+#define EMBUNKEXC  (EMBXNSLAVE + 4)
+#define EMBMDATA   (EMBXNSLAVE + 5)
+#define EMBBADSLAVE (EMBXNSLAVE + 6)
 
 extern const unsigned int libmodbus_version_major;
 extern const unsigned int libmodbus_version_minor;
--- src/modbus.c.orig   2019-12-09 12:16:40.000000000 -0800
+++ src/modbus.c        2019-12-09 12:36:07.000000000 -0800
@@ -63,6 +63,8 @@
         return "Gateway path unavailable";
     case EMBXGTAR:
         return "Target device failed to respond";
+    case EMBXNSLAVE:
+        return "Slave not set";
     case EMBBADCRC:
         return "Invalid CRC";
     case EMBBADDATA:
@@ -1048,6 +1050,9 @@
     uint8_t rsp[MAX_MESSAGE_LENGTH];
 
     req_length = ctx->backend->build_request_basis(ctx, function, addr, nb, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     rc = send_msg(ctx, req, req_length);
     if (rc > 0) {
@@ -1159,6 +1164,9 @@
     }
 
     req_length = ctx->backend->build_request_basis(ctx, function, addr, nb, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     rc = send_msg(ctx, req, req_length);
     if (rc > 0) {
@@ -1250,6 +1258,9 @@
     }
 
     req_length = ctx->backend->build_request_basis(ctx, function, addr, (int) value, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     rc = send_msg(ctx, req, req_length);
     if (rc > 0) {
@@ -1317,6 +1328,9 @@
     req_length = ctx->backend->build_request_basis(ctx,
                                                    MODBUS_FC_WRITE_MULTIPLE_COILS,
                                                    addr, nb, req);
+    if (req_length == -1) {
+        return -1;
+    }
     byte_count = (nb / 8) + ((nb % 8) ? 1 : 0);
     req[req_length++] = byte_count;
 
@@ -1379,6 +1393,9 @@
     req_length = ctx->backend->build_request_basis(ctx,
                                                    MODBUS_FC_WRITE_MULTIPLE_REGISTERS,
                                                    addr, nb, req);
+    if (req_length == -1) {
+        return -1;
+    }
     byte_count = nb * 2;
     req[req_length++] = byte_count;
 
@@ -1413,6 +1430,9 @@
     req_length = ctx->backend->build_request_basis(ctx,
                                                    MODBUS_FC_MASK_WRITE_REGISTER,
                                                    addr, 0, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     /* HACKISH, count is not used */
     req_length -= 2;
@@ -1480,6 +1500,9 @@
     req_length = ctx->backend->build_request_basis(ctx,
                                                    MODBUS_FC_WRITE_AND_READ_REGISTERS,
                                                    read_addr, read_nb, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     req[req_length++] = write_addr >> 8;
     req[req_length++] = write_addr & 0x00ff;
@@ -1531,6 +1554,9 @@
 
     req_length = ctx->backend->build_request_basis(ctx, MODBUS_FC_REPORT_SLAVE_ID,
                                                    0, 0, req);
+    if (req_length == -1) {
+        return -1;
+    }
 
     /* HACKISH, addr and count are not used */
     req_length -= 4;
--- src/modbus-rtu.c.orig       2019-12-09 12:12:42.000000000 -0800
+++ src/modbus-rtu.c    2019-12-09 12:42:24.000000000 -0800
@@ -12,7 +12,6 @@
 #ifndef _MSC_VER
 #include <unistd.h>
 #endif
-#include <assert.h>
 
 #include "modbus-private.h"
 
@@ -107,7 +106,10 @@
                                            int addr, int nb,
                                            uint8_t *req)
 {
-    assert(ctx->slave != -1);
+    if (ctx->slave == -1) {
+        errno = EMBXNSLAVE;
+        return -1;
+    }
     req[0] = ctx->slave;
     req[1] = function;
     req[2] = addr >> 8;

kuhrusty avatar Dec 09 '19 21:12 kuhrusty

IMO the assert is correct, as it's a library user failure, not an error with the remote device.

karlp avatar Dec 10 '19 10:12 karlp

I think you have to call modbus_set_slave() before modbus_read_registers()

JoelStienlet avatar Dec 10 '19 14:12 JoelStienlet

I think you have to call modbus_set_slave() before modbus_read_registers()

Sure; you have to call modbus_connect() before modbus_read_registers(), too. Why is it good for one error to fail gracefully while the other aborts your program?

kuhrusty avatar Dec 10 '19 18:12 kuhrusty

Sidenote: the assert would usually go away when the library is compiled without "debug mode" and then ctx->slave is still -1 which would result in req[0] = 0xff as address with no caller notification at all... I'm a little bit unsure at the moment whether there is a "standard autotools way" to define NDEBUG to achieve such a "non-debug mode", but I also don't see any libmodbus infrastructure to do so...

mhei avatar Dec 10 '19 20:12 mhei

Sidenote: the assert would usually go away when the library is compiled without "debug mode" and then ctx->slave is still -1 which would result in req[0] = 0xff as address with no caller notification at all...

Yeah, compiling the code as-is with NDEBUG=1 gives a "Connection timed out" when you go to read values, which (to me) is a lot less helpful than the "Slave not set" error message added by my proposed changes above.

I'm a little bit unsure at the moment whether there is a "standard autotools way" to define NDEBUG to achieve such a "non-debug mode"

This just worked for me:

$ ./configure --prefix=... CPPFLAGS=-DNDEBUG=1

kuhrusty avatar Dec 10 '19 20:12 kuhrusty

Why is it good for one error to fail gracefully while the other aborts your program? Because one of them is entirely developer code error. "you must set slave id first" and the others are expected normal behaviour (slave not present, slave has errors, wiring is wrong, registers not available)

I would agree that this is a "don't hold it wrong" and "the handle provided is sharp" but leave asserts on when developing? That's what they're for, and then when you're "holding it right" you can freely turn them off.

karlp avatar Dec 11 '19 00:12 karlp

I would agree that this is a "don't hold it wrong" and "the handle provided is sharp" but leave asserts on when developing? That's what they're for, and then when you're "holding it right" you can freely turn them off.

I can't turn them off at runtime; I'm wrapping libmodbus in a PHP extension, and having PHP developers use one version for development, and another for deployment on devices in the field, makes me real nervous. You can't trust PHP developers to hold anything right.

kuhrusty avatar Dec 11 '19 00:12 kuhrusty

I think it would be better to make the necessary checks in the wrapper with: int modbus_get_slave(modbus_t *ctx); before every call of modbus_read_registers().

JoelStienlet avatar Dec 11 '19 09:12 JoelStienlet

I think it would be better to make the necessary checks in the wrapper with: int modbus_get_slave(modbus_t *ctx); before every call of modbus_read_registers().

But then I also need to know (and keep track of) whether this modbus_t came from a backend which requires a slave ID. Duplicating logic from libmodbus in my code does not seem as good as keeping the logic in libmodbus. Each backend knows whether it has what it needs; why not have it fail with a useful error message instead of an abort (or, in the NDEBUG case, an unhelpful message)?

kuhrusty avatar Dec 11 '19 10:12 kuhrusty

This change will not break any existing code I suppose. But once the change is made, there is no going back, as reverting from errno to assert() could break newer code. I think the choice is mostly philosophical: do we want this error to be considered a critical design error, or do we want it to be considered a minor error? We can also argue that using errno leaves this decision to the user of the library, and avoids making that decision in his place, hence providing more flexibility to the library.

JoelStienlet avatar Dec 11 '19 15:12 JoelStienlet

Some additional thoughts: assert() could also cause problems in other situations:

  • it makes it more difficult to perform proper logging of the error.
  • we don't know if some resources need to be freed, and terminating the process abruptly will prevent them from being freed. (for example, if this is a slave that has grabbed the RS485 line, terminating the process abruptly will keep the line locked...) Of course in these two situations it's possible to catch SIGABRT, but the handling will be rough. Real world programs get complicated, and even if modbus_set_slave() was always called before modbus_read_registers() during the tests, some unforeseen circumstances (Murphy's law) will decide otherwise in production. In the case we're dealing with here, there is no corruption of any internal datastructure, no risk of undefined behaviour, so no real reason to force a harsh termination.

JoelStienlet avatar Dec 13 '19 11:12 JoelStienlet