Reading a string of zero or more characters with sscanf

The following code:

#include <stdio.h>

/*
 * Demonstrates that sscanf() with a %[ conversion does not accept an empty
 * quoted string: the first call reports 0 converted fields and leaves buf
 * untouched, the second call reads the quoted word.
 */
int main(void) {
  char buf[80] = "salam";
  /* Opening quote, up to 79 characters that are not a quote, closing quote. */
  const char *fmt = "\"%79[^\"]\"";
  int n;

  /* Empty quoted string: %[ must match at least one character, so the
     conversion fails and buf keeps its initial contents. */
  n = sscanf("\"\"", fmt, buf);
  printf("%d fields were read. buf=\"%s\"\n", n, buf);
  /* Non-empty quoted string: one field is converted. */
  n = sscanf("\"hamidi\"", fmt, buf);
  printf("%d fields were read. buf=\"%s\"\n", n, buf);
}

outputs:

0 fields were read. buf="salam"
1 fields were read. buf="hamidi"

This indicates that the sscanf function can't read empty strings with the specified format string. Is there a good replacement for the regex format string that lets sscanf read strings of zero to at most 79 characters? In other words, I expect the first sscanf call to store '\0' in buf[0] and return 1 instead of 0. Is there a way?

  • 2

    do not use scanf to get strings

    – 

  • "regex format string": the scanf family of functions doesn't use regular expressions.

    – 




Indeed, sscanf and friends cannot handle empty fields, and neither can strtok. There is no direct alternative in the Standard library, but you can write a simple scanner with a loop or by using strchr, strpbrk or strcspn.

Here is a simple example:

#include <stdio.h>
#include <string.h>

/*
 * Extract a double-quoted string from the beginning of src.
 *
 *   dest  receives the text between the quotes, NUL-terminated and
 *         truncated to size - 1 characters; not written when size is 0
 *   size  capacity of dest in bytes
 *   src   input text; must begin with '"'
 *   endp  if non-NULL, receives a pointer just past the closing quote
 *
 * Returns 1 when both quotes were found, 0 otherwise. On failure neither
 * dest nor *endp is modified. Backslashes get no special treatment.
 */
int scan_string(char *dest, size_t size, const char *src, const char **endp) {
    const char *body;
    const char *closing;
    size_t count;

    // the input has to open with a double quote
    if (src[0] != '"')
        return 0;

    body = src + 1;
    closing = strchr(body, '"');
    // without a closing quote there is no string to report
    if (closing == NULL)
        return 0;

    if (endp != NULL)
        *endp = closing + 1;

    // a zero-sized destination only gets the match reported
    if (size == 0)
        return 1;

    count = (size_t)(closing - body);
    if (count > size - 1)
        count = size - 1;   // keep one byte for the terminator
    memcpy(dest, body, count);
    dest[count] = '\0';
    return 1;
}

/*
 * Run scan_string() on src and print the input, the return value, the
 * destination buffer and the end pointer. Both outputs start out as the
 * marker text "<unchanged>" so it is visible when a call leaves them alone.
 */
void test(const char *src) {
    char out[80] = "<unchanged>";
    const char *rest = "<unchanged>";
    int matched;

    matched = scan_string(out, sizeof out, src, &rest);
    printf("src: '%s', n: %d, buf: '%s', end: '%s'\n",
           src, matched, out, rest);
}

/* Feed a fixed list of sample inputs to test(), in order. */
int main(void) {
    static const char *const samples[] = {
        "",                       // empty input
        "\"",                     // opening quote only
        "\"\"",                   // empty quoted string
        "''",                     // single quotes
        "\"hamidi\"",             // plain quoted word
        "\"Hello\" \"world\"",    // two quoted strings in a row
        "\"\\\"\"",               // backslash followed by a quote
        "\"Hello world\\n\"",     // backslash-n inside the quotes
    };
    size_t idx;

    for (idx = 0; idx < sizeof samples / sizeof samples[0]; idx++) {
        test(samples[idx]);
    }
    return 0;
}

Output:

src: '', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '""', n: 1, buf: '', end: ''
src: '''', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"hamidi"', n: 1, buf: 'hamidi', end: ''
src: '"Hello" "world"', n: 1, buf: 'Hello', end: ' "world"'
src: '"\""', n: 1, buf: '\', end: '"'
src: '"Hello world\n"', n: 1, buf: 'Hello world\n', end: ''

Here is a slightly more complicated version that handles some backslash escape sequences and accepts either single or double quotes as the delimiter:

/*
 * Extract a quoted string from the beginning of src, decoding simple
 * backslash escapes.
 *
 *   dest  receives the decoded text, NUL-terminated and truncated to
 *         size - 1 characters; not written when size is 0
 *   size  capacity of dest in bytes
 *   src   input text; must begin with '"' or '\'', and the same character
 *         closes the string
 *   endp  if non-NULL, receives a pointer just past the closing quote
 *         (only on success)
 *
 * \f \n \r \t \v are translated to the corresponding control characters.
 * Any other escaped character (for example \" \' or \\) stands for itself,
 * so an escaped quote does not end the string. A backslash that is the last
 * character of the input is copied as is.
 *
 * Returns 1 on success. Returns 0 if src does not start with a quote
 * (dest untouched) or if the closing quote is missing (dest then holds the
 * text decoded so far).
 */
int scan_string(char *dest, size_t size, const char *src, const char **endp) {
    char sep = *src++;
    char ch;
    size_t i = 0;

    // handle both single and double quotes
    if (sep != '"' && sep != '\'')
        return 0;
    while ((ch = *src++) != sep) {
        if (ch == '\0') {
            // input ended before the closing quote
            if (i < size)
                dest[i] = '\0';
            return 0;
        }
        if (ch == '\\' && *src != '\0') {
            // the switch expression consumes the escaped character
            switch (ch = *src++) {
            case 'f': ch = '\f'; break;
            case 'n': ch = '\n'; break;
            case 'r': ch = '\r'; break;
            case 't': ch = '\t'; break;
            case 'v': ch = '\v'; break;
            // handle octal and hex sequences...
            default:
                // keep the escaped character itself; reading another one
                // here would drop it and could step past the terminator
                break;
            }
        }
        // characters beyond the capacity are dropped, not stored
        if (i + 1 < size)
            dest[i++] = ch;
    }
    if (i < size)
        dest[i] = '\0';
    if (endp)
        *endp = src;
    return 1;
}

Output:

src: '', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"', n: 0, buf: '', end: '<unchanged>'
src: '""', n: 1, buf: '', end: ''
src: '''', n: 1, buf: '', end: ''
src: '"hamidi"', n: 1, buf: 'hamidi', end: ''
src: '"Hello" "world"', n: 1, buf: 'Hello', end: ' "world"'
src: '"\""', n: 0, buf: '"', end: '<unchanged>'
src: '"Hello world\n"', n: 1, buf: 'Hello world
', end: ''

Leave a Comment