The following code:
#include <stdio.h>
/*
 * Demonstrates the problem being asked about: a %[...] scanset conversion
 * fails on an empty quoted string, so sscanf() reports 0 converted fields
 * and leaves buf with its previous contents.
 */
int main(void) {
    char buf[80] = "salam";
    /* Opening quote, up to 79 non-quote characters, closing quote. */
    const char *fmt = "\"%79[^\"]\"";
    int n;

    /* Empty quoted string: nothing for the scanset to match. */
    n = sscanf("\"\"", fmt, buf);
    printf("%d fields were read. buf=\"%s\"\n", n, buf);

    /* Non-empty quoted string: the scanset matches and buf is overwritten. */
    n = sscanf("\"hamidi\"", fmt, buf);
    printf("%d fields were read. buf=\"%s\"\n", n, buf);
}
outputs:
0 fields were read. buf="salam"
1 fields were read. buf="hamidi"
This indicates that the sscanf function can't read empty strings with the specified format string. Is there a good replacement for the regex format string to read strings with a length from zero to at most 79 characters using sscanf? In other words, I expect the first sscanf call to store '\0' in buf[0] and return 1 instead of 0. Is there a way?
Indeed, `sscanf` and friends cannot handle empty fields, and neither can `strtok`. There is no direct alternative in the Standard library, but you can write a simple scanner with a loop or using `strchr`, `strpbrk` or `strcspn`.
Here is a simple example:
#include <stdio.h>
#include <string.h>
/*
 * Extract a double-quoted string from the start of src.
 *
 * dest  receives the characters between the quotes, truncated to size - 1
 *       bytes and always NUL-terminated when size > 0; not written when
 *       size is 0 or when parsing fails.
 * endp  if non-NULL, receives a pointer just past the closing quote on
 *       success; left untouched on failure.
 *
 * Returns 1 if src starts with '"' and a matching '"' follows, 0 otherwise.
 * No escape sequences are interpreted.
 */
int scan_string(char *dest, size_t size, const char *src, const char **endp) {
    const char *body;
    const char *closing;
    size_t count;

    /* The input must open with a double quote. */
    if (src[0] != '"')
        return 0;
    body = src + 1;

    /* Locate the closing quote; without one the string is unterminated. */
    closing = strchr(body, '"');
    if (closing == NULL)
        return 0;

    if (endp != NULL)
        *endp = closing + 1;

    /* A zero-sized destination still counts as a successful parse. */
    if (size == 0)
        return 1;

    /* Copy as much as fits, keeping one byte for the terminator. */
    count = (size_t)(closing - body);
    if (count > size - 1)
        count = size - 1;
    memcpy(dest, body, count);
    dest[count] = '\0';
    return 1;
}
/*
 * Run scan_string() on one input and print the input, the return value,
 * the destination buffer and the end pointer. Both outputs start as the
 * sentinel "<unchanged>" so it is visible when scan_string() did not
 * write to them.
 */
void test(const char *src) {
    char out[80] = "<unchanged>";
    const char *rest = "<unchanged>";
    int ok;

    ok = scan_string(out, sizeof out, src, &rest);
    printf("src: '%s', n: %d, buf: '%s', end: '%s'\n", src, ok, out, rest);
}
/*
 * Drive test() over a fixed table of inputs: empty input, a lone quote,
 * an empty quoted string, single quotes, plain quoted strings, and
 * strings containing a backslash.
 */
int main(void) {
    static const char *const inputs[] = {
        "",
        "\"",
        "\"\"",
        "''",
        "\"hamidi\"",
        "\"Hello\" \"world\"",
        "\"\\\"\"",
        "\"Hello world\\n\"",
    };
    size_t i;

    for (i = 0; i < sizeof inputs / sizeof inputs[0]; i++)
        test(inputs[i]);
    return 0;
}
Output:
src: '', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '""', n: 1, buf: '', end: ''
src: '''', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"hamidi"', n: 1, buf: 'hamidi', end: ''
src: '"Hello" "world"', n: 1, buf: 'Hello', end: ' "world"'
src: '"\""', n: 1, buf: '\', end: '"'
src: '"Hello world\n"', n: 1, buf: 'Hello world\n', end: ''
Here is a slightly more complicated version that handles some `\` escape sequences and different separators:
/*
 * Extract a quoted string from the start of src, decoding simple
 * backslash escapes.
 *
 * The string may be delimited by either double or single quotes; the
 * closing delimiter must be the same character as the opening one.
 * Recognized escapes are \a \b \f \n \r \t \v; any other escaped
 * character (including the delimiter and the backslash itself) stands
 * for itself. Octal and hex escapes are not decoded.
 *
 * dest  receives the decoded characters, truncated to size - 1 bytes and
 *       NUL-terminated when size > 0. If the opening delimiter is present
 *       but the closing one is missing, dest holds what was decoded before
 *       the end of input. If there is no opening delimiter, dest is not
 *       written.
 * endp  if non-NULL, receives a pointer just past the closing delimiter
 *       on success; left untouched on failure.
 *
 * Returns 1 on success, 0 if src does not start with a quote or the
 * string is unterminated.
 */
int scan_string(char *dest, size_t size, const char *src, const char **endp) {
    char sep = *src++;
    char ch;
    size_t i = 0;

    // handle both single and double quotes
    if (sep != '"' && sep != '\'')
        return 0;

    while ((ch = *src++) != sep) {
        if (ch == '\0') {
            // unterminated string: terminate the partial result and fail
            if (i < size)
                dest[i] = '\0';
            return 0;
        }
        if (ch == '\\' && *src != '\0') {
            // ch becomes the character following the backslash
            switch (ch = *src++) {
            case 'a': ch = '\a'; break;
            case 'b': ch = '\b'; break;
            case 'f': ch = '\f'; break;
            case 'n': ch = '\n'; break;
            case 'r': ch = '\r'; break;
            case 't': ch = '\t'; break;
            case 'v': ch = '\v'; break;
            // handle octal and hex sequences...
            default:
                // Unknown escape: keep the escaped character itself. It has
                // already been consumed, so reading another one here would
                // skip input and could run past the terminator.
                break;
            }
        }
        // keep one byte for the terminator; excess characters are dropped
        if (i + 1 < size)
            dest[i++] = ch;
    }
    if (i < size)
        dest[i] = '\0';
    if (endp)
        *endp = src;
    return 1;
}
Output:
src: '', n: 0, buf: '<unchanged>', end: '<unchanged>'
src: '"', n: 0, buf: '', end: '<unchanged>'
src: '""', n: 1, buf: '', end: ''
src: '''', n: 1, buf: '', end: ''
src: '"hamidi"', n: 1, buf: 'hamidi', end: ''
src: '"Hello" "world"', n: 1, buf: 'Hello', end: ' "world"'
src: '"\""', n: 0, buf: '"', end: '<unchanged>'
src: '"Hello world\n"', n: 1, buf: 'Hello world
', end: ''
Do not use `scanf` to get strings.
"regex format string": the `scanf` family of functions doesn't use regular expressions.