4 in 1
うっかり glibc の strlen を覗いたら面白いことになっていたので、実験。
$ cat test.c
#include <stdlib.h>
#include <string.h>

/*
 * Reference implementation: advance one byte at a time until the
 * terminating NUL and return the distance from the start.
 */
size_t strlen_naive(const char *str)
{
    const char *head = str;
    while (*str)
        str++;
    return (str - head);
}

/*
 * Word-at-a-time strlen: after a byte-wise prologue, examine
 * sizeof(unsigned long) bytes per iteration and only fall back to
 * per-byte checks when a word might contain a zero byte.
 *
 * NOTE(review): the word loads can touch bytes after the terminating
 * NUL (up to the end of the word containing it) and read char data
 * through an unsigned long pointer -- confirm this is acceptable for
 * the target compiler/platform before reusing outside a benchmark.
 */
size_t strlen_fast(const char *str)
{
    const char *head = str;
    unsigned long word1, word2, *wordp;

    /*
     * Prologue: step byte by byte until the low address bits selected by
     * sizeof(word1) - 1 are all zero, returning early if the NUL shows up.
     * NOTE(review): the pointer is cast to unsigned int, which is narrower
     * than a pointer on LP64 targets; only the low bits are used here.
     */
    while ((unsigned int)str & (sizeof(word1) - 1)) {
        if (!*str)
            return str - head;
        str++;
    }

    /* word1 = 0x01 in every byte, word2 = 0x80 in every byte (32-bit form). */
    word1 = 0x01010101UL, word2 = 0x80808080UL;
    /*
     * Widen both masks to eight bytes when unsigned long is larger than 4.
     * NOTE(review): the << 32 expressions are still compiled when unsigned
     * long is 32 bits wide (the guard is an ordinary if, not #if), so a
     * compiler may warn there; the transcript builds with -w.
     */
    if (sizeof(word1) > 4)
        word1 = (0x01010101UL << 32) | 0x01010101UL, word2 = (0x80808080UL << 32) | 0x80808080UL;

    wordp = (unsigned long *)str;
    for (;;) {
        /*
         * Cheap filter: subtracting 0x01 from each byte and masking with
         * 0x80 gives non-zero for any word holding a zero byte. It can also
         * fire for words without one, so each byte is re-checked below and
         * the loop simply continues when none of them is zero.
         */
        if ((*wordp - word1) & word2) {
            str = (const char *)wordp;
            if (!str[0]) return str - head;
            if (!str[1]) return str - head + 1;
            if (!str[2]) return str - head + 2;
            if (!str[3]) return str - head + 3;
            /* Bytes 4..7 exist only when the word is wider than 4 bytes. */
            if (sizeof(word1) > 4) {
                if (!str[4]) return str - head + 4;
                if (!str[5]) return str - head + 5;
                if (!str[6]) return str - head + 6;
                if (!str[7]) return str - head + 7;
            }
        }
        wordp++;
    }
}

/*
 * Benchmark driver: builds a 1 MiB string of 'A' and measures one
 * implementation, chosen by the first character of argv[1]
 * ('0' = library strlen, '1' = strlen_naive, '2' = strlen_fast).
 *
 * NOTE(review): argv[1] is dereferenced without checking argc, so the
 * program must be run with an argument.
 */
int main(int argc, char *argv[])
{
    char buffer[1024 * 1024]; /* lives on the stack */
    int i, j = 0;

    /* Fill with 'A' and terminate, giving a string of sizeof(buffer) - 1 bytes. */
    for (i = 0; i < sizeof(buffer); i++)
        buffer[i] = 'A';
    buffer[sizeof(buffer) - 1] = 0;

    /*
     * Each loop pre-decrements i from 10000 and stops at zero, so the
     * selected function is called 9999 times.
     * j accumulates the lengths but is never read afterwards.
     * NOTE(review): the accumulated total does not fit in an int, so the
     * value stored back into j is implementation-defined.
     */
    switch (*argv[1]) {
    case '0':
        for (i = 10000; --i; ) j += strlen (buffer);
        break;
    case '1':
        for (i = 10000; --i; ) j += strlen_naive(buffer);
        break;
    case '2':
        for (i = 10000; --i; ) j += strlen_fast (buffer);
        break;
    }
    return 0;
}
$ gcc -w test.c
$ time ./a.out 0
21.04 real 21.02 user 0.00 sys
$ time ./a.out 1
24.55 real 24.54 user 0.00 sys
$ time ./a.out 2
5.81 real 5.81 user 0.00 sys
速っ。
ちなみに -O を付けると
$ gcc -w -O test.c $ time ./a.out 0 0.00 real 0.00 user 0.00 sys $ time ./a.out 1 4.42 real 4.42 user 0.00 sys $ time ./a.out 2 1.75 real 1.75 user 0.00 sys
組み込みの strlen が超速になっているのは、もちろん最適化のせい(gcc は strlen を副作用のない組み込み関数として扱うので、結果の j がどこからも使われていないこのループでは呼び出しごと削除される)。ソースの頭に
/*
 * Prototypes for the two hand-written strlen variants, tagged with GCC's
 * `pure` attribute so the compiler may treat repeated or unused calls the
 * way it treats calls to the built-in strlen.
 */
__attribute__((pure)) size_t strlen_naive(const char *str);
__attribute__((pure)) size_t strlen_fast(const char *str);
を付ければ
$ gcc -w -O test.c $ time ./a.out 1 0.00 real 0.00 user 0.00 sys $ time ./a.out 2 0.00 real 0.00 user 0.00 sys