constexpr Murmur3 implementation problem

Hello everybody!

This little problem of mine is driving me nuts! I'm trying to implement the Murmur3 hash as a constexpr in c++11. Specifically, I'm implementing the version for 32-bit hashes. It works, more or less. However, if I have a negative value in my key (the value to be hashed) my hash doesn't match up with the reference implementation. My algorithm accepts const char* for keys. If one of the chars is negative, it doesn't match the reference implementation. I first noticed it when I tested a string with the character "ยง" in it, which is not an ASCII-character. My IDE encoded it with two negative values. I've since tested the whole char range (-128 to 127) and all negative value cause mismatches. I've been combing through this code for days but can't find the error!

I would be eternally gratefull for any ideas! Due to the constraints of const expressions the code is in almost purely functional style and pretty hard to read. I'm sorry for that but I hope it won't keep you from trying to help me.

This is the reference:
https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp

This is my own (re-)implementation:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include <cstdint>
#include <string>
#include <cstring>


// can calculate hashes of strings at run- and compile time.
// use only for ASCII characters!
class StringHash {
public:
    constexpr StringHash(const char* string) : value(hash(string)){}
    constexpr StringHash(StringHash& other) : value(other.value){}
    constexpr StringHash(StringHash&& other) : value(other.value){}

    bool operator == (StringHash& other) {
        return this->value == other.value;
    }

    bool operator == (StringHash&& other) {
        return this->value == other.value;
    }

    bool operator != (StringHash& other) {
        return this->value != other.value;
    }

    bool operator != (StringHash&& other) {
        return this->value != other.value;
    }

    constexpr inline static uint32_t hash(const char* string) {
        return murmur3(string, strlen(string), seed);
    }

    const uint32_t value;
    static const uint32_t seed = 0xB0F57EE3;

private:
    static const uint32_t c1 = 0xcc9e2d51;
    static const uint32_t c2 = 0x1b873593;

    constexpr inline static uint32_t _add(const uint32_t x, const int add) {
        return x + add;
    }

    constexpr inline static uint32_t _mult(const uint32_t x, const int mult) {
        return x * mult;
    }

    constexpr inline static uint32_t _xor(const uint32_t lhs, const int rhs) {
        return lhs ^ rhs;
    }

    constexpr inline static uint32_t _downshift_xor(const uint32_t x, const int downshift) {
        return x ^ (x >> downshift);
    }

    constexpr inline static uint32_t _rotl32( uint32_t x, int8_t r ) {
        return (x << r) | (x >> (32 - r));
    }

    constexpr inline static uint32_t _fmix32(const uint32_t x) {
        return _downshift_xor(
                    _mult(
                        _downshift_xor(
                            _mult(
                                _downshift_xor(x, 16),
                                0x85ebca6b),
                            13),
                        0xc2b2ae35),
                    16);
    }

    // get blocks in 32bit increments
    constexpr inline static uint32_t _getblock32 ( const char* p, const int i ) {
        return uint32_t(p[i*4+0])
                | uint32_t(p[i*4+1]) << 8
                | uint32_t(p[i*4+2]) << 16
                | uint32_t(p[i*4+3]) << 24;
    }

    constexpr inline static uint32_t _body(uint32_t x, const uint32_t block) {
        return _add(
                    _mult(
                          _rotl32(
                                  _xor(x,
                                       _mult(
                                             _rotl32(
                                                     _mult(
                                                           block,
                                                           c1),
                                                     15),
                                             c2)
                                       ),
                                  13),
                          5),
                    0xe6546b64);
    }

    constexpr inline static uint32_t _body_rec(uint32_t x, const char * blocks, int i) {
        return ((i < 0) ? _body_rec(_body(x, _getblock32(blocks, i)), blocks, i + 1) : x);
    }

    constexpr inline static uint32_t _tail1(uint32_t x, const char * tail) {
        return _mult(
                     _rotl32(
                             _mult(
                                   _xor(x,
                                        tail[0]),
                                   c1),
                             15),
                     c2);
    }

    constexpr inline static uint32_t _tail(uint32_t x, const char * tail, const int len) {
        return ((len & 3) == 3) ? _xor(
                                       x,
                                       _tail1(
                                              _xor(tail[2] << 16,
                                                   tail[1] << 8),
                                              tail))
                                : ((len & 3) == 2) ? _xor(
                                                         x,
                                                         _tail1(
                                                                tail[1] << 8,
                                                                tail))
                                                   : ((len & 3) == 1) ? _xor(
                                                                             x,
                                                                             _tail1(
                                                                                    0,
                                                                                    tail))
                                                                      : x;
    }

    constexpr inline static uint32_t _final(uint32_t tail, const int len) {
        return _fmix32(_xor(tail, len));
    }

    constexpr inline static uint32_t murmur3(const char *key, const int len, uint32_t seed) {
        return _final(
                      _tail(
                            _body_rec(seed,
                                      (key + (len/4)*4),
                                      -1*(len/4)),
                            (key + ((len/4)*4)),
                            len),
                      len);
    }
};
Last edited on
It looks to me like the reference implementation treats bytes as unsigned values (0 -255) instead of signed values (-128 - 127). You probably just need to do the same.
Hmm, true. I didn't think that would be a problem but apparently it is. The problem is: constant expressions don't allow reinterpret casts. So how do I cast from a const char pointer (string literal) to a uint8_t pointer while still honoring the constexpr restrictions?

Or if I can't do that: can anybody tell me if using signed chars influences the properties of the hash in any way?

I'd rather cast though since AFAIK char is not guaranteed to be signed and I want this to be as platform independent as possible.
Topic archived. No new replies allowed.