PageRenderTime 55ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 1ms

/src/bech32.cpp

https://github.com/bitcoin/bitcoin
C++ | 568 lines | 285 code | 64 blank | 219 comment | 101 complexity | c0b34dd95f0a1d7f7a8b491816465b14 MD5 | raw file
  1. // Copyright (c) 2017, 2021 Pieter Wuille
  2. // Copyright (c) 2021 The Bitcoin Core developers
  3. // Distributed under the MIT software license, see the accompanying
  4. // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5. #include <bech32.h>
  6. #include <util/vector.h>
  7. #include <array>
  8. #include <assert.h>
  9. #include <numeric>
  10. #include <optional>
  11. namespace bech32
  12. {
  13. namespace
  14. {
  15. typedef std::vector<uint8_t> data;
  16. /** The Bech32 and Bech32m character set for encoding. */
  17. const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
  18. /** The Bech32 and Bech32m character set for decoding. */
  19. const int8_t CHARSET_REV[128] = {
  20. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  21. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  22. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  23. 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
  24. -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
  25. 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
  26. -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
  27. 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
  28. };
  29. /** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
  30. * The defining polynomial of the extension is x^2 + 9x + 23.
  31. * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024),
  32. * that is, a generator of the field. Every non-zero element of the field can then be represented
  33. * as (e)^k for some power k.
  34. * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
  35. * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
  36. * GF1024_LOG[GF1024_EXP[k]] == k.
  37. * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */
  38. constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables()
  39. {
  40. // Build table for GF(32).
  41. // We use these tables to perform arithmetic in GF(32) below, when constructing the
  42. // tables for GF(1024).
  43. std::array<int8_t, 31> GF32_EXP{};
  44. std::array<int8_t, 32> GF32_LOG{};
  45. // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1.
  46. // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001.
  47. const int fmod = 41;
  48. // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is,
  49. // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial
  50. // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod).
  51. // For example, 00001 = 1 is the multiplicative identity.
  52. GF32_EXP[0] = 1;
  53. GF32_LOG[0] = -1;
  54. GF32_LOG[1] = 0;
  55. int v = 1;
  56. for (int i = 1; i < 31; ++i) {
  57. // Multiplication by x is the same as shifting left by 1, as
  58. // every coefficient of the polynomial is moved up one place.
  59. v = v << 1;
  60. // If the polynomial now has an x^5 term, we subtract fmod from it
  61. // to remain working modulo fmod. Subtraction is the same as XOR in characteristic
  62. // 2 fields.
  63. if (v & 32) v ^= fmod;
  64. GF32_EXP[i] = v;
  65. GF32_LOG[v] = i;
  66. }
  67. // Build table for GF(1024)
  68. std::array<int16_t, 1023> GF1024_EXP{};
  69. std::array<int16_t, 1024> GF1024_LOG{};
  70. GF1024_EXP[0] = 1;
  71. GF1024_LOG[0] = -1;
  72. GF1024_LOG[1] = 0;
  73. // Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
  74. // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
  75. // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element
  76. // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v),
  77. // we compute (e)*(v) by multiplying in the following way:
  78. //
  79. // v0' = 23*v1
  80. // v1' = 9*v1 + v0
  81. // e*v = v1' || v0'
  82. //
  83. // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32)
  84. // is done using the log/exp tables:
  85. // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ]
  86. // for non-zero a and b.
  87. v = 1;
  88. for (int i = 1; i < 1023; ++i) {
  89. int v0 = v & 31;
  90. int v1 = v >> 5;
  91. int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0;
  92. int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0;
  93. v = v1n << 5 | v0n;
  94. GF1024_EXP[i] = v;
  95. GF1024_LOG[v] = i;
  96. }
  97. return std::make_pair(GF1024_EXP, GF1024_LOG);
  98. }
  99. constexpr auto tables = GenerateGFTables();
  100. constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first;
  101. constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second;
  102. /* Determine the final constant to use for the specified encoding. */
  103. uint32_t EncodingConstant(Encoding encoding) {
  104. assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M);
  105. return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3;
  106. }
  107. /** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to
  108. * make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher
  109. * bits correspond to earlier values. */
  110. uint32_t PolyMod(const data& v)
  111. {
  112. // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an
  113. // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) =
  114. // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that
  115. // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...].
  116. // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of
  117. // v(x) mod g(x), where g(x) is the Bech32 generator,
  118. // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way
  119. // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a
  120. // window of 1023 characters. Among the various possible BCH codes, one was selected to in
  121. // fact guarantee detection of up to 4 errors within a window of 89 characters.
  122. // Note that the coefficients are elements of GF(32), here represented as decimal numbers
  123. // between {}. In this finite field, addition is just XOR of the corresponding numbers. For
  124. // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires
  125. // treating the bits of values themselves as coefficients of a polynomial over a smaller field,
  126. // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} =
  127. // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a
  128. // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}.
  129. // During the course of the loop below, `c` contains the bitpacked coefficients of the
  130. // polynomial constructed from just the values of v that were processed so far, mod g(x). In
  131. // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of
  132. // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value
  133. // for `c`.
  134. // The following Sage code constructs the generator used:
  135. //
  136. // B = GF(2) # Binary field
  137. // BP.<b> = B[] # Polynomials over the binary field
  138. // F_mod = b**5 + b**3 + 1
  139. // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition
  140. // FP.<x> = F[] # Polynomials over GF(32)
  141. // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23)
  142. // E.<e> = F.extension(E_mod) # GF(1024) extension field definition
  143. // for p in divisors(E.order() - 1): # Verify e has order 1023.
  144. // assert((e**p == 1) == (p % 1023 == 0))
  145. // G = lcm([(e**i).minpoly() for i in range(997,1000)])
  146. // print(G) # Print out the generator
  147. //
  148. // It demonstrates that g(x) is the least common multiple of the minimal polynomials
  149. // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024).
  150. // That guarantees it is, in fact, the generator of a primitive BCH code with cycle
  151. // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details.
  152. uint32_t c = 1;
  153. for (const auto v_i : v) {
  154. // We want to update `c` to correspond to a polynomial with one extra term. If the initial
  155. // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to
  156. // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to
  157. // process. Simplifying:
  158. // c'(x) = (f(x) * x + v_i) mod g(x)
  159. // ((f(x) mod g(x)) * x + v_i) mod g(x)
  160. // (c(x) * x + v_i) mod g(x)
  161. // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute
  162. // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x)
  163. // = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x)
  164. // = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i
  165. // If we call (x^6 mod g(x)) = k(x), this can be written as
  166. // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x)
  167. // First, determine the value of c0:
  168. uint8_t c0 = c >> 25;
  169. // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i:
  170. c = ((c & 0x1ffffff) << 5) ^ v_i;
  171. // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be
  172. // computed using the following Sage code (continuing the code above):
  173. //
  174. // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers.
  175. // v = 0
  176. // for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)):
  177. // v = v*32 + coef.integer_representation()
  178. // print("0x%x" % v)
  179. //
  180. if (c0 & 1) c ^= 0x3b6a57b2; // k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}
  181. if (c0 & 2) c ^= 0x26508e6d; // {2}k(x) = {19}x^5 + {5}x^4 + x^3 + {3}x^2 + {19}x + {13}
  182. if (c0 & 4) c ^= 0x1ea119fa; // {4}k(x) = {15}x^5 + {10}x^4 + {2}x^3 + {6}x^2 + {15}x + {26}
  183. if (c0 & 8) c ^= 0x3d4233dd; // {8}k(x) = {30}x^5 + {20}x^4 + {4}x^3 + {12}x^2 + {30}x + {29}
  184. if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 + x^4 + {8}x^3 + {24}x^2 + {21}x + {19}
  185. }
  186. return c;
  187. }
  188. /** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the
  189. * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999.
  190. *
  191. * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed
  192. * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid
  193. * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all
  194. * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j).
  195. *
  196. * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
  197. *
  198. * To compute R((e)^j), we are really computing:
  199. * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
  200. *
  201. * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed.
  202. * But even more than that, we can consider each coefficient as a bit-string.
  203. * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
  204. * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
  205. * where all the (2^i*(e)^j) are constants and can be precomputed.
  206. *
  207. * Then we just add each of these corresponding constants to our final value based on the
  208. * bit values b_i. This is exactly what is done in the Syndrome function below.
  209. */
  210. constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() {
  211. std::array<uint32_t, 25> SYNDROME_CONSTS{};
  212. for (int k = 1; k < 6; ++k) {
  213. for (int shift = 0; shift < 5; ++shift) {
  214. int16_t b = GF1024_LOG.at(1 << shift);
  215. int16_t c0 = GF1024_EXP.at((997*k + b) % 1023);
  216. int16_t c1 = GF1024_EXP.at((998*k + b) % 1023);
  217. int16_t c2 = GF1024_EXP.at((999*k + b) % 1023);
  218. uint32_t c = c2 << 20 | c1 << 10 | c0;
  219. int ind = 5*(k-1) + shift;
  220. SYNDROME_CONSTS[ind] = c;
  221. }
  222. }
  223. return SYNDROME_CONSTS;
  224. }
  225. constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants();
  226. /**
  227. * Syndrome returns the three values s_997, s_998, and s_999 described above,
  228. * packed into a 30-bit integer, where each group of 10 bits encodes one value.
  229. */
  230. uint32_t Syndrome(const uint32_t residue) {
  231. // low is the first 5 bits, corresponding to the r6 in the residue
  232. // (the constant term of the polynomial).
  233. uint32_t low = residue & 0x1f;
  234. // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional.
  235. uint32_t result = low ^ (low << 10) ^ (low << 20);
  236. // Then for each following bit, we add the corresponding precomputed constant if the bit is 1.
  237. // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10
  238. // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in
  239. // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999)
  240. // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field.
  241. for (int i = 0; i < 25; ++i) {
  242. result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0);
  243. }
  244. return result;
  245. }
  246. /** Convert to lower case. */
  247. inline unsigned char LowerCase(unsigned char c)
  248. {
  249. return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c;
  250. }
  251. /** Return indices of invalid characters in a Bech32 string. */
  252. bool CheckCharacters(const std::string& str, std::vector<int>& errors) {
  253. bool lower = false, upper = false;
  254. for (size_t i = 0; i < str.size(); ++i) {
  255. unsigned char c = str[i];
  256. if (c >= 'a' && c <= 'z') {
  257. if (upper) {
  258. errors.push_back(i);
  259. } else {
  260. lower = true;
  261. }
  262. } else if (c >= 'A' && c <= 'Z') {
  263. if (lower) {
  264. errors.push_back(i);
  265. } else {
  266. upper = true;
  267. }
  268. } else if (c < 33 || c > 126) {
  269. errors.push_back(i);
  270. }
  271. }
  272. return errors.empty();
  273. }
  274. /** Expand a HRP for use in checksum computation. */
  275. data ExpandHRP(const std::string& hrp)
  276. {
  277. data ret;
  278. ret.reserve(hrp.size() + 90);
  279. ret.resize(hrp.size() * 2 + 1);
  280. for (size_t i = 0; i < hrp.size(); ++i) {
  281. unsigned char c = hrp[i];
  282. ret[i] = c >> 5;
  283. ret[i + hrp.size() + 1] = c & 0x1f;
  284. }
  285. ret[hrp.size()] = 0;
  286. return ret;
  287. }
  288. /** Verify a checksum. */
  289. Encoding VerifyChecksum(const std::string& hrp, const data& values)
  290. {
  291. // PolyMod computes what value to xor into the final values to make the checksum 0. However,
  292. // if we required that the checksum was 0, it would be the case that appending a 0 to a valid
  293. // list of values would result in a new valid list. For that reason, Bech32 requires the
  294. // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
  295. // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
  296. const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values));
  297. if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
  298. if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
  299. return Encoding::INVALID;
  300. }
  301. /** Create a checksum. */
  302. data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
  303. {
  304. data enc = Cat(ExpandHRP(hrp), values);
  305. enc.resize(enc.size() + 6); // Append 6 zeroes
  306. uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
  307. data ret(6);
  308. for (size_t i = 0; i < 6; ++i) {
  309. // Convert the 5-bit groups in mod to checksum values.
  310. ret[i] = (mod >> (5 * (5 - i))) & 31;
  311. }
  312. return ret;
  313. }
  314. } // namespace
  315. /** Encode a Bech32 or Bech32m string. */
  316. std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
  317. // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder
  318. // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the
  319. // result will always be invalid.
  320. for (const char& c : hrp) assert(c < 'A' || c > 'Z');
  321. data checksum = CreateChecksum(encoding, hrp, values);
  322. data combined = Cat(values, checksum);
  323. std::string ret = hrp + '1';
  324. ret.reserve(ret.size() + combined.size());
  325. for (const auto c : combined) {
  326. ret += CHARSET[c];
  327. }
  328. return ret;
  329. }
  330. /** Decode a Bech32 or Bech32m string. */
  331. DecodeResult Decode(const std::string& str) {
  332. std::vector<int> errors;
  333. if (!CheckCharacters(str, errors)) return {};
  334. size_t pos = str.rfind('1');
  335. if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
  336. return {};
  337. }
  338. data values(str.size() - 1 - pos);
  339. for (size_t i = 0; i < str.size() - 1 - pos; ++i) {
  340. unsigned char c = str[i + pos + 1];
  341. int8_t rev = CHARSET_REV[c];
  342. if (rev == -1) {
  343. return {};
  344. }
  345. values[i] = rev;
  346. }
  347. std::string hrp;
  348. for (size_t i = 0; i < pos; ++i) {
  349. hrp += LowerCase(str[i]);
  350. }
  351. Encoding result = VerifyChecksum(hrp, values);
  352. if (result == Encoding::INVALID) return {};
  353. return {result, std::move(hrp), data(values.begin(), values.end() - 6)};
  354. }
  355. /** Find index of an incorrect character in a Bech32 string. */
  356. std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
  357. std::vector<int> error_locations{};
  358. if (str.size() > 90) {
  359. error_locations.resize(str.size() - 90);
  360. std::iota(error_locations.begin(), error_locations.end(), 90);
  361. return std::make_pair("Bech32 string too long", std::move(error_locations));
  362. }
  363. if (!CheckCharacters(str, error_locations)){
  364. return std::make_pair("Invalid character or mixed case", std::move(error_locations));
  365. }
  366. size_t pos = str.rfind('1');
  367. if (pos == str.npos) {
  368. return std::make_pair("Missing separator", std::vector<int>{});
  369. }
  370. if (pos == 0 || pos + 7 > str.size()) {
  371. error_locations.push_back(pos);
  372. return std::make_pair("Invalid separator position", std::move(error_locations));
  373. }
  374. std::string hrp;
  375. for (size_t i = 0; i < pos; ++i) {
  376. hrp += LowerCase(str[i]);
  377. }
  378. size_t length = str.size() - 1 - pos; // length of data part
  379. data values(length);
  380. for (size_t i = pos + 1; i < str.size(); ++i) {
  381. unsigned char c = str[i];
  382. int8_t rev = CHARSET_REV[c];
  383. if (rev == -1) {
  384. error_locations.push_back(i);
  385. return std::make_pair("Invalid Base 32 character", std::move(error_locations));
  386. }
  387. values[i - pos - 1] = rev;
  388. }
  389. // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors
  390. // We can't simply use the segwit version, because that may be one of the errors
  391. std::optional<Encoding> error_encoding;
  392. for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
  393. std::vector<int> possible_errors;
  394. // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
  395. // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
  396. uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
  397. // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
  398. // constant) should be 0 - hence 0 indicates there are no errors present.
  399. if (residue != 0) {
  400. // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid
  401. // codeword (a multiple of G(x)), and E encodes the errors.
  402. uint32_t syn = Syndrome(residue);
  403. // Unpack the three 10-bit syndrome values
  404. int s0 = syn & 0x3FF;
  405. int s1 = (syn >> 10) & 0x3FF;
  406. int s2 = syn >> 20;
  407. // Get the discrete logs of these values in GF1024 for more efficient computation
  408. int l_s0 = GF1024_LOG.at(s0);
  409. int l_s1 = GF1024_LOG.at(s1);
  410. int l_s2 = GF1024_LOG.at(s2);
  411. // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1
  412. // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1)
  413. // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too.
  414. // Hence, s1^2 == s0*s2, which is exactly the condition we check first:
  415. if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) {
  416. // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023)
  417. size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive
  418. // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1,
  419. // so 1/((e)^997) = (e)^(1023-997).
  420. int l_e1 = l_s0 + (1023 - 997) * p1;
  421. // Finally, some sanity checks on the result:
  422. // - The error position should be within the length of the data
  423. // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements
  424. // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)).
  425. if (p1 < length && !(l_e1 % 33)) {
  426. // Polynomials run from highest power to lowest, so the index p1 is from the right.
  427. // We don't return e1 because it is dangerous to suggest corrections to the user,
  428. // the user should check the address themselves.
  429. possible_errors.push_back(str.size() - p1 - 1);
  430. }
  431. // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2.
  432. } else {
  433. // For all possible first error positions p1
  434. for (size_t p1 = 0; p1 < length; ++p1) {
  435. // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so
  436. // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2.
  437. //
  438. // Consider s2 + s1*(e)^p1
  439. // = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
  440. // = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
  441. // (Because we are working in characteristic 2.)
  442. // = e2*(e)^(998*p2) ((e)^p2 + (e)^p1)
  443. //
  444. int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023));
  445. if (s2_s1p1 == 0) continue;
  446. int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1);
  447. // Similarly, s1 + s0*(e)^p1
  448. // = e2*(e)^(997*p2) ((e)^p2 + (e)^p1)
  449. int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023));
  450. if (s1_s0p1 == 0) continue;
  451. int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1);
  452. // So, putting these together, we can compute the second error position as
  453. // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1)
  454. // p2 = log((e)^p2)
  455. size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023;
  456. // Sanity checks that p2 is a valid position and not the same as p1
  457. if (p2 >= length || p1 == p2) continue;
  458. // Now we want to compute the error values e1 and e2.
  459. // Similar to above, we compute s1 + s0*(e)^p2
  460. // = e1*(e)^(997*p1) ((e)^p1 + (e)^p2)
  461. int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023));
  462. if (s1_s0p2 == 0) continue;
  463. int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2);
  464. // And compute (the log of) 1/((e)^p1 + (e)^p2))
  465. int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2));
  466. // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2)))
  467. // = e2*(e)^(997*p2)
  468. // Then recover e2 by dividing by (e)^(997*p2)
  469. int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2;
  470. // Check that e2 is in GF(32)
  471. if (l_e2 % 33) continue;
  472. // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2)))
  473. // = e1*(e)^(997*p1)
  474. // So recover e1 by dividing by (e)^(997*p1)
  475. int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1;
  476. // Check that e1 is in GF(32)
  477. if (l_e1 % 33) continue;
  478. // Again, we do not return e1 or e2 for safety.
  479. // Order the error positions from the left of the string and return them
  480. if (p1 > p2) {
  481. possible_errors.push_back(str.size() - p1 - 1);
  482. possible_errors.push_back(str.size() - p2 - 1);
  483. } else {
  484. possible_errors.push_back(str.size() - p2 - 1);
  485. possible_errors.push_back(str.size() - p1 - 1);
  486. }
  487. break;
  488. }
  489. }
  490. } else {
  491. // No errors
  492. return std::make_pair("", std::vector<int>{});
  493. }
  494. if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) {
  495. error_locations = std::move(possible_errors);
  496. if (!error_locations.empty()) error_encoding = encoding;
  497. }
  498. }
  499. std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum"
  500. : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum"
  501. : "Invalid checksum";
  502. return std::make_pair(error_message, std::move(error_locations));
  503. }
  504. } // namespace bech32