Introduction#

These two transforms have a lot in common, so I will introduce them together.

First of all, what kind of problem do these algorithms solve? They compute convolutions of the following form:

c_k = \sum_{i\oplus j = k} a_i\times b_j

$\oplus$ can be any binary bitwise operations such as or, and, xor, etc.

FMT (Fast Mobius Transform) solves the formula above when the operation is OR or AND; FWT (Fast Walsh-Hadamard Transform) is used when the operation is XOR.

These two algorithms are so similar that some blogs and solutions describe them as the same algorithm, but please remember that they are not.

FMT#

Let's start with the OR operation.

The algorithm flow is:

Find a transform to apply to the arrays $a, b$, and call the transformed arrays $A, B$;
Define $C$ such that $C_i = A_i\times B_i$ ;
Use the inverse transform to get $c$ from $C$ .

Or Operation#

now the formula is:

c_k = \sum_{i\lor j = k} a_i\times b_j

We need to construct a transform that fits the algorithm flow above. Let

A_i = \sum_{i=i\cup j} a_j

We can check that it works:

\begin{aligned} C_i = A_i\times B_i &= \left(\sum_{i\cup j=i}a_j\right)\left(\sum_{i\cup k=i}b_k\right)\\ &= \sum_{i\cup(j\cup k) = i}a_jb_k\\ &= \sum_{i\cup l = i}\ \sum_{j\cup k = l}a_jb_k\\ &= \sum_{i\cup l = i}c_l \end{aligned}

This shows that $C$ is exactly the transform of $c$, so we can apply the inverse transform to get $c$ from $C$.

Now let's find a fast way to compute this transform. The condition $i=i\cup j$ means that $j$ is a subset of $i$, so computing it directly requires enumerating all subsets $j$ of every $i$, which takes $O(3^n)$ time. That is too slow.

Let's look at the indices:

origin index	0	1	2	3	4	5	6	7
binary form	000	001	010	011	100	101	110	111

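There is an obvious pattern: 0 and 4, 1 and 5, 2 and 6, and so on. The two indices of each pair have the same last two bits and differ only in the highest bit, so the first one is a subset of the second (for example, 0 is a subset of 4). This tells us that we can handle one bit at a time, adding each element to its partner that has this bit set, and so compute the transform quickly.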

1
// Fast Mobius transform for the OR convolution, applied to a copy of `a`.
//   flag =  1 : forward transform, every index collects the values of its subsets.
//   flag = -1 : inverse transform, the same contributions are subtracted again.
// `n` (array length), `MOD` and `ll` come from the surrounding program.
// Assumes a.size() == n and n is a power of two; otherwise i+j+k runs past the end.
auto FMTor(const std::vector<ll>& a, int flag) -> std::vector<ll> {
    auto trA{a};
    // k is the bit handled in this round, o = 2k is the size of one block.
    for (int o{2}, k{1}; o <= n; o <<= 1, k <<= 1) {
        for (int i{0}; i < n; i += o) {
            for (int j{0}; j < k; j++) {
                // i+j lacks bit k and i+j+k has it, so i+j is a subset of i+j+k.
                // "+ MOD" keeps the value non-negative when flag is -1.
                trA[i+j+k] = (trA[i+j+k] + trA[i+j] * flag % MOD + MOD) % MOD;
            }
        }
    }
    return trA;
}

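This code reduces the time complexity of the transform to $O(n\times 2^n)$. Since the array has length $N = 2^n$, this is $O(N\log N)$ in terms of the array length. Calling it with `flag = 1` gives the forward transform; calling it with `flag = -1` subtracts the same contributions again, which gives the inverse transform.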

And Operation#

The algorithm flow is same.

We are looking for a transform similar to the one for the OR operation.

Let $A$ be the array after the transform; we define:

A_i = \sum_{i=i\cap j} a_j

That means $j$ is a superset of $i$, so we proceed just like before, except that we add the contribution from the superset to the subset.

1
// Fast Mobius transform for the AND convolution, applied to a copy of `a`.
//   flag =  1 : forward transform, every index collects the values of its supersets.
//   flag = -1 : inverse transform, the same contributions are subtracted again.
// `n` (array length), `MOD` and `ll` come from the surrounding program.
// Assumes a.size() == n and n is a power of two; otherwise i+j+k runs past the end.
auto FMTand(const std::vector<ll>& a, int flag) -> std::vector<ll> {
    auto trA{a};
    // k is the bit handled in this round, o = 2k is the size of one block.
    for (int o{2}, k{1}; o <= n; o <<= 1, k <<= 1) {
        for (int i{0}; i < n; i += o) {
            for (int j{0}; j < k; j++) {
                // i+j+k has bit k and i+j lacks it, so i+j+k is a superset of i+j.
                // "+ MOD" keeps the value non-negative when flag is -1.
                trA[i+j] = (trA[i+j] + trA[i+j+k] * flag % MOD + MOD) % MOD;
            }
        }
    }
    return trA;
}

Full Template#

It is really not difficult.

1
/**
2
 * @file    : FastMobiusTransform.cpp
3
 * @date    : 2026-01-15
4
 * @brief   : LuoguP4717
5
 */
6

7
#include <iostream>
8
#include <vector>
9

10
#include <iostream>
11
#include <vector>
12

13
/**
 * @brief OR / AND convolution of two arrays via the fast Mobius transform.
 *
 * For arrays a and b of length n, computes
 *   c_k = sum of a_i * b_j over all i, j with (i | j) == k   (transformOr)
 *   c_k = sum of a_i * b_j over all i, j with (i & j) == k   (transformAnd)
 * modulo MOD. MOD must be positive and the stored values are expected to lie
 * in [0, MOD).
 *
 * n does not have to be a power of two: indices >= n are treated as zero
 * entries, and only the first n coefficients of the result are returned.
 */
class FastMobiusTransform {
private:
    using ll = long long;

    const int MOD;
    int n;
    std::vector<ll> a, b;

    /// Number of leading elements of `v` the transforms may touch: min(n, v.size()).
    auto clampedLength(const std::vector<ll>& v) const -> int {
        if (n <= 0) {
            return 0;
        }
        const auto size{v.size()};
        return size < static_cast<std::vector<ll>::size_type>(n) ? static_cast<int>(size) : n;
    }

public:
    /// Creates two zero-filled arrays of length n; fill them with input().
    explicit FastMobiusTransform(int n, int MOD) : MOD{MOD}, n{n}, a(n), b(n) {}

    /**
     * @brief Uses the given arrays; n becomes a.size().
     *
     * b is truncated or zero-padded to the length of a, so that a shorter b
     * can never be read out of bounds by the transforms.
     */
    FastMobiusTransform(int MOD, const std::vector<ll>& a, const std::vector<ll>& b)
        : MOD{MOD}, n(static_cast<int>(a.size())), a(a), b(b) {
        this->b.resize(this->a.size());
    }

    /// Reads the n values of a, then the n values of b, from std::cin.
    void input() {
        for (auto& value : a) {
            std::cin >> value;
        }
        for (auto& value : b) {
            std::cin >> value;
        }
    }

    /**
     * @brief Subset-sum (OR) transform of the first min(n, a.size()) elements.
     * @param a    values to transform; the argument itself is not modified
     * @param flag 1 for the forward transform, -1 for the inverse transform
     * @return a copy of `a` with the transform applied
     */
    auto FMTor(const std::vector<ll>& a, int flag) const -> std::vector<ll> {
        auto trA{a};
        const int len{clampedLength(trA)};
        // k is the bit handled in this round; blocks have size 2k.
        for (int k{1}; k < len; k <<= 1) {
            for (int i{0}; i < len; i += 2 * k) {
                // The extra bound skips partners beyond the array, which is
                // what zero-padding to a power of two would give as well.
                for (int j{0}; j < k && i + j + k < len; j++) {
                    // "+ MOD" keeps the value non-negative when flag is -1.
                    trA[i+j+k] = (trA[i+j+k] + trA[i+j] * flag % MOD + MOD) % MOD;
                }
            }
        }
        return trA;
    }

    /**
     * @brief Superset-sum (AND) transform of the first min(n, a.size()) elements.
     * @param a    values to transform; the argument itself is not modified
     * @param flag 1 for the forward transform, -1 for the inverse transform
     * @return a copy of `a` with the transform applied
     */
    auto FMTand(const std::vector<ll>& a, int flag) const -> std::vector<ll> {
        auto trA{a};
        const int len{clampedLength(trA)};
        for (int k{1}; k < len; k <<= 1) {
            for (int i{0}; i < len; i += 2 * k) {
                for (int j{0}; j < k && i + j + k < len; j++) {
                    trA[i+j] = (trA[i+j] + trA[i+j+k] * flag % MOD + MOD) % MOD;
                }
            }
        }
        return trA;
    }

    /// OR convolution of a and b: transform both, multiply pointwise, invert.
    auto transformOr() const -> std::vector<ll> {
        const auto trA = FMTor(a, 1);
        const auto trB = FMTor(b, 1);
        std::vector<ll> trC(n);
        for (int i{0}; i < n; i++) {
            trC[i] = trA[i] * trB[i] % MOD;
        }
        return FMTor(trC, -1);
    }

    /// AND convolution of a and b: transform both, multiply pointwise, invert.
    auto transformAnd() const -> std::vector<ll> {
        const auto trA = FMTand(a, 1);
        const auto trB = FMTand(b, 1);
        std::vector<ll> trC(n);
        for (int i{0}; i < n; i++) {
            trC[i] = trA[i] * trB[i] % MOD;
        }
        return FMTand(trC, -1);
    }
};
76

77
auto main() -> int {
78
    int n, MOD;
79
    std::cin >> n >> MOD;
80
    FastMobiusTransform fmt(1<<n, MOD);
81
    fmt.input();
82

83
    auto ans_or{fmt.transformOr()};
84
    for (auto& p : ans_or) {
85
        std::cout << p << ' ';
86
    }
87
    std::cout << "\n";
88
    auto ans_and{fmt.transformAnd()};
89
    for (auto& p : ans_and) {
90
        std::cout << p << ' ';
91
    }
92
    std::cout << "\n";
93
    return 0;
94
}

FWT#

In this part we introduce FWT, which is used to solve the following formula:

c_k = \sum_{i\oplus j = k} a_i\times b_j

Here $\oplus$ denotes the XOR operator.

Let $A$ be the transformed array of $a$, defined by:

A_i = \sum_{i\circ j = 0} a_j - \sum_{i\circ j = 1} a_j

In the formula, $i\circ j$ indicates $\operatorname{popcount}(i\cap j)\bmod 2$

And we can check its correctness:

\begin{aligned} A_i\times B_i &= \left(\sum_{i\circ j = 0} a_j - \sum_{i\circ j = 1} a_j\right)\times \left(\sum_{i\circ k = 0} b_k - \sum_{i\circ k = 1} b_k\right)\\ &= \left(\sum_{i\circ j = 0} a_j\sum_{i\circ k = 0} b_k + \sum_{i\circ j = 1}a_j\sum_{i\circ k = 1} b_k\right) - \left(\sum_{i\circ j = 0} a_j\sum_{i\circ k = 1} b_k + \sum_{i\circ j = 1}a_j\sum_{i\circ k = 0} b_k\right)\\ &= \sum_{(j\oplus k)\circ i = 0} a_jb_k - \sum_{(j\oplus k)\circ i = 1} a_jb_k\\ &= C_i \end{aligned}

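The step from the second row to the third row holds because $\operatorname{popcount}(i\cap j) + \operatorname{popcount}(i\cap k) \equiv \operatorname{popcount}(i\cap (j\oplus k)) \pmod 2$, that is, $(i\circ j)\oplus(i\circ k) = (j\oplus k)\circ i$. So the terms with $i\circ j = i\circ k$ are exactly the terms with $(j\oplus k)\circ i = 0$, the terms with $i\circ j \ne i\circ k$ are exactly the terms with $(j\oplus k)\circ i = 1$, and each group merges into a single sum.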

How do we compute it? Again we split the array one bit at a time. For a single bit we know that:

$\circ$ operator	0	1
0	0	0
1	0	1

So that:

{A'}_0 = A_0+A_1\\ {A'}_1 = A_0-A_1

We can also derive the inverse transform:

A_0 = \frac{A'_0+A'_1}{2}\\ A_1 = \frac{A'_0-A'_1}{2}

The code is also easy.

Full Template#

1
/**
2
 * @file    : FastWalshTransform.cpp
3
 * @date    : 2026-01-15
4
 * @brief   : LuoguP4717
5
 */
6

7
#include <iostream>
8
#include <vector>
9

10
/**
 * @brief XOR convolution of two arrays via the fast Walsh-Hadamard transform.
 *
 * For arrays a and b of length n, transform() computes
 *   c_k = sum of a_i * b_j over all i, j with (i ^ j) == k
 * modulo MOD. MOD must be positive and odd (the inverse transform divides by
 * two, and 2 has no inverse modulo an even number); the stored values are
 * expected to lie in [0, MOD).
 */
class FastWalshTransform {
private:
    using ll = long long;

    const int MOD;
    int n;
    std::vector<ll> a, b;

    /**
     * @brief Modular inverse of 2 for odd MOD, i.e. (MOD + 1) / 2.
     *
     * Written as MOD / 2 + 1 so that it cannot overflow. Unlike the Fermat
     * power 2^(MOD-2), this is also correct when MOD is odd but not prime.
     */
    auto inverseOfTwo() const -> int {
        return MOD / 2 + 1;
    }

    /// Smallest power of two that is >= n (1 when n is 0).
    auto paddedLength() const -> int {
        int size{1};
        while (size < n) {
            size <<= 1;
        }
        return size;
    }

    /**
     * @brief In-place butterfly over the first `len` elements of `data`.
     *
     * Each pair (u, v) becomes (scale * (u + v), scale * (u - v)) modulo MOD.
     * Only complete blocks are processed, so no index >= len is touched; the
     * result is a Walsh-Hadamard transform only when len is a power of two.
     */
    void butterfly(std::vector<ll>& data, int len, int scale) const {
        for (int k{1}; k <= len / 2; k <<= 1) {
            for (int i{0}; i + 2 * k <= len; i += 2 * k) {
                for (int j{0}; j < k; j++) {
                    const ll u{data[i+j]};
                    const ll v{data[i+j+k]};
                    // "+ MOD" keeps the value non-negative when u < v.
                    data[i+j] = (scale * (u + v) % MOD + MOD) % MOD;
                    data[i+j+k] = (scale * (u - v) % MOD + MOD) % MOD;
                }
            }
        }
    }

public:
    /// Creates two zero-filled arrays of length n; fill them with input().
    explicit FastWalshTransform(int n, int MOD) : MOD{MOD}, n{n}, a(n), b(n) {}

    /**
     * @brief Uses the given arrays; n becomes a.size().
     *
     * b is truncated or zero-padded to the length of a, so that a shorter b
     * can never be read out of bounds.
     */
    FastWalshTransform(int MOD, const std::vector<ll>& a, const std::vector<ll>& b)
        : MOD{MOD}, n(static_cast<int>(a.size())), a(a), b(b) {
        this->b.resize(this->a.size());
    }

    /// Reads the n values of a, then the n values of b, from std::cin.
    void input() {
        for (auto& value : a) {
            std::cin >> value;
        }
        for (auto& value : b) {
            std::cin >> value;
        }
    }

    /**
     * @brief Walsh-Hadamard butterfly on a copy of `a`.
     * @param a    values to transform; at most the first n elements are used
     * @param flag factor applied in every round: 1 for the forward transform,
     *             the modular inverse of 2 for the inverse transform
     * @return a copy of `a` with the butterfly applied; meaningful as a
     *         transform only when the processed length is a power of two
     */
    auto fwtXor(const std::vector<ll>& a, int flag) const -> std::vector<ll> {
        auto trA{a};
        const auto size{trA.size()};
        const int len{(n > 0 && size < static_cast<std::vector<ll>::size_type>(n))
                          ? static_cast<int>(size)
                          : n};
        butterfly(trA, len, flag);
        return trA;
    }

    /**
     * @brief XOR convolution of a and b.
     *
     * Both arrays are zero-padded to a power of two before the transform, so
     * any n is accepted; the first n coefficients are returned.
     */
    auto transform() const -> std::vector<ll> {
        const int size{paddedLength()};
        auto trA{a};
        auto trB{b};
        trA.resize(size);
        trB.resize(size);
        butterfly(trA, size, 1);
        butterfly(trB, size, 1);

        std::vector<ll> trC(size);
        for (int i{0}; i < size; i++) {
            trC[i] = trA[i] * trB[i] % MOD;
        }
        butterfly(trC, size, inverseOfTwo());
        trC.resize(n);
        return trC;
    }
};
65

66
auto main() -> int {
67
    int n, MOD;
68
    std::cin >> n >> MOD;
69
    FastWalshTransform fwt(1<<n, MOD);
70
    fwt.input();
71
    auto ans{fwt.transform()};
72
    for (auto& p : ans) {
73
        std::cout << p << ' ';
74
    }
75
    std::cout << "\n";
76
    return 0;
77
}

Summarize#

That's all. Actually, this knowledge is not very useful in OI, but we still need to learn it…