🌑

小羊儿的心情天空

CDQ分治做FFT

Sep 1, 2018 由 小羊

CDQ分治+FFT

要计算的式子一般有这样的特性:自己的后项由前项推出。时间 O(nlog2n)O(n\log^2n)

fn=0inaifnif_n = \sum_{0 \le i \le n} a_i f_{n-i}

// dp[i] = sigma(a[j] * dp[i-j]) (j < i);
num_t x[MAXN], y[MAXN], a[MAXN], dp[MAXN];

void cdqFFT(int L, int R)
{
    if (L == R) return; // (assert that a[0] == 0)
    int mid = (L + R) >> 1;
    cdqFFT(L, mid);

    int len = R - L + 1;
    trans.init(log2_ceil2(len));

    // x = a0, a1, a2, ..., a[len], 0, 0, ...
    // y = dp[L], dp[L+1], dp[L+2], ..., dp[mid], 0, 0, ...
    for (int i = 0; i < trans.N; i++)
    {
        x[i] = i < len ? a[i] : 0;
        y[i] = (i <= mid-L) ? dp[i+L] : 0;
    }

    trans.DFT(x); trans.DFT(y);
    for (int i = 0; i < trans.N; ++i)
        x[i] = x[i] * y[i];
    trans.IDFT(x);

    for (int i = mid + 1; i <= R; ++i)
        dp[i] += x[i-L];
    cdqFFT(mid + 1, R);
}

A0=1;An+1=16(i+j+k=nAiAjAk+3i+2j=nAiAj+23i=nAi)A_0 = 1; A_{n+1} = \frac{1}{6} \left( \sum_{i+j+k=n} A_iA_jA_k + 3\sum_{i+2j=n} A_iA_j + 2\sum_{3i=n} A_i \right)

num_t tp[MAXN], tp2[MAXN], tp3[MAXN], A[MAXN];

inline void cdqFFT(int l, int r)
{
    if (l == r)
    {
        if (l == 0)
        {
            A[l] = 1;
        }
        else
        {
            A[l] = (l % 3 == 1) ? (A[l] + A[l/3] * 2) % mod : A[l]; // for the last item.
            A[l] = A[l] * inv6 % mod;
        }
        return; // Last modify of A[i], so we can divide 6 here.
    }

    int mid = (l + r) >> 1, len = r - l;
    cdqFFT(l, mid);

    trans.init(log2_ceil2(len * 2 + mid - l));

    // tp[]  = A[l], A[l+1], A[l+2], ..., A[mid], 0, 0, ...
    // tp2[] = A[0], A[1], A[2], ..., A[len-1], 0, 0, ...
    // tp3[] = A[0], 0, A[1], 0, A[2], 0, ..., at most A[len/2].
    for (int i = 0; i < trans.N; i++)
    {
        tp[i] = (i <= mid - l) ? A[i + l] : 0;
        tp2[i] = (i < len) ? A[i] : 0;
        tp3[i] = ((i & 1) == 0 && i <= len) ? A[i / 2] : 0;
    }

    trans.DFT(tp); trans.DFT(tp2); trans.DFT(tp3);

    for (int i = 0; i < trans.N; i++)
        tp2[i] = (tp2[i] * tp2[i] % mod) * tp[i] % mod;
    trans.IDFT(tp2); // tp2-new = A[i]A[j]A[k], i+j+k=n

    if (!l)
    {
        for (int i = mid + 1; i <= r; i++)
            A[i] = (A[i] + tp2[i - l - 1]) % mod;
    }
    else
    {
        for (int i = mid + 1; i <= r; i++)
            A[i] = (A[i] + tp2[i - l - 1] * 3) % mod;
    }

    for (int i = 0; i < trans.N; i++)
        tp[i] = tp[i] * tp3[i] % mod; // tp-new = A[i]A[j], i+2j=n
    trans.IDFT(tp);

    for (int i = mid + 1; i <= r; i++)
        A[i] = (A[i] + tp[i - l - 1] * 3) % mod;

    cdqFFT(mid + 1, r);
}

cdqFFT(L,R)cdqFFT(L,R) 可以将L~R内的所有项数值更新完成。

cdqFFT(L,mid)cdqFFT(L,mid) 后,左边一半处理完成,然后利用左边一半的数值更新右边一半。

以上述第一个卷积为例,卷积完后,有

xs=i+j=saifj+Lx_s = \sum_{i+j=s} a_i f_{j+L}

然后向后项更新时,会有

fk+=i+j=kaifj(k[mid+1,R],j[L,mid])f_k += \sum_{i+j=k} a_i f_j (k \in [mid+1,R], j \in [L,mid])

此时,fkf_k 已经内含了 i=0midaifki\sum_{i=0}^{mid} a_i f_{k-i},只需要计算 i=mid+1kaifki\sum_{i=mid+1}^{k} a_i f_{k-i}

以上述第二个卷积为例,

As+=i+j+k=sAiAjAk(s[mid+1,R],i[L,mid])A_s += \sum_{i+j+k=s} A_i A_j A_k (s \in [mid+1,R], i \in [L,mid])

此时,AsA_s 已经内含了 i=0midAiAjAk\sum_{i=0}^{mid} A_i A_j A_k,此时被卷积的 j+k[0,len2]j+k \in [0,\frac{len}{2}],而 len2mid\frac{len}{2} \le mid,没有利用到未计算完成的值。

在进行 cdqFFT(mid+1,R)cdqFFT(mid+1, R) 时,fkf_k 会被更新到新的 midmid 值,直到最后一次被更新。

时间复杂度 T(n)=2T(n2)+O(nlogn),T(1)=O(1),T(n)=O(nlog2n)T(n) = 2T(\frac{n}{2}) + O(nlogn), T(1) = O(1), T(n) = O(nlog^2n)

多项式计算

多项式求导

(k=0akxk)=k=1akkxk1\left(\sum_{k=0}^\infty a_k x^k\right)' = \sum_{k=1}^\infty a_k k x^{k-1}

void diff(int a[], OUT int b[], int len)
{
    for (int i = 1; i < len; i++)
        b[i-1] = 1LL * i * a[i] % mod;
    b[len] = b[len-1] = 0;
}

多项式积分

(k=0akxk)dx=k=0akk+1xk+1+C\int\left(\sum_{k=0}^\infty a_k x^k\right)dx = \sum_{k=0}^\infty \frac{a_k}{k+1} x^{k+1} + C

void integral(int a[], OUT int b[], int len)
{
    for (int i = 1; i < len; i++)
        b[i] = 1LL * a[i-1] * inv[i] % mod;
    b[0] = 0;
}

多项式求逆

当常数项与模互质时有逆元。

Bt+1(x)=2Bt(x)A(x)Bt2(x)B_{t+1}(x) = 2B_t(x) - A(x)B_t^2(x)

num_t A[_], B[_];

void get_inv(int a[], OUT b[], int len)
{
    if (len == 1)
    {
        b[0] = fpow(a[0], mod-2);
        return;
    }

    get_inv(a, b, len>>1);
    for (int i = 0; i < len; i++)
        A[i] = a[i], B[i] = b[i];
    trans.init(log2_floor(len)+1);
    trans.DFT(A); trans.DFT(B);
    for (int i = 0; i < trans.N; i++)
        A[i] = 1LL * A[i] * B[i] % mod * B[i] % mod;
    trans.IDFT(A);
    for (int i = 0; i < len; i++)
        b[i] = (2LL * b[i] % mod - A[i] + mod) % mod;
    for (int i = 0; i < trans.N; i++)
        A[i] = B[i] = 0;
}

牛顿迭代

Bt+1(x)=Bt(x)F(Bt(x))F(Bt(x))B_{t+1}(x) = B_t(x) - \frac{F(B_t(x))}{F'(B_t(x))}

多项式开方

Bt+1(x)=12(A(x)Bt(x)+Bt(x))B_{t+1}(x) = \frac{1}{2} \left( \frac{A(x)}{B_t(x)} + B_t(x) \right)

num_t C[_], D[_], inv2 = (mod-1)/2;

void get_sqrt(int a[], OUT int b[], int len)
{
    if (len == 1)
    {
        b[0] = sqrt(a[0]);
        return;
    }

    get_sqrt(a, b, len>>1);
    for (int i = 0; i < len; i++)
        C[i] = a[i];
    get_inv(b, D, len);
    trans.init(log2_floor(len)+1);
    trans.DFT(C); trans.DFT(D);
    for (int i = 0; i < trans.N; i++)
        D[i] = 1LL * D[i] * C[i] % mod;
    trans.IDFT(D);
    for (int i = 0; i < len; i++)
        b[i] = 1LL * (b[i] + D[i]) % mod * inv2 % mod;
    for (int i = 0; i < trans.N; i++)
        C[i] = D[i] = 0;
}

拆系数FFT

为了防止 A(x)B(x)A(x) \cdot B(x) 爆long long,所以设一个常数 MmodM \approx \sqrt{mod}

A(x)=F(x)M,B(x)=F(x)%M,C(x)=G(x)M,D(x)=G(x)%MA(x) = \frac{F(x)}{M}, B(x) = F(x)\% M, C(x) = \frac{G(x)}{M}, D(x) = G(x) \% M

于是卷积就变成了这样的七次:

M2A(x)C(x)+M(A(x)D(x)+B(x)C(x))+B(x)D(x)M^2 A(x) C(x) + M (A(x)D(x) + B(x)C(x)) + B(x)D(x)