形式幂级数复合|复合逆
形式幂级数的复合和复合逆也是常见的形式幂级数操作,对于没有特殊性质的
形式幂级数/多项式的复合
若要计算
我们考虑环
根据 常系数齐次线性递推 中提到的 Bostan–Mori 算法,Kinoshita 和 Li 指出可以将其修改为二元形式:
这样递归的计算在
在计算
那么我们有
注意第三个参数是因为
另外因为调用的限制最后递归终止时的
常见的特殊形式复合
我们常用的 多项式初等函数 都可以通过复合计算:
在复合逆的计算中我们也会用到求幂函数。
Kronecker 代换
在分析时间复杂度之前我们先考虑如何作二元多项式乘法,一种想法是将系数「打包」,这一方法由 Kronecker 在 1882 年通过
不妨设
我们使用 Kronecker 代换再计算一元多项式乘法即可,不难发现在
模板(P5373【模板】多项式复合函数)
代码相对于原算法作了一些简化及修改,使得代码更短。
#include <cassert>
#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>
using uint = unsigned;
using ull = unsigned long long;
constexpr uint MOD = 998244353;
constexpr uint PowMod(uint a, ull e) {
for (uint res = 1;; a = (ull)a * a % MOD) {
if (e & 1) res = (ull)res * a % MOD;
if ((e /= 2) == 0) return res;
}
}
constexpr uint InvMod(uint a) { return PowMod(a, MOD - 2); }
constexpr uint QUAD_NONRESIDUE = 3;
constexpr int LOG2_ORD = 23; // __builtin_ctz(MOD - 1)
constexpr uint ZETA = PowMod(QUAD_NONRESIDUE, (MOD - 1) >> LOG2_ORD);
constexpr uint INV_ZETA = InvMod(ZETA);
// 返回做 n 长 FFT 所需的单位根数组,长度为一半
std::pair<std::vector<uint>, std::vector<uint>> GetFFTRoot(int n) {
assert((n & (n - 1)) == 0);
if (n / 2 == 0) return {};
std::vector<uint> root(n / 2), inv_root(n / 2);
root[0] = inv_root[0] = 1;
for (int i = 0; (1 << i) < n / 2; ++i)
root[1 << i] = PowMod(ZETA, 1LL << (LOG2_ORD - i - 2)),
inv_root[1 << i] = PowMod(INV_ZETA, 1LL << (LOG2_ORD - i - 2));
for (int i = 1; i < n / 2; ++i)
root[i] = (ull)root[i - (i & (i - 1))] * root[i & (i - 1)] % MOD,
inv_root[i] =
(ull)inv_root[i - (i & (i - 1))] * inv_root[i & (i - 1)] % MOD;
return {root, inv_root};
}
void Butterfly(int n, uint a[], const uint root[]) {
assert((n & (n - 1)) == 0);
for (int i = n; i >= 2; i /= 2)
for (int j = 0; j < n; j += i)
for (int k = j; k < j + i / 2; ++k) {
const uint u = a[k];
a[k + i / 2] = (ull)a[k + i / 2] * root[j / i] % MOD;
if ((a[k] += a[k + i / 2]) >= MOD) a[k] -= MOD;
if ((a[k + i / 2] = u + MOD - a[k + i / 2]) >= MOD) a[k + i / 2] -= MOD;
}
}
void InvButterfly(int n, uint a[], const uint root[]) {
assert((n & (n - 1)) == 0);
for (int i = 2; i <= n; i *= 2)
for (int j = 0; j < n; j += i)
for (int k = j; k < j + i / 2; ++k) {
const uint u = a[k];
if ((a[k] += a[k + i / 2]) >= MOD) a[k] -= MOD;
a[k + i / 2] = (ull)(u + MOD - a[k + i / 2]) * root[j / i] % MOD;
}
}
void FFT(int n, uint a[], const uint root[]) { Butterfly(n, a, root); }
void InvFFT(int n, uint a[], const uint root[]) {
InvButterfly(n, a, root);
const uint inv_n = InvMod(n);
for (int i = 0; i < n; ++i) a[i] = (ull)a[i] * inv_n % MOD;
}
// 形式幂级数复合,求出 f(g) mod x^n 要求 g(0) = 0
std::vector<uint> FPSComposition(std::vector<uint> f, std::vector<uint> g,
int n) {
assert(g.empty() || g[0] == 0);
int len = 1;
while (len < n) len *= 2;
std::vector<uint> root, inv_root;
std::tie(root, inv_root) = GetFFTRoot(len * 4);
// [y^(-1)] (f(y) / (-g(x) + y)) mod x^n in R[x]((y^(-1)))
auto KinoshitaLi = [&](auto &&KinoshitaLi, const std::vector<uint> &P,
const std::vector<uint> &Q, int d, int n) {
assert((int)P.size() == d * n);
assert((int)Q.size() == d * n);
if (n == 1) return P;
std::vector<uint> dftQ(d * n * 4);
for (int i = 0; i < d; ++i)
for (int j = 0; j < n; ++j) dftQ[i * n * 2 + j] = Q[i * n + j];
dftQ[d * n * 2] = 1;
FFT(d * n * 4, dftQ.data(), root.data());
std::vector<uint> V(d * n * 2);
for (int i = 0; i < d * n * 4; i += 2)
V[i / 2] = (ull)dftQ[i] * dftQ[i + 1] % MOD;
InvFFT(d * n * 2, V.data(), inv_root.data());
if ((V[0] += MOD - 1) >= MOD) V[0] -= MOD;
for (int i = 1; i < d * 2; ++i)
for (int j = 0; j < n / 2; ++j) V[i * (n / 2) + j] = V[i * n + j];
V.resize(d * n);
const std::vector<uint> T = KinoshitaLi(KinoshitaLi, P, V, d * 2, n / 2);
std::vector<uint> dftT(d * n * 2);
for (int i = 0; i < d * 2; ++i)
for (int j = 0; j < n / 2; ++j) dftT[i * n + j] = T[i * (n / 2) + j];
FFT(d * n * 2, dftT.data(), root.data());
for (int i = 0; i < d * n * 4; i += 2) {
const uint u = dftQ[i];
dftQ[i] = (ull)dftT[i / 2] * dftQ[i + 1] % MOD;
dftQ[i + 1] = (ull)dftT[i / 2] * u % MOD;
}
InvFFT(d * n * 4, dftQ.data(), inv_root.data());
for (int i = 0; i < d; ++i)
for (int j = 0; j < n; ++j) dftQ[i * n + j] = dftQ[(i + d) * (n * 2) + j];
dftQ.resize(d * n);
return dftQ;
};
f.resize(len);
g.resize(len);
for (int i = 0; i < len; ++i) g[i] = (g[i] != 0 ? MOD - g[i] : 0);
std::vector<uint> res = KinoshitaLi(KinoshitaLi, f, g, 1, len);
res.resize(n);
return res;
}
std::pair<std::vector<uint>, std::vector<uint>> GetFactorial(int n) {
if (n == 0) return {};
std::vector<uint> factorial(n), inv_factorial(n);
factorial[0] = inv_factorial[0] = 1;
if (n == 1) return {factorial, inv_factorial};
std::vector<uint> inv(n);
inv[1] = 1;
for (int i = 2; i < n; ++i)
inv[i] = (ull)(MOD - MOD / i) * inv[MOD % i] % MOD;
for (int i = 1; i < n; ++i)
factorial[i] = (ull)factorial[i - 1] * i % MOD,
inv_factorial[i] = (ull)inv_factorial[i - 1] * inv[i] % MOD;
return {factorial, inv_factorial};
}
// f(x) |-> f(x + c)
std::vector<uint> TaylorShift(std::vector<uint> f, uint c) {
if (f.empty() || c == 0) return f;
const int n = f.size();
std::vector<uint> factorial, inv_factorial;
std::tie(factorial, inv_factorial) = GetFactorial(n);
for (int i = 0; i < n; ++i) f[i] = (ull)f[i] * factorial[i] % MOD;
std::vector<uint> g(n);
uint cp = 1;
for (int i = 0; i < n; ++i)
g[i] = (ull)cp * inv_factorial[i] % MOD, cp = (ull)cp * c % MOD;
int len = 1;
while (len < n * 2 - 1) len *= 2;
std::vector<uint> root, inv_root;
std::tie(root, inv_root) = GetFFTRoot(len);
f.resize(len);
g.resize(len);
FFT(len, f.data(), inv_root.data());
FFT(len, g.data(), root.data());
for (int i = 0; i < len; ++i) f[i] = (ull)f[i] * g[i] % MOD;
InvFFT(len, f.data(), root.data());
f.resize(n);
for (int i = 0; i < n; ++i) f[i] = (ull)f[i] * inv_factorial[i] % MOD;
return f;
}
// 多项式复合,求出 f(g) mod x^n
std::vector<uint> PolyComposition(const std::vector<uint> &f,
const std::vector<uint> &g, int n) {
if (g.empty() || g[0] == 0) return FPSComposition(f, g, n);
std::vector<uint> gg = g;
gg[0] = 0;
return FPSComposition(TaylorShift(f, g[0]), gg, n);
}
int main() {
int n, m;
std::scanf("%d%d", &n, &m);
std::vector<uint> f(n + 1), g(m + 1);
for (int i = 0; i <= n; ++i) std::scanf("%u", &f[i]);
for (int i = 0; i <= m; ++i) std::scanf("%u", &g[i]);
const std::vector<uint> res = PolyComposition(f, g, n + 1);
for (int i = 0; i <= n; ++i) std::printf("%u%c", res[i], " \n"[i == n]);
return 0;
}
形式幂级数的复合逆
现给出
根据 Lagrange 反演,对于
也就是我们如果能对
Kinoshita 和 Li 指出我们可以考虑二元有理函数
且这个问题有一个更一般的形式即 Power Projection 问题:我们考虑计算
当
那么
我们给出其伪代码:
同样的我们也可以直接导出
模板(P5809【模板】多项式复合逆)
代码相对于原算法作了一些简化及修改,使得代码更短。
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>
using uint = unsigned;
using ull = unsigned long long;
constexpr uint MOD = 998244353;
constexpr uint PowMod(uint a, ull e) {
for (uint res = 1;; a = (ull)a * a % MOD) {
if (e & 1) res = (ull)res * a % MOD;
if ((e /= 2) == 0) return res;
}
}
constexpr uint InvMod(uint a) { return PowMod(a, MOD - 2); }
constexpr uint QUAD_NONRESIDUE = 3;
constexpr int LOG2_ORD = 23; // __builtin_ctz(MOD - 1)
constexpr uint ZETA = PowMod(QUAD_NONRESIDUE, (MOD - 1) >> LOG2_ORD);
constexpr uint INV_ZETA = InvMod(ZETA);
// 返回做 n 长 FFT 所需的单位根数组,长度为一半
std::pair<std::vector<uint>, std::vector<uint>> GetFFTRoot(int n) {
assert((n & (n - 1)) == 0);
if (n / 2 == 0) return {};
std::vector<uint> root(n / 2), inv_root(n / 2);
root[0] = inv_root[0] = 1;
for (int i = 0; (1 << i) < n / 2; ++i)
root[1 << i] = PowMod(ZETA, 1LL << (LOG2_ORD - i - 2)),
inv_root[1 << i] = PowMod(INV_ZETA, 1LL << (LOG2_ORD - i - 2));
for (int i = 1; i < n / 2; ++i)
root[i] = (ull)root[i - (i & (i - 1))] * root[i & (i - 1)] % MOD,
inv_root[i] =
(ull)inv_root[i - (i & (i - 1))] * inv_root[i & (i - 1)] % MOD;
return {root, inv_root};
}
void Butterfly(int n, uint a[], const uint root[]) {
assert((n & (n - 1)) == 0);
for (int i = n; i >= 2; i /= 2)
for (int j = 0; j < n; j += i)
for (int k = j; k < j + i / 2; ++k) {
const uint u = a[k];
a[k + i / 2] = (ull)a[k + i / 2] * root[j / i] % MOD;
if ((a[k] += a[k + i / 2]) >= MOD) a[k] -= MOD;
if ((a[k + i / 2] = u + MOD - a[k + i / 2]) >= MOD) a[k + i / 2] -= MOD;
}
}
void InvButterfly(int n, uint a[], const uint root[]) {
assert((n & (n - 1)) == 0);
for (int i = 2; i <= n; i *= 2)
for (int j = 0; j < n; j += i)
for (int k = j; k < j + i / 2; ++k) {
const uint u = a[k];
if ((a[k] += a[k + i / 2]) >= MOD) a[k] -= MOD;
a[k + i / 2] = (ull)(u + MOD - a[k + i / 2]) * root[j / i] % MOD;
}
}
void FFT(int n, uint a[], const uint root[]) { Butterfly(n, a, root); }
void InvFFT(int n, uint a[], const uint root[]) {
InvButterfly(n, a, root);
const uint inv_n = InvMod(n);
for (int i = 0; i < n; ++i) a[i] = (ull)a[i] * inv_n % MOD;
}
// 形式幂级数复合,求出 f(g) mod x^n 要求 g(0) = 0
std::vector<uint> FPSComposition(std::vector<uint> f, std::vector<uint> g,
int n) {
assert(g.empty() || g[0] == 0);
int len = 1;
while (len < n) len *= 2;
std::vector<uint> root, inv_root;
std::tie(root, inv_root) = GetFFTRoot(len * 4);
// [y^(-1)] (f(y) / (-g(x) + y)) mod x^n in R[x]((y^(-1)))
auto KinoshitaLi = [&](auto &&KinoshitaLi, const std::vector<uint> &P,
const std::vector<uint> &Q, int d, int n) {
assert((int)P.size() == d * n);
assert((int)Q.size() == d * n);
if (n == 1) return P;
std::vector<uint> dftQ(d * n * 4);
for (int i = 0; i < d; ++i)
for (int j = 0; j < n; ++j) dftQ[i * n * 2 + j] = Q[i * n + j];
dftQ[d * n * 2] = 1;
FFT(d * n * 4, dftQ.data(), root.data());
std::vector<uint> V(d * n * 2);
for (int i = 0; i < d * n * 4; i += 2)
V[i / 2] = (ull)dftQ[i] * dftQ[i + 1] % MOD;
InvFFT(d * n * 2, V.data(), inv_root.data());
if ((V[0] += MOD - 1) >= MOD) V[0] -= MOD;
for (int i = 1; i < d * 2; ++i)
for (int j = 0; j < n / 2; ++j) V[i * (n / 2) + j] = V[i * n + j];
V.resize(d * n);
const std::vector<uint> T = KinoshitaLi(KinoshitaLi, P, V, d * 2, n / 2);
std::vector<uint> dftT(d * n * 2);
for (int i = 0; i < d * 2; ++i)
for (int j = 0; j < n / 2; ++j) dftT[i * n + j] = T[i * (n / 2) + j];
FFT(d * n * 2, dftT.data(), root.data());
for (int i = 0; i < d * n * 4; i += 2) {
const uint u = dftQ[i];
dftQ[i] = (ull)dftT[i / 2] * dftQ[i + 1] % MOD;
dftQ[i + 1] = (ull)dftT[i / 2] * u % MOD;
}
InvFFT(d * n * 4, dftQ.data(), inv_root.data());
for (int i = 0; i < d; ++i)
for (int j = 0; j < n; ++j) dftQ[i * n + j] = dftQ[(i + d) * (n * 2) + j];
dftQ.resize(d * n);
return dftQ;
};
f.resize(len);
g.resize(len);
for (int i = 0; i < len; ++i) g[i] = (g[i] != 0 ? MOD - g[i] : 0);
std::vector<uint> res = KinoshitaLi(KinoshitaLi, f, g, 1, len);
res.resize(n);
return res;
}
// Power Projection: [x^(n-1)] (fg^i) for i=0,..,n-1 要求 g(0) = 0
std::vector<uint> PowerProjection(std::vector<uint> f, std::vector<uint> g,
int n) {
assert(g.empty() || g[0] == 0);
int len = 1;
while (len < n) len *= 2;
std::vector<uint> root, inv_root;
std::tie(root, inv_root) = GetFFTRoot(len * 4);
// [x^(n-1)] (f(x) / (-g(x) + y)) in R[x]((y^(-1)))
auto KinoshitaLi = [&](auto &&KinoshitaLi, std::vector<uint> P,
std::vector<uint> Q, int d,
int n) -> std::vector<uint> {
assert((int)P.size() == d * n);
assert((int)Q.size() == d * n);
if (n == 1) return P;
std::vector<uint> dftQ(d * n * 4), dftP(d * n * 4);
for (int i = 0; i < d; ++i)
for (int j = 0; j < n; ++j)
dftP[i * n * 2 + j] = P[i * n + j], dftQ[i * n * 2 + j] = Q[i * n + j];
dftQ[d * n * 2] = 1;
FFT(d * n * 4, dftP.data(), root.data());
FFT(d * n * 4, dftQ.data(), root.data());
P.resize(d * n * 2);
Q.resize(d * n * 2);
for (int i = 0; i < d * n * 4; i += 2) {
const uint u = (ull)dftP[i] * dftQ[i + 1] % MOD;
const uint v = (ull)dftP[i + 1] * dftQ[i] % MOD;
P[i / 2] = (ull)(u + MOD - v) * inv_root[i / 2] % MOD;
if (P[i / 2] & 1) P[i / 2] += MOD;
P[i / 2] /= 2;
Q[i / 2] = (ull)dftQ[i] * dftQ[i + 1] % MOD;
}
InvFFT(d * n * 2, P.data(), inv_root.data());
InvFFT(d * n * 2, Q.data(), inv_root.data());
if ((Q[0] += MOD - 1) >= MOD) Q[0] -= MOD;
for (int i = 1; i < d * 2; ++i)
for (int j = 0; j < n / 2; ++j)
P[i * (n / 2) + j] = P[i * n + j], Q[i * (n / 2) + j] = Q[i * n + j];
P.resize(d * n);
Q.resize(d * n);
return KinoshitaLi(KinoshitaLi, P, Q, d * 2, n / 2);
};
f.insert(f.begin(), len - n, 0);
f.resize(len);
g.resize(len);
for (int i = 0; i < len; ++i) g[i] = (g[i] != 0 ? MOD - g[i] : 0);
std::vector<uint> res = KinoshitaLi(KinoshitaLi, f, g, 1, len);
std::reverse(res.begin(), res.end());
res.resize(n);
return res;
}
// 形式幂级数幂函数,计算 g^e mod x^n 要求 g(0) = 1
std::vector<uint> FPSPow1(std::vector<uint> g, uint e, int n) {
assert(!g.empty() && g[0] == 1);
if (n == 1) return std::vector<uint>{1u};
std::vector<uint> inv(n), f(n);
inv[1] = f[0] = 1;
for (int i = 2; i < n; ++i)
inv[i] = (ull)(MOD - MOD / i) * inv[MOD % i] % MOD;
for (int i = 1; i < n; ++i)
f[i] = (ull)f[i - 1] * (e + MOD + 1 - i) % MOD * inv[i] % MOD;
g[0] = 0;
return FPSComposition(f, g, n);
}
// 形式幂级数复合逆
// 计算 g mod x^n 满足 g(f) = f(g) = x 要求 g(0) = 0 且 g'(0) ≠ 0
std::vector<uint> FPSReversion(std::vector<uint> f, int n) {
assert(f.size() >= 2 && f[0] == 0 && f[1] != 0);
if (n == 1) return std::vector<uint>{0u};
f.resize(n);
const uint invf1 = InvMod(f[1]);
uint invf1p = 1;
for (int i = 0; i < n; ++i)
f[i] = (ull)f[i] * invf1p % MOD, invf1p = (ull)invf1p * invf1 % MOD;
std::vector<uint> inv(n);
inv[1] = 1;
for (int i = 2; i < n; ++i)
inv[i] = (ull)(MOD - MOD / i) * inv[MOD % i] % MOD;
std::vector<uint> proj = PowerProjection(std::vector<uint>{1u}, f, n);
for (int i = 1; i < n; ++i)
proj[i] = (ull)proj[i] * (n - 1) % MOD * inv[i] % MOD;
std::reverse(proj.begin(), proj.end());
std::vector<uint> res = FPSPow1(proj, InvMod(MOD + 1 - n), n - 1);
for (int i = 0; i < n - 1; ++i) res[i] = (ull)res[i] * invf1 % MOD;
res.insert(res.begin(), 0);
return res;
}
int main() {
int n;
std::scanf("%d", &n);
std::vector<uint> f(n);
for (int i = 0; i < n; ++i) std::scanf("%u", &f[i]);
const std::vector<uint> res = FPSReversion(f, n);
for (int i = 0; i < n; ++i) std::printf("%u%c", res[i], " \n"[i + 1 == n]);
return 0;
}
由转置原理导出
Power Projection 问题是 Modular Composition 的转置,Kinoshita 和 Li 指出我们前文的复合算法可以由 Power Projection 算法直接转置得到。同样的,如果优化可以应用于 Power Projection 算法,其也可以应用于 Modular Composition 算法。我们省略细节。
参考文献
- Yasunori Kinoshita, Baitian Li.Power Series Composition in Near-Linear Time. FOCS 2024.
- Alin Bostan, Ryuhei Mori.A Simple and Fast Algorithm for Computing the N-th Term of a Linearly Recurrent Sequence. SOSA 2021: 118-132
- R. P. Brent and H. T. Kung. 1978.Fast Algorithms for Manipulating Formal Power Series. J. ACM 25, 4 (Oct. 1978), 581–595.
- Daniel J. Bernstein. "Fast multiplication and its applications." Pages 325–384 in Algorithmic number theory: lattices, number fields, curves and cryptography, edited by Joe Buhler, Peter Stevenhagen, Cambridge University Press, 2008, ISBN 978-0521808545.
创建日期: 2024年12月16日