#include<cstdio>
#include<algorithm>
#include<cctype>
#include<cstring>
#include<cmath>
#include<queue>
#include<map>
#include<set>
using namespace std;
#define bomb exit(1)
#define INF 1061109567
#define LINF 4557430888798830399ll
#define pprint(x) print(x),putchar(' ')
#define fprint(x) print(x),putchar('\n')
#define EE(x); struct edge { int nxt,to,w; }e[M << 1]; int head[N],ecnt = 1;\
void add(int u,int v,ll w = 0) { e[++ecnt].w = w,e[ecnt].to = v,e[ecnt].nxt = head[u];head[u] = ecnt; }\
void add_edge(int u,int v,ll w = 0) { add(u,v,w),add(v,u,w * x); }
#define ll long long
const double pi = acos(-1.0);
int mod = 998244353;
void up(int &a) { a += a >> 31 & mod; }
#define eps 0.0000000001
#define sqr(x) ((x) * (x))
//#define getchar() (SS == TT && (TT = (SS = BB) + fread(BB,1,1 << 15,stdin),TT == SS) ? EOF : *SS++)
//char BB[1 << 15],*SS = BB,*TT = BB;
ll read()
{
ll x = 0;int f = 1;char ch = getchar();
for(;!isdigit(ch);ch = getchar()) if(ch == '-') f = -1;
for(;isdigit(ch);ch = getchar()) x = x * 10 + (ch ^ 48);
return x * f;
}
void print(ll x)
{
if(x < 0) putchar('-'),x = -x;
if(x > 9) print(x / 10);putchar(x % 10 + '0');
}
int qpow(int a,int b) { int ans = 1;for(;b;a = (ll)a * a % mod,b >>= 1) if(b & 1) ans = (ll)ans * a % mod; return ans; }
const int N = 2000010,G = 3,Gi = 332748118;
int rev[N],inv[N];
void NTT(int *a,int n,int type)
{
for(int i = 0;i < n;i++) if(i < rev[i]) swap(a[i],a[rev[i]]);
for(int l = 2;l <= n;l <<= 1)
{
int m = l / 2;ll wn = qpow(~type ? G : Gi,(mod - 1) / l);
for(int *p = a;p != a + n;p += l)
for(int i = 0,w = 1;i < m;i++,w = (ll)w * wn % mod)
{
int t = (ll)w * p[i + m] % mod;
up(p[i + m] = p[i] - t);
up(p[i] = p[i] + t - mod);
}
}if(~type) return;
int inv = qpow(n,mod - 2);
for(int i = 0;i < n;i++) a[i] = (ll)a[i] * inv % mod;
}
void calc_rev(int &n,int &lim,int m)
{
n = 1,lim = 0;while(n < m) n <<= 1,lim++;
for(int i = 1;i < n;i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << lim - 1);
}
void Mul(int *a,int *b,int *c,int m1,int m2)
{
static int x[N],y[N];
int n,lim;calc_rev(n,lim,m1 + m2);
for(int i = 0;i < m1;i++) x[i] = a[i];for(int i = m1;i < n;i++) x[i] = 0;
for(int i = 0;i < m2;i++) y[i] = b[i];for(int i = m2;i < n;i++) y[i] = 0;
NTT(x,n,1),NTT(y,n,1);for(int i = 0;i < n;i++) c[i] = (ll)x[i] * y[i] % mod;
NTT(c,n,-1);for(int i = m1 + m2 - 1;i < n;i++) c[i] = 0;
}
void Inv(int *a,int *b,int len)
{
static int c[N];
if(len == 1) { b[0] = qpow(a[0],mod - 2);return; }
Inv(a,b,len + 1 >> 1);
int n,lim;calc_rev(n,lim,len << 1);
for(int i = 0;i < len;i++) c[i] = a[i];for(int i = len;i < n;i++) c[i] = 0;
NTT(c,n,1),NTT(b,n,1);
for(int i = 0;i < n;i++) b[i] = (ll)(2 - (ll)c[i] * b[i] % mod + mod) * b[i] % mod;
NTT(b,n,-1);for(int i = len;i < n;i++) b[i] = 0;
}
void Der(int *a,int *b,int len) { for(int i = 1;i < len;i++) b[i - 1] = (ll)a[i] * i % mod;b[len - 1] = 0; }
void Inte(int *a,int *b,int len) { for(int i = len - 1;i;i--) b[i] = (ll)a[i - 1] * inv[i] % mod;b[0] = 0; }
void Ln(int *a,int *b,int len)
{
static int c[N],inv[N];
memset(inv,0,sizeof(inv));
Der(a,c,len),Inv(a,inv,len);
Mul(c,inv,c,len,len);
Inte(c,b,len);
}
void Exp(int *a,int *b,int len)
{
static int c[N];
if(len == 1) { b[0] = 1;return; }
Exp(a,b,len + 1 >> 1);
int n,lim;calc_rev(n,lim,len << 1);
Ln(b,c,len);
for(int i = 0;i < n;i++) up(c[i] = a[i] - c[i]);
c[0]++;for(int i = len;i < n;i++) c[i] = b[i] = 0;
Mul(b,c,b,len,len);for(int i = len;i < n;i++) b[i] = 0;
}
在做 P5860 时,我的 Exp 6 秒跑不过 500000
做板子的时候我就发现它跑得挺慢的,但是我不知道为什么
哪位大佬可以帮忙看下,是我写假了嘛???