我的NTT比FFT慢怎么办/kel 没加任何常数优化的FFT
#include<bits/stdc++.h>
using namespace std;
const double pi=3.1415926535;
struct comp
{
double x,y;
comp operator+(comp b) {return (comp){x+b.x,y+b.y};}
comp operator-(comp b) {return (comp){x-b.x,y-b.y};}
comp operator*(comp b) {return (comp){x*b.x-y*b.y,x*b.y+y*b.x};}
}a[400005],b[400005];int n,m,T,t,r[400005];
inline void FFT(int n,comp *a,int fla)
{
for(int i=0;i<n;i++) if(r[i]<i) swap(a[i],a[r[i]]);
for(int mid=1;mid<n;mid<<=1)
{
comp wn=(comp){cos(pi/mid),fla*sin(pi/mid)},w;
for(int i=0,k;i<n;i+=(mid<<1))
for(k=0,w=(comp){1,0};k<mid;k++,w=w*wn)
{comp x=a[i+k],y=w*a[i+mid+k];a[i+k]=x+y,a[i+mid+k]=x-y;}
}
}
int main()
{
scanf("%d%d",&n,&m);
for(int i=0;i<=n;i++) scanf("%lf",&a[i].x);
for(int i=0;i<=m;i++) scanf("%lf",&b[i].x);
T=1,t=0;while(T<=n+m) T<<=1,t++;
for(int i=0;i<T;i++) r[i]=(r[i>>1]>>1)|((i&1)<<(t-1));
FFT(T,a,1),FFT(T,b,1);for(int i=0;i<T;i++) a[i]=a[i]*b[i];
FFT(T,a,-1);for(int i=0;i<=n+m;i++) printf("%d%c",(int)(a[i].x/T+0.5),i==n+m?'\n':' ');
return 0;
}
加了一大堆常数优化的NTT
#include<bits/stdc++.h>
using namespace std;
const int P=998244353;int n,m,a[400005],b[400005],r[400005],p[400005][2],T,t;
inline int qpow(int x,int q=P-2) {int r=1;for(;q;q>>=1,x=1ll*x*x%P) if(q&1) r=1ll*x*r%P;return r;}
inline int read()
{
char c=getchar();int t=0,f=0;
for(;c<'0'||c>'9';c=getchar()) if(c=='-') f=1;
for(;c>='0'&&c<='9';c=getchar()) t=(t<<3)+(t<<1)+c-'0';
return f?-t:t;
}
inline void NTT(int n,int *a,int fla)
{
for(int i=0;i<n;i++) if(i<r[i]) swap(a[i],a[r[i]]);
for(int len=1,cnt=1;len<n;len<<=1,cnt++)
{
int wn=p[cnt][fla];
for(int i=0;i<n;i+=(len<<1))
for(int k=0,w=1;k<len;k++,w=1ll*w*wn%P)
{int x=a[i+k],y=1ll*a[i+len+k]*w%P;a[i+k]=(x+y)%P,a[i+len+k]=(x-y+P)%P;}
}
}
int main()
{
n=read(),m=read(),p[23][0]=qpow(3,119);p[23][1]=qpow(332748118,119);
for(int i=0;i<=n;i++) a[i]=read();
for(int i=0;i<=m;i++) b[i]=read();
T=1,t=0;while(T<=n+m) T<<=1,t++;
for(int i=0;i<T;i++) r[i]=((r[i>>1]>>1)|((i&1)<<(t-1)));
for(int i=22;i>=0;i--) p[i][0]=1ll*p[i+1][0]*p[i+1][0]%P,p[i][1]=1ll*p[i+1][1]*p[i+1][1]%P;
NTT(T,a,1),NTT(T,b,1);
for(int i=0;i<T;i++) a[i]=1ll*a[i]*b[i]%P;
NTT(T,a,0);int inv=qpow(T);
for(int i=0;i<=n+m;i++) printf("%lld%c",1ll*a[i]*inv%P,i==n+m?'\n':' ');
return 0;
}