求助,Montgomery 乘法比暴力乘法取模慢是个啥回事啊(至少在我的机子上是这样的)
// Qiuly AK IOI!
#include<cstdio>
#include<cstring>
#include<cctype>
#include<cmath>
#include<iostream>
#include<algorithm>
#include<random>
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")
using namespace std;
typedef int ll;
typedef unsigned int ul;
typedef long long int li;
typedef unsigned long long int ull;
const ll MAXN=262151,MOD=998244353,G=3,INVG=332748118;
struct ModInt{
ll v;
ModInt(ll v=0)
{
this->v=v;
}
};
struct Mont{
static ul mod,inv,r2;
ul v;
Mont(ll x=0)
{
v=reduce((ull)x*r2);
}
inline static void set(ull md)
{
mod=inv=md,inv*=2-inv*md,inv*=2-inv*md;
inv*=2-inv*md,inv*=2-inv*md,r2=-(ull)(md)%md;
}
inline ul reduce(ull x)
{
ul y=(ul)(x>>32)-(ul)(((ull)((ul)x*inv)*mod)>>32);
return (ll)(y)<0?y+mod:y;
}
inline Mont operator *(Mont &rhs)
{
v=reduce((ull)v*rhs.v);
return *this;
}
inline ul get()
{
return reduce(v);
}
};
ul Mont::mod,Mont::inv,Mont::r2;
ll x[100000001],y[100000001];
inline void fast()
{
Mont r,r2;
for(register int i=1;i<=10000000;i++)
{
r=Mont(x[i]),r2=Mont(y[i]),r=r*r2;
}
}
inline void slow()
{
ll x01;
for(register int i=1;i<=10000000;i++)
{
x01=(li)x[i]*y[i]%MOD;
}
}
int main()
{
Mont::set(MOD);
mt19937 mt(123123);
for(register int i=1;i<=10000000;i++)
{
x[i]=mt()%MOD,y[i]=mt()%MOD;
}
clock_t t0=clock();
fast();
clock_t t1=clock();
slow();
clock_t t2=clock();
printf("%.3lf s\n%.3lf s\n",(double)(t1-t0)/CLOCKS_PER_SEC,(double)(t2-t1)/CLOCKS_PER_SEC);
}