萌新想拿道简单题练指令集,结果只有10分,求大佬帮助qwq
(感觉是答案统计出了问题,但是不大确定具体症结……/fad)
//ref: https://www.luogu.com.cn/blog/ouuan/avx-optimize
//ref: https://software.intel.com/sites/landingpage/IntrinsicsGuide/
#include <bits/stdc++.h>
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4.1,sse4.2,avx,avx2,popcnt,tune=native")
#include <immintrin.h>
#include <emmintrin.h>
const int N=200010;
typedef __m256i oak;
int n,m,tot,*a,op;
oak A[N>>3];
char s[N];
void modify(int l,int r){
while ((l&7)&&l<r) a[l++]^=1;
if (l==r) return;
while (r&7) a[--r]^=1;
if (l==r) return; //左右散块
oak t=_mm256_set1_epi32(1); //01互换=与1异或
for (l>>=3,r>>=3;l<r;++l) A[l]=_mm256_xor_si256(A[l],t); //_mm256_xor_epi32的替代品,洛谷疑似不支持avi512
}
int query(int l,int r){
int out=0;
while ((l&7)&&l<r) out+=a[l++];
if (l==r) return out;
while (r&7) out+=a[--r];
if (l==r) return out;
oak t=_mm256_set1_epi32(1);
oak ans=_mm256_set1_epi32(0);
oak cp=_mm256_set1_epi32(0);
for (l>>=3,r>>=3;l<r;++l) ans=_mm256_add_epi32(ans,_mm256_and_si256(t,_mm256_cmpgt_epi32(A[l],cp)));
for (int i=0;i<8;++i) out+=ans[i];
return out;//答案统计基于@ouuan巨佬的样题,但不知道正确性
}
int main()
{
int i,l,r,aa[8];
scanf("%d%d",&n,&m);
scanf("%s",s);
while (n){
if (n<8){
for (i=0;i<n;++i) aa[i]=s[i]-'0';
n=0;
}
else{
for (i=0;i<8;++i) aa[i]=s[i]-'0';
n-=8;
}
A[tot++]=_mm256_set_epi32(aa[7],aa[6],aa[5],aa[4],aa[3],aa[2],aa[1],aa[0]);
}//按块读入
a=(int*)&A;
while (m--){
scanf("%d%d%d",&op,&l,&r);
if (!op) modify(l-1,r);
else printf("%d\n",query(l-1,r));
}
return 0;
}