关于指令集的时间复杂度
查看原帖
关于指令集的时间复杂度
223797
Remake_楼主2020/8/23 17:51

说好的只优化 $\frac{1}{4}$ 的常数呢?为啥我直接就用指令集艹过去了啊,按数据来说跑不过去啊qwq。

#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4.1,sse4.2,avx,avx2,popcnt,tune=native")
#include <bits/stdc++.h>
#include <emmintrin.h>
#include <immintrin.h>
// Short aliases for the AVX/AVX2 intrinsics used below. Every vector is a
// 256-bit integer register treated as four packed signed 64-bit lanes.

// Vector type holding four 64-bit integers.
#define long_long __m256i
// Packs four 64-bit values into one vector. _mm256_set_epi64x takes its
// arguments highest lane first, so the order is reversed here to make `a`
// land in lane 0 (the lowest address when the vector is viewed as an array).
#define BUILD(a, b, c, d) _mm256_set_epi64x(d, c, b, a)
// Broadcasts one 64-bit value into all four lanes.
#define SET(a) _mm256_set1_epi64x(a)
// Lane-wise 64-bit addition.
#define ADD(a, b) _mm256_add_epi64(a, b)
// Lane-wise 64-bit equality compare (not used in the code visible here).
#define EQUAL(a, b) _mm256_cmpeq_epi64(a, b)
// Lane-wise signed 64-bit greater-than compare (not used in the code visible here).
#define BIGGER(a, b) _mm256_cmpgt_epi64(a, b)
// Bitwise AND of two 256-bit vectors (not used in the code visible here).
#define AND(a, b) _mm256_and_si256(a, b)
using namespace std;
// n    - number of input values still to be loaded (counted down to 0 in main)
// m    - number of operations processed by main
// num  - scalar view of A; main points it at A once loading is finished
// tot  - index of the next free vector slot in A while loading
// tmp  - staging buffer for up to four values before they are packed into A
// opt, x, y, z - fields of the operation currently being processed
long long n, m, *num, tot, tmp[5], opt, x, y, z;
// Element storage: 100005 >> 2 == 25001 vectors, i.e. room for 100004
// 64-bit values.
long_long A[100005 >> 2];
/**
 * Reads the next decimal integer from stdin using getchar().
 *
 * Every character before the first digit is skipped; if a '-' occurs anywhere
 * in that skipped run the result is negated. The run of consecutive digits is
 * then consumed, together with the single character that terminates it.
 *
 * @return the parsed value, or 0 when end of input is reached before any
 *         digit is found (the previous version looped forever in that case).
 */
inline long long read()
{
    long long value = 0, sign = 1;
    // Keep getchar()'s int result: EOF must remain distinguishable from real
    // characters, and isdigit() is only defined for EOF or values that are
    // representable as unsigned char.
    int ch = getchar();
    while (ch != EOF && !isdigit(ch))
    {
        if (ch == '-')
            sign = -1;
        ch = getchar();
    }
    // isdigit(EOF) is false, so this loop also stops at end of input.
    while (isdigit(ch))
    {
        value = value * 10 + (ch - '0');
        ch = getchar();
    }
    return value * sign;
}
/**
 * Prints x to stdout in decimal via putchar(), with a leading '-' for
 * negative values. No trailing newline or separator is written.
 *
 * @param x value to print; the full long long range is supported,
 *          including the most negative value.
 */
inline void write(long long x)
{
    // Work on the magnitude in unsigned arithmetic: negating the most
    // negative long long as a signed value overflows, whereas unsigned
    // wrap-around yields the correct magnitude (2^63).
    unsigned long long magnitude = static_cast<unsigned long long>(x);
    if (x < 0)
    {
        putchar('-');
        magnitude = 0ULL - magnitude;
    }
    // Digits are produced least-significant first, so buffer them and emit
    // in reverse. 20 characters cover any 64-bit magnitude.
    char digits[20];
    int count = 0;
    do
    {
        digits[count++] = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);
    while (count > 0)
        putchar(digits[--count]);
}
void change(long long l, long long r, long long v)
{
    while ((l & 3) && l < r)
        num[l++] += v;
    if (l == r)
        return;
    while (r & 3)
        num[--r] += v;
    if (l == r)
        return;
    long_long all_v = SET(v);
    for (l >>= 2, r >>= 2; l < r; ++l)
        A[l] = ADD(A[l], all_v);
    return;
}
long long query(long long l, long long r)
{
    long long cnt = 0;
    while ((l & 3) && l < r)
        cnt += num[l++];
    if (l == r)
        return cnt;
    while (r & 3)
        cnt += num[--r];
    if (l == r)
        return cnt;
    long_long ans = SET(0);
    for (l >>= 2, r >>= 2; l < r; ++l)
        ans = ADD(ans, A[l]);
    for (int i = 0; i < 4; ++i)
        cnt += ans[i];
    return cnt;
}
/**
 * Program entry point.
 *
 * Reads n and m, loads n values into A four at a time, then processes m
 * operations. Each operation starts with opt, x, y:
 *   opt == 1: reads z and adds z to the 1-based inclusive range [x, y];
 *   opt == 2: prints the sum of the 1-based inclusive range [x, y] on its
 *             own line.
 * Any other opt value is read and ignored.
 */
int main()
{
    cin >> n >> m;

    // Pack the input into vectors of four. When fewer than four values
    // remain, the unused lanes keep whatever tmp already held (zeros at
    // start-up, otherwise values from the previous group).
    while (n != 0)
    {
        const int batch = min(int(n), 4);
        for (int lane = 0; lane < batch; ++lane)
            tmp[lane] = read();
        n -= batch;
        A[tot] = BUILD(tmp[0], tmp[1], tmp[2], tmp[3]);
        ++tot;
    }

    // Expose the same storage as a flat array of 64-bit elements for the
    // scalar parts of change() and query().
    num = reinterpret_cast<long long *>(A);

    for (int done = 0; done < m; ++done)
    {
        opt = read();
        x = read();
        y = read();
        // The input range is 1-based inclusive; the helpers take a 0-based
        // half-open range, hence x - 1 and y.
        switch (opt)
        {
        case 1:
            z = read();
            change(x - 1, y, z);
            break;
        case 2:
            write(query(x - 1, y));
            putchar('\n');
            break;
        default:
            break;
        }
    }
}
2020/8/23 17:51
加载中...