#54932: TOP 1 解題報告


321qwedsa000@gmail.com (灝)


#pragma GCC optimize("O3,unroll-loops,fast-math")
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
#include <iostream>
#include <array>
#include <utility>
#include <cstdio>

// Reads the next decimal integer from stdin into `x`.
//
// Leading characters with a code <= ' ' (spaces, tabs, newlines, ...) are
// skipped. An optional single '-' may precede the digits. The character that
// terminates the digit run is consumed and discarded.
//
// Returns false (leaving `x` untouched) if EOF is hit before any
// non-whitespace character; returns true otherwise. Note that a token with no
// digits at all still returns true and stores 0.
inline bool read_int(int &x) {
    int c = getchar_unlocked();
    while (c != EOF && c <= ' ') c = getchar_unlocked();
    if (c == EOF) return false;

    const bool neg = (c == '-');
    if (neg) c = getchar_unlocked();

    // Accumulate in unsigned arithmetic: building the magnitude of INT_MIN
    // (2147483648) in a signed int would overflow, which is undefined
    // behaviour. Unsigned wrap-around is well defined.
    unsigned int magnitude = 0u;
    while (c >= '0' && c <= '9') {
        magnitude = magnitude * 10u + static_cast<unsigned int>(c - '0');
        c = getchar_unlocked();
    }
    if (neg) magnitude = 0u - magnitude;

    // Converting back yields the expected two's-complement value, including
    // INT_MIN, without ever negating a signed integer.
    x = static_cast<int>(magnitude);
    return true;
}

// Writes `x` to stdout in decimal, followed by a single space.
//
// Negative values are prefixed with '-'. The whole int range is supported,
// including INT_MIN.
inline void write_int(int x) {
    // Work on the magnitude as an unsigned value: `-x` overflows for INT_MIN
    // when done in signed arithmetic, whereas unsigned negation is well
    // defined and yields the correct magnitude.
    unsigned int magnitude = static_cast<unsigned int>(x);
    if (x < 0) {
        putchar_unlocked('-');
        magnitude = 0u - magnitude;
    }

    // Digits are produced least-significant first, then emitted in reverse.
    // 12 bytes is ample for a 32-bit value (at most 10 digits).
    char buf[12];
    int p = 0;
    do {  // do/while so that 0 produces the single digit '0'
        buf[p++] = static_cast<char>('0' + magnitude % 10u);
        magnitude /= 10u;
    } while (magnitude != 0u);

    while (p--) putchar_unlocked(buf[p]);
    putchar_unlocked(' ');
}

template <int N>
struct TMP_Unrolled {
    __attribute__((always_inline))
    static void read(int* __restrict ptr) {
        read_int(*ptr);
        TMP_Unrolled<N - 1>::read(ptr + 1);
    }

    __attribute__((always_inline))
    static void print(const int* __restrict ptr) {
        write_int(*(ptr - 1));
        TMP_Unrolled<N - 1>::print(ptr - 1);
    }
};

// Recursion terminator: with zero elements left, both operations do nothing.
template <>
struct TMP_Unrolled<0> {
    // No elements to read.
    __attribute__((always_inline))
    static void read(int* __restrict /*ptr*/) {
    }

    // No elements to print.
    __attribute__((always_inline))
    static void print(const int* __restrict /*ptr*/) {
    }
};

// Function-pointer types matching the signatures of TMP_Unrolled<N>::read and
// TMP_Unrolled<N>::print; used as the element types of the dispatch tables.
using ReadFunc = void(*)(int* __restrict);
using PrintFunc = void(*)(const int* __restrict);

template<size_t... Is>
constexpr auto build_read_table(std::index_sequence<Is...>) {
    return std::array<ReadFunc, sizeof...(Is)>{ &TMP_Unrolled<Is>::read... };
}

template<size_t... Is>
constexpr auto build_print_table(std::index_sequence<Is...>) {
    return std::array<PrintFunc, sizeof...(Is)>{ &TMP_Unrolled<Is>::print... };
}

// Number of elements processed per fully unrolled step in main().
constexpr int CHUNK_SIZE = 32;
// Dispatch tables with CHUNK_SIZE entries each: entry k handles exactly k
// elements (k = 0 .. CHUNK_SIZE - 1), covering the tail that does not fill a
// whole chunk.
constexpr auto read_jump = build_read_table(std::make_index_sequence<CHUNK_SIZE>{});
constexpr auto print_jump = build_print_table(std::make_index_sequence<CHUNK_SIZE>{});

// Capacity of the input buffer; presumably sized to the problem's upper bound
// on n plus some slack (TODO confirm against the problem statement).
constexpr int MAX_N = 1000005;
// Holds the values of the test case currently being processed.
int arr[MAX_N];

int main() {
    int n;
    while (read_int(n)) {
        int chunks = n / CHUNK_SIZE;
        int rem = n % CHUNK_SIZE;

        int* p = arr;
        for (int i = 0; i < chunks; ++i) {
            TMP_Unrolled<CHUNK_SIZE>::read(p);
            p += CHUNK_SIZE;
        }
        read_jump[rem](p);

        p = arr + n;
        for (int i = 0; i < chunks; ++i) {
            TMP_Unrolled<CHUNK_SIZE>::print(p);
            p -= CHUNK_SIZE;
        }
        print_jump[rem](p);
        
        putchar_unlocked('\n');
    }
    return 0;
}