#include <mpi.h>
#include <algorithm>

int main(int argc, char** argv) {
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    constexpr int N = 4;
    unsigned short allUniform[N + 1] = { };
    MPI_Op op;
    auto f = [](void* in, void* inout, int*, MPI_Datatype*) -> void {
        unsigned short* input = static_cast<unsigned short*>(in);
        unsigned short* output = static_cast<unsigned short*>(inout);
        output[0] = std::max(output[0], input[0]);
        output[1] = std::max(output[1], input[1]);
        output[2] = output[2] & input[2];
        output[3] = output[3] & input[3];
        if(N == 4)
            output[4] = output[4] & input[4];
    };
    MPI_Op_create(f, 1, &op);
    MPI_Allreduce(MPI_IN_PLACE, allUniform, N + 1, MPI_UNSIGNED_SHORT, op, MPI_COMM_WORLD);
    MPI_Op_free(&op);
    MPI_Finalize();
}

