import groovy.transform.Field

// Only one build of this job runs at a time; starting a new build aborts the
// previous one.
properties([disableConcurrentBuilds(abortPrevious: true)])
// Gate for every stage after 'opt-out' (each one has `when { ... DO_RUN }`).
// The 'opt-out' stage clears it when skip() reports nothing worth testing.
@Field def DO_RUN=true
// Upstream branch to clone and diff against; overwritten by check_target().
@Field def TARGET="main"
// Path, relative to the workspace, of the CI helper scripts; used to build
// the RUN_LOCATION environment variable.
@Field def SCRIPT_LOCATION="upstream/libfabric/contrib/intel/jenkins"
// Set from release() in the 'opt-out' stage; adds --release to build.py and
// summary.py invocations and triggers save_summary().
@Field def RELEASE=false
// Build modes passed as --ofi_build_mode to build.py / runtests.py.
@Field def BUILD_MODES=["reg", "dbg", "dl"]
// Suffix appended to "python" when invoking the helper scripts.
@Field def PYTHON_VERSION="3.9"
// Argument handed to `timeout` inside each Slurm job (see slurm_batch);
// the 'opt-out' stage raises it to "21600" for weekly runs.
@Field def TIMEOUT="7200"

/**
 * Run a command line with the interpreter "python<version>".
 *
 * @param version  interpreter suffix, e.g. "3.9"
 * @param command  script path plus its arguments
 * @param output   optional file that stdout is appended to; when null the
 *                 output goes to the build console
 */
def run_python(version, command, output=null) {
  // Only add the shell redirection when a target file was supplied.
  def redirect = (output != null) ? " >> $output" : ""
  sh "python${version} ${command}${redirect}"
}

/**
 * Submit a command as a blocking Slurm batch job and echo its log.
 *
 * The job dumps its environment first (`env`), then runs the command under
 * `timeout $TIMEOUT`. Job output is appended to the given file, which is
 * printed to the build console once the job ends.
 *
 * @param partition  value for sbatch --partition
 * @param node_num   value for sbatch -N
 * @param output     log file the job appends to
 * @param command    command line to run inside the job
 *
 * On failure, every job id found in the log is cancelled, the log is
 * printed, and the build is failed through error().
 */
def slurm_batch(partition, node_num, output, command) {
  try {
    sh """sbatch --partition=${partition} -N ${node_num} \
          --wait -o ${output} --open-mode=append \
          --wrap=\'env; timeout $TIMEOUT ${command}\'
       """
  } catch (Exception e) {
    // Cleanup is best-effort: if the job never started there is no log or no
    // SLURM_JOBID line, and a failing scancel/cat here would otherwise
    // replace the real error and skip the error() call below.
    sh "scancel \$(cat ${output} | grep SLURM_JOBID | cut -d \"=\" -f 2) || true"
    sh "cat ${output} || true"
    error("Build failed ${e}")
  }
  sh "cat ${output}"
}

/**
 * Run the fabtests suite for one provider through Slurm, once per build mode.
 *
 * @param stage_name  label used in console messages and log file names
 * @param hw          value for runtests.py --build_hw
 * @param partition   Slurm partition(s) to submit to
 * @param node_num    number of nodes requested per job
 * @param prov        core provider (--prov)
 * @param util        optional utility provider (--util)
 * @param user_env    optional extra environment settings (--user_env)
 * @param way         optional --way value; when given, only the "reg" build
 *                    mode is run and the value is appended to stage_name
 */
def run_fabtests(stage_name, hw, partition, node_num, prov, util=null,
                 user_env=null, way=null) {
  // Use the shared interpreter version rather than a hard-coded python3.9 so
  // this stays in step with slurm_build() and build().
  def command = "python${PYTHON_VERSION} ${RUN_LOCATION}/runtests.py --build_hw=${hw}"
  def opts = "--prov=${prov} --test=fabtests"
  def modes = BUILD_MODES
  if (util)
    opts = "${opts} --util=${util}"

  if (user_env)
    opts = "${opts} --user_env ${user_env}"

  if (way) {
    opts = "${opts} --way ${way}"
    stage_name = "${stage_name}_${way}"
    modes = ["reg"]
  }

  for (mode in modes) {
    echo "Running $stage_name fabtests $mode"
    slurm_batch("${partition}", "${node_num}",
                "${env.LOG_DIR}/${stage_name}_fabtests_${mode}",
                "${command} ${opts} --ofi_build_mode=${mode}")
  }

  echo "${stage_name} completed."
}

/**
 * Run one middleware test (runtests.py --test=<test>) for each provider.
 *
 * @param providers   list of [core, util] pairs; util may be null
 * @param stage_name  label used in console messages and log file names
 * @param test        value for runtests.py --test
 * @param hw          value for runtests.py --build_hw
 * @param partition   Slurm partition(s) to submit to
 * @param node_num    number of nodes requested per job
 * @param mpi         optional --mpi value, also added to the log file name
 * @param imb_grp     optional --imb_grp value
 *
 * Each provider is submitted as its own blocking Slurm job whose log is
 * ${env.LOG_DIR}/<stage_name>_<prov>[-<util>][_<mpi>]_<test>_reg.
 */
def run_middleware(providers, stage_name, test, hw, partition, node_num,
                   mpi=null, imb_grp=null) {
  // Shared interpreter version instead of a hard-coded python3.9.
  def base_cmd = "python${PYTHON_VERSION} ${RUN_LOCATION}/runtests.py --test=${test} --build_hw=${hw}"
  def prefix = "${env.LOG_DIR}/${stage_name}_"
  def suffix = "_${test}_reg"
  if (mpi) {
    base_cmd = "${base_cmd} --mpi=${mpi}"
    suffix = "_${mpi}${suffix}"
  }

  if (imb_grp)
    base_cmd = "${base_cmd} --imb_grp=${imb_grp}"

  // WEEKLY may be unset; the safe-navigation call then yields null (false)
  // instead of throwing.
  if (env.WEEKLY?.toBoolean())
    base_cmd = "${base_cmd} --weekly=${env.WEEKLY}"

  for (prov in providers) {
    def core = prov[0]
    def util = prov[1]
    def label = util ? "${core}-${util}" : "${core}"
    // Declared locally: this function is called from parallel stages, so the
    // values must not live in the shared script binding.
    def opts = util ? "--prov=${core} --util=${util}" : "--prov=${core}"
    def output = "${prefix}${label}${suffix}"

    echo "Running ${label} ${stage_name}"
    slurm_batch("${partition}", "${node_num}", "${output}",
                "${base_cmd} ${opts}")
  }
}

/**
 * Copy every file under a remote directory into a local one with scp.
 *
 * @param cluster  remote host; the login user is env.USER
 * @param key      identity file passed to scp -i
 * @param dest     local destination directory
 * @param source   remote directory whose contents are copied
 *
 * A failed transfer is reported on the console and does not fail the build.
 */
def gather_logs(cluster, key, dest, source) {
  def remote_glob = "${env.USER}@${cluster}:${source}/*"

  try {
    sh "scp -i ${key} ${remote_glob} ${dest}/"
  } catch (Exception err) {
    echo "Caught exception ${err} when transfering files from ${cluster}"
  }
}

/**
 * Run summary.py over the collected logs.
 *
 * @param item       value for --summary_item (every caller in this file
 *                   passes "all")
 * @param verbose    add -v
 * @param release    add --release
 * @param send_mail  add --send_mail; a Boolean or a "true"/"false" string,
 *                   null is treated as false
 */
def summarize(item, verbose=false, release=false, send_mail=false) {
  // Honour the requested item instead of ignoring the parameter.
  def cmd = "${RUN_LOCATION}/summary.py --summary_item=${item}"
  if (verbose) {
    cmd = "${cmd} -v "
  }
  if (release) {
    cmd = "${cmd} --release "
  }
  // Safe navigation: callers may pass a value derived from an unset WEEKLY.
  if (send_mail?.toBoolean()) {
    cmd = "${cmd} --send_mail "
  }

  run_python(PYTHON_VERSION, cmd)
}

/**
 * Commit this build's summary_*.log files to the internal repository.
 *
 * Clones github.com/${env.INTERNAL} into ${env.WORKSPACE}/internal, copies
 * the workspace's summary logs into its summaries/ directory, commits,
 * rebases onto origin/master and pushes.
 */
def save_summary() {
  // NOTE(review): env.PAT is interpolated into the command line, so the token
  // can show up in the console log -- consider a credentials binding.
  //
  // Remove the whole directory rather than "internal/*": the glob skips
  // dotfiles such as .git, and git clone refuses a non-empty target.
  sh """
    rm -rf ${env.WORKSPACE}/internal
    git clone https://${env.PAT}@github.com/${env.INTERNAL} ${env.WORKSPACE}/internal
    cd ${env.WORKSPACE}/internal
    mkdir -p ${env.WORKSPACE}/internal/summaries
    cp ${env.WORKSPACE}/summary_*.log ${env.WORKSPACE}/internal/summaries/
    git add ${env.WORKSPACE}/internal/summaries/
    git commit -am \"add ${env.JOB_NAME}'s summary\"
    git pull -r origin master
    git push origin master
  """
}

/**
 * Clone branch TARGET of env.UPSTREAM into a fresh
 * ${env.WORKSPACE}/upstream/libfabric directory.
 */
def checkout_upstream() {
  def loc = "${env.WORKSPACE}/upstream/libfabric"
  // Both branches of the old bash-only [[ ]] test ended in the same state
  // (an empty ${loc}), so do that unconditionally with POSIX sh commands.
  sh """
    rm -rf ${env.WORKSPACE}/upstream
    mkdir -p ${loc}

    git clone --branch ${TARGET} ${env.UPSTREAM} ${loc}
  """
}

/**
 * Clone env.CI_RESOURCES into a fresh ${env.WORKSPACE}/ci_resources directory.
 */
def checkout_ci_resources() {
  // The previous version tested for the *upstream* directory (copy-paste
  // slip), so a leftover ci_resources directory could make the plain mkdir
  // fail. Always start from an empty directory instead.
  sh """
    rm -rf ${env.WORKSPACE}/ci_resources
    mkdir -p ${env.WORKSPACE}/ci_resources

    git clone ${env.CI_RESOURCES} ${env.WORKSPACE}/ci_resources
  """
}

/**
 * Fetch everything the build needs besides the change under test: the CI
 * resources repository first, then the upstream libfabric clone.
 */
def checkout_external_resources() {
  checkout_ci_resources()
  checkout_upstream()
}

/**
 * Record which files differ between the checked-out HEAD and an upstream
 * branch.
 *
 * Runs in the current directory, which must be a git work tree. Adds
 * env.UPSTREAM as a temporary remote named "mainRepo", fetches it, writes the
 * output of `git diff --name-only HEAD..mainRepo/<branch_name>` to
 * <output_loc>/commit_id, then removes the remote again.
 *
 * @param branch_name  upstream branch to compare against
 * @param output_loc   directory that receives the commit_id file
 */
def generate_diff(def branch_name, def output_loc) {
  sh """
    git remote add mainRepo ${env.UPSTREAM}
    git fetch mainRepo
    git diff --name-only HEAD..mainRepo/${branch_name} > ${output_loc}/commit_id
    git remote remove mainRepo
  """
}

/**
 * Capture release-related information for the checked-out tree.
 *
 * Runs in the current directory, which must be a git work tree. Using a
 * temporary "mainRepo" remote for env.UPSTREAM, it writes into <output_loc>:
 *   - Makefile.am.diff   diff of Makefile.am against the upstream branch
 *   - configure.ac.diff  diff of configure.ac against the upstream branch
 *   - release_num.txt    the bracketed text of the second space-separated
 *                        field on the AC_INIT line(s) of configure.ac
 *
 * @param branch_name  upstream branch to compare against
 * @param output_loc   directory that receives the three files
 */
def generate_release_num(def branch_name, def output_loc) {
  sh """
    git remote add mainRepo ${env.UPSTREAM}
    git fetch mainRepo
    git diff mainRepo/${branch_name}:Makefile.am Makefile.am > \
        ${output_loc}/Makefile.am.diff
    git diff mainRepo/${branch_name}:configure.ac configure.ac > \
        ${output_loc}/configure.ac.diff
    cat configure.ac | grep AC_INIT | cut -d ' ' -f 2 | \
        cut -d '[' -f 2 | cut -d ']' -f 1 > ${output_loc}/release_num.txt
    git remote remove mainRepo
  """
}

/**
 * Build libfabric and fabtests in every requested mode inside one Slurm job.
 *
 * @param modes            build modes, each passed as --ofi_build_mode
 * @param partition        Slurm partition; also names the log file
 *                         ${env.LOG_DIR}/libfabric_<partition>
 * @param location         sub-directory of CUSTOM_WORKSPACE holding the
 *                         libfabric tree to build
 * @param hw               optional --build_hw value
 * @param additional_args  optional extra build.py arguments
 */
def slurm_build(modes, partition, location, hw=null, additional_args=null) {
  def src_root = "${CUSTOM_WORKSPACE}/${location}/libfabric"
  def libfabric = "--build_item=libfabric --build_loc=${src_root}"
  def fabtests = "--build_item=fabtests --build_loc=${src_root}/fabtests"

  // Common front part of every build.py invocation.
  def prefix = "python${PYTHON_VERSION} ${RUN_LOCATION}/build.py"
  if (RELEASE)
    prefix += " --release"
  if (hw)
    prefix += " --build_hw=${hw}"
  if (additional_args)
    prefix += " ${additional_args} "

  // One long ';'-separated command line: libfabric then fabtests per mode.
  def cmd = "pwd; "
  for (mode in modes) {
    for (item_args in [libfabric, fabtests]) {
      cmd += " ${prefix} ${item_args} --ofi_build_mode=${mode};"
    }
  }

  slurm_batch(partition, "1", "${env.LOG_DIR}/libfabric_${partition}", cmd)
}

/**
 * Run build.py for one item directly on the current agent (no Slurm).
 *
 * @param item             value for --build_item
 * @param mode             optional --ofi_build_mode value
 * @param hw               optional --build_hw value
 * @param additional_args  optional extra build.py arguments
 *
 * The build location is the libfabric source tree under CUSTOM_WORKSPACE, or
 * its fabtests sub-directory when item is "fabtests".
 */
def build(item, mode=null, hw=null, additional_args=null) {
  def src_root = "${CUSTOM_WORKSPACE}/source/libfabric"
  def build_loc = (item == "fabtests") ? "${src_root}/fabtests" : src_root

  def cmd = "${RUN_LOCATION}/build.py --build_item=${item} --build_loc=${build_loc}"
  if (mode)
    cmd += " --ofi_build_mode=${mode} "
  if (hw)
    cmd += " --build_hw=${hw} "
  if (RELEASE)
    cmd += " --release "
  if (additional_args)
    cmd += " ${additional_args} "

  run_python(PYTHON_VERSION, cmd)
}

/**
 * Decide which upstream branch this build is compared against.
 *
 * For change-request builds the target branch (env.CHANGE_TARGET) replaces
 * the TARGET global. Returns TARGET, or "main" if it is empty.
 */
def check_target() {
  echo "CHANGE_TARGET = ${env.CHANGE_TARGET}"
  // changeRequest() is a declarative `when` condition rather than a script
  // step. CHANGE_TARGET is only set for change-request builds, so test it
  // directly; this also keeps TARGET from being overwritten with null.
  if (env.CHANGE_TARGET) {
    TARGET = env.CHANGE_TARGET
  }

  return TARGET ?: "main"
}

/**
 * Report whether this change looks like a release.
 *
 * Reads the changed-file list written by generate_diff() and returns true
 * when it touches a Makefile.am or configure.ac. Returns false when the list
 * is missing.
 */
def release() {
  // generate_diff() is invoked with ${env.WORKSPACE}/source/libfabric as its
  // output directory, the same place skip() reads from. The workspace root
  // never contains commit_id.
  def file = "${env.WORKSPACE}/source/libfabric/commit_id"
  if (!fileExists(file)) {
    echo "CI Run has not rebased with ofiwg/libfabric. Please Rebase."
    // Callers use the result as a boolean; the previous `return 1` was truthy
    // and marked every such build as a release.
    return false
  }

  def changedFiles = readFile(file).readLines()

  if (changedFiles.any { it =~ /(?:Makefile\.am|configure\.ac)\b/ }) {
    echo "This is probably a release"
    return true
  }

  return false
}

/**
 * Report whether the test stages can be skipped for this change.
 *
 * Returns true when the changed-file list written by generate_diff() is
 * missing, is empty, or only contains paths under fabtests/pytests, man,
 * prov/efa or prov/opx. Returns false otherwise.
 */
def skip() {
  def file = "${env.WORKSPACE}/source/libfabric/commit_id"
  if (!fileExists(file)) {
    echo "CI Run has not rebased with ofiwg/libfabric. Please Rebase."
    // Same truthiness as the previous `return 1`, but a real boolean.
    return true
  }

  def changedFiles = readFile(file).readLines()
  echo "Changeset is: ${changedFiles}"

  if (changedFiles.isEmpty()) {
    echo "DONT RUN!"
    return true
  }

  // Anchor at the start of the repository-relative path and require a
  // directory separator. Unanchored, "man" also matched any path that merely
  // contains those letters (e.g. ".../command.c") and skipped real changes.
  if (changedFiles.every { it =~ /^(?:fabtests\/pytests|man|prov\/efa|prov\/opx)\// }) {
    echo "DONT RUN!"
    return true
  }

  return false
}

pipeline {
  // Default agent: the 'main' node, in a workspace unique to this build number.
  agent {
    node {
      label 'main'
      customWorkspace "workspace/${JOB_NAME}/${env.BUILD_NUMBER}"
    }
  }
  options {
      timestamps()
      // `activity: true` -- presumably an inactivity timeout (6 hours without
      // log output) rather than a cap on total run time; confirm against the
      // Jenkins `timeout` step documentation.
      timeout(activity: true, time: 6, unit: 'HOURS')
      // Sources are checked out explicitly by the 'checkout' stage.
      skipDefaultCheckout()
  }
  environment {
      JOB_CADENCE = 'PR'
      WITH_ENV="'PATH+EXTRA=/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/bin:$PYTHONPATH'"
      // Directory containing build.py, runtests.py and summary.py.
      RUN_LOCATION="${env.WORKSPACE}/${SCRIPT_LOCATION}/"
      // Absolute form of the per-build workspace, rooted at CB_HOME.
      CUSTOM_WORKSPACE="${CB_HOME}/workspace/${JOB_NAME}/${env.BUILD_NUMBER}"
      // Every Slurm job and test writes its log under this directory.
      LOG_DIR = "${env.CUSTOM_WORKSPACE}/log_dir"
  }
  stages {
    // Check the change under test out once per build tree, then fetch the
    // external repositories.
    stage ('checkout') {
      steps {
        script {
          // Same SCM revision in every tree: the plain source copy plus one
          // per hardware flavour that is built separately.
          for (def tree in ["source", "grass", "water", "electric", "ucx"]) {
            dir ("${CUSTOM_WORKSPACE}/${tree}/libfabric") {
              checkout scm
            }
          }
          dir (CUSTOM_WORKSPACE) {
            checkout_external_resources()
          }
        }
      }
    }
    // Work out the target branch, whether this is a weekly or release run,
    // and whether the remaining stages need to run at all.
    stage ('opt-out') {
      steps {
        script {
          TARGET=check_target()
          dir ("${CUSTOM_WORKSPACE}/source/libfabric") {
            generate_diff("${TARGET}", "${env.WORKSPACE}/source/libfabric")
            generate_release_num("${TARGET}", "${env.WORKSPACE}/source/libfabric")
          }

          // Locals instead of undeclared script-wide variables; the old
          // `skip = skip()` also reused the name of the skip() function.
          def weekly = env.WEEKLY ? env.WEEKLY.toBoolean() : false
          if (weekly) {
            // Weekly runs get a longer per-job timeout.
            TIMEOUT="21600"
          }
          def skip_run = skip()
          RELEASE = release()
          // Weekly runs are never skipped.
          if (skip_run && !weekly) {
            DO_RUN=false
          }
        }
      }
    }
    // Runs build.py with the 'builddir' and 'logdir' items on the default
    // agent before any compile job is submitted.
    stage ('prepare build') {
      when { equals expected: true, actual: DO_RUN }
      steps {
        script {  
          echo "Copying build dirs."
          build("builddir")
          echo "Copying log dirs."
          build("logdir")
        }
      }
    }
    // Build libfabric/fabtests for every hardware flavour concurrently.
    // water/grass/electric/ucx are built through Slurm from the default agent;
    // daos and gpu are built directly on their own agents.
    stage ('parallel-builds') {
      when { equals expected: true, actual: DO_RUN }
      parallel {
        stage ('build-water') {
          steps {
            script {
              slurm_build(BUILD_MODES, "water", "water", "water")
              // MPICH is built as a separate Slurm job after libfabric.
              slurm_batch("squirtle,totodile", "1",
                            "${env.LOG_DIR}/build_mpich_water_log",
                            """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
                              --build_item=mpich --build_hw=water"""
                          )
            }
          }
        }
        stage ('build-grass') {
          steps {
            script {
              slurm_build(BUILD_MODES, "grass", "grass", "grass")
              // MPICH is built as a separate Slurm job after libfabric.
              slurm_batch("grass", "1",
                            "${env.LOG_DIR}/build_mpich_grass_log",
                            """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
                              --build_item=mpich --build_hw=grass"""
                          )
            }
          }
        }
        stage ('build-electric') {
          steps {
            script {
              slurm_build(BUILD_MODES, "electric", "electric", "electric")
            }
          }
        }
        stage ('build-ucx') {
          steps {
            script {
              // Built on the 'totodile' partition from the 'ucx' source tree.
              slurm_build(BUILD_MODES, "totodile", "ucx", "ucx")
            }
          }
        }
        stage ('build-daos') {
          agent {
            node {
              label 'daos_head'
              customWorkspace CUSTOM_WORKSPACE
            }
          }
          options { skipDefaultCheckout() }
          steps {
            script {
              // This agent has its own workspace, so the sources and external
              // repositories are checked out again here.
              dir ("${CUSTOM_WORKSPACE}/source/libfabric") { checkout scm }
              checkout_external_resources()
              dir (CUSTOM_WORKSPACE) {
                build("logdir")
                build("libfabric", "reg", "daos")
                build("fabtests", "reg", "daos")
              }
            }
          }
        }
        stage ('build-gpu') {
          agent {
            node {
              label 'ze'
              customWorkspace CUSTOM_WORKSPACE
            }
          }
          options { skipDefaultCheckout() }
          steps {
            script {
              // This agent has its own workspace, so the sources and external
              // repositories are checked out again here.
              dir ("${CUSTOM_WORKSPACE}/source/libfabric") { checkout scm }
              checkout_external_resources()
              dir (CUSTOM_WORKSPACE) {
                build("logdir")
                build("builddir")
                build("libfabric", "reg", "gpu", "--gpu")
                build("fabtests", "reg", "gpu")
              }
            }
          }
        }
      }
    }
    // Run every provider / middleware test concurrently. Most stages submit
    // Slurm jobs from the default agent; the daos and GPU ('ze') stages run
    // on their own agents.
    stage('parallel-tests') {
      when { equals expected: true, actual: DO_RUN }
      parallel {
        stage('MPI_verbs-rxm_IMB') {
          steps {
            script {
              dir (RUN_LOCATION) {
                def providers = [["verbs", "rxm"]]
                for (def mpi in ["impi"]) {
                  // `def` keeps the counter local to this stage. Undeclared,
                  // it lived in the script binding and was shared with the
                  // MPI_tcp stage, which runs in parallel and blocks inside
                  // the loop body, so groups could be skipped or repeated.
                  for (def imb_grp = 1; imb_grp < 4; imb_grp++) {
                    run_middleware(providers, "MPI", "IMB", "water",
                                   "squirtle,totodile", "2", "${mpi}",
                                   "${imb_grp}")
                  }
                }
              }
            }
          }
        }
        stage('MPI_verbs-rxm_OSU') {
          steps {
            script {
              dir (RUN_LOCATION) {
                def providers = [["verbs", "rxm"]]
                for (def mpi in ["impi", "mpich"]) {
                  run_middleware(providers, "MPI", "osu", "water",
                                 "squirtle,totodile", "2", "${mpi}")
                }
              }
            }
          }
        }
        stage('MPI_tcp') {
          steps {
            script {
              dir (RUN_LOCATION) {
                def providers = [["tcp", null]]
                // Local counter, not shared with MPI_verbs-rxm_IMB.
                for (def imb_grp = 1; imb_grp < 4; imb_grp++) {
                  run_middleware(providers, "MPI", "IMB", "grass",
                                 "bulbasaur", "2", "impi", "${imb_grp}")
                }
                for (def mpi in ["impi", "mpich"]) {
                  run_middleware(providers, "MPI", "osu", "grass", "bulbasaur",
                                 "2", "${mpi}")
                }
              }
            }
          }
        }
        stage('tcp') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("tcp", "grass", "bulbasaur", "2", "tcp")
              }
            }
          }
        }
        stage('verbs-rxm') {
          steps {
            script {
              dir (RUN_LOCATION) {
                // Default run, then two runs with different MR cache settings.
                run_fabtests("verbs-rxm", "water", "squirtle,totodile", "2",
                             "verbs", "rxm")
                run_fabtests("verbs-rxm", "water", "squirtle,totodile", "2",
                             "verbs", "rxm", "FI_MR_CACHE_MAX_COUNT=0")
                run_fabtests("verbs-rxm", "water", "squirtle,totodile", "2",
                             "verbs", "rxm", "FI_MR_CACHE_MONITOR=userfaultfd")
              }
            }
          }
        }
        stage('verbs-rxd') {
          steps {
            script {
              dir (RUN_LOCATION) {
                // Default run, then two runs with different MR cache settings.
                run_fabtests("verbs-rxd", "water", "squirtle", "2", "verbs",
                             "rxd")
                run_fabtests("verbs-rxd", "water", "squirtle", "2", "verbs",
                             "rxd", "FI_MR_CACHE_MAX_COUNT=0")
                run_fabtests("verbs-rxd", "water", "squirtle", "2", "verbs",
                             "rxd", "FI_MR_CACHE_MONITOR=userfaultfd")
              }
            }
          }
        }
        stage('udp') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("udp", "grass", "bulbasaur", "2", "udp")
              }
            }
          }
        }
        stage('shm') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("shm", "grass", "bulbasaur", "1", "shm")
                run_fabtests("shm", "grass", "bulbasaur", "1", "shm", null,
                            "FI_SHM_DISABLE_CMA=1")
              }
            }
          }
        }
        stage('sockets') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("sockets", "grass", "bulbasaur", "2", "sockets")
              }
            }
          }
        }
        stage('ucx') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("ucx", "ucx", "totodile", "2", "ucx")
              }
            }
          }
        }
        stage('psm3') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("psm3", "water", "squirtle", "2", "psm3", null,
                             "PSM3_IDENTIFY=1")
              }
            }
          }
        }
        stage('mpichtestsuite') {
          steps {
            script {
              dir (RUN_LOCATION) {
                def providers = [['tcp', null], ["verbs","rxm"]]
                def MPIS = ["mpich"]
                // WEEKLY may be unset; safe navigation yields null (false)
                // instead of throwing.
                if (env.WEEKLY?.toBoolean()) {
                  MPIS = ["impi", "mpich"]
                }
                for (def mpi in MPIS) {
                  run_middleware(providers, "mpichtestsuite", "mpichtestsuite",
                                 "water", "squirtle,totodile", "2", "${mpi}")
                }
              }
            }
          }
        }
        stage('SHMEM') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_middleware([["verbs", null], ["tcp", null],
                                ["sockets", null]], "SHMEM", "shmem",
                                "water", "squirtle,totodile", "2")
              }
            }
          }
        }
        stage ('multinode_performance') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_middleware([["tcp", null]], "multinode_performance",
                               "multinode", "grass", "bulbasaur", "2")
              }
            }
          }
        }
        stage ('oneCCL') {
          steps {
            script {
              dir (RUN_LOCATION) {
                run_middleware([["tcp", "rxm"]/*, ["psm3", null]*/], "oneCCL",
                               "oneccl", "grass", "bulbasaur", "2")
              }
            }
          }
        }
        stage ('oneCCL-GPU-v3') {
          agent { node { label 'ze' } }
          options { skipDefaultCheckout() }
          steps {
            script {
              dir (RUN_LOCATION) {
                run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu",
                               "gpu", "fabrics-ci", "2")
              }
            }
          }
        }
        stage('daos_tcp') {
          agent { node { label 'daos_tcp' } }
          options { skipDefaultCheckout() }
          steps {
            script {
              dir (RUN_LOCATION) {
                // Runs directly on the DAOS agent (no Slurm); runtests.py
                // writes its own log through --log_file.
                run_python(PYTHON_VERSION,
                           """runtests.py --prov='tcp' --util='rxm' \
                           --test=daos --build_hw=daos \
                           --log_file=${env.LOG_DIR}/daos_tcp-rxm_reg""")
              }
            }
          }
        }
        stage('daos_verbs') {
          agent { node { label 'daos_verbs' } }
          options { skipDefaultCheckout() }
          steps {
            script {
              dir (RUN_LOCATION) {
                // Runs directly on the DAOS agent (no Slurm); runtests.py
                // writes its own log through --log_file.
                run_python(PYTHON_VERSION,
                           """runtests.py --prov='verbs' --util='rxm' \
                           --test=daos --build_hw=daos \
                           --log_file=${env.LOG_DIR}/daos_verbs-rxm_reg""")
              }
            }
          }
        }
        stage ('DMABUF-Tests') {
          agent { node { label 'ze' } }
          options { skipDefaultCheckout() }
          steps {
            script {
              dir ("${env.WORKSPACE}/${SCRIPT_LOCATION}/") {
                // Declared locally so they do not leak into the script binding
                // shared by the other parallel stages. The interpreter version
                // comes from PYTHON_VERSION instead of a hard-coded python3.9.
                def dmabuf_output = "${LOG_DIR}/DMABUF-Tests_verbs-rxm_dmabuf"
                def cmd = """ python${PYTHON_VERSION} runtests.py --test=dmabuf \
                           --prov=verbs --util=rxm --build_hw=gpu"""
                // Same test on one node, then on two.
                slurm_batch("fabrics-ci", "1", "${dmabuf_output}_1_reg",
                            "${cmd}")
                slurm_batch("fabrics-ci", "2", "${dmabuf_output}_2_reg",
                            "${cmd}")
              }
            }
          }
        }
        stage ('ze-shm-v3') {
          agent { node { label 'ze' } }
          options { skipDefaultCheckout() }
          steps {
            script {
              dir (RUN_LOCATION) {
                // One run per --way value; each uses only the "reg" build.
                run_fabtests("ze_v3_shm", "gpu", "fabrics-ci", "1", "shm", null,
                             null, "h2d")
                run_fabtests("ze_v3_shm", "gpu", "fabrics-ci", "1", "shm", null,
                             null, "d2d")
                run_fabtests("ze_v3_shm", "gpu", "fabrics-ci", "1", "shm", null,
                             null, "xd2d")
              }
            }
          }
        }
        stage('dsa') {
          when { equals expected: true, actual: DO_RUN }
          steps {
            script {
              dir (RUN_LOCATION) {
                run_fabtests("shm_dsa", "electric", "pikachu", "1", "shm", null,
                             """FI_SHM_DISABLE_CMA=1 FI_SHM_USE_DSA_SAR=1 \
                                FI_LOG_LEVEL=warn""")
              }
            }
          }
        }
      }
    }
    // Pull the logs written on the DAOS and GPU clusters back to this agent,
    // then summarise the whole run.
    stage ('Summary') {
      when { equals expected: true, actual: DO_RUN }
      steps {
        script {
          gather_logs("${env.DAOS_ADDR}", "${env.DAOS_KEY}", "${env.LOG_DIR}",
                      "${env.LOG_DIR}")
          gather_logs("${env.ZE_ADDR}", "${env.ZE_KEY}", "${env.LOG_DIR}",
                      "${env.LOG_DIR}")

          // WEEKLY may be unset; treat that as "not weekly" instead of
          // throwing on null.
          def weekly = env.WEEKLY ? env.WEEKLY.toBoolean() : false
          // Plain positional arguments (item, verbose, release, send_mail).
          // The former `verbose=false, release=RELEASE, ...` spelling was a
          // set of assignment expressions that created script-wide variables,
          // not named parameters.
          summarize("all", false, RELEASE, weekly)
          if (RELEASE) {
            save_summary()
          }
        }
      }
    }
  }

  post {
    // Always print a summary, whatever the build result.
    always {
      script {
        summarize("all")
      }
    }
    // On success print the verbose summary and mail it for weekly runs.
    success {
      script {
        // Positional arguments (item, verbose, release, send_mail). An unset
        // WEEKLY counts as false instead of throwing on null.
        summarize("all", true, false,
                  env.WEEKLY ? env.WEEKLY.toBoolean() : false)
      }
    }
    // An aborted build removes the middlewares directory on every agent.
    // NOTE(review): DELETE_LOCATION is not defined anywhere in this file --
    // presumably supplied by the Jenkins environment; confirm.
    aborted {
      node ('daos_head') {
        dir ("${DELETE_LOCATION}/middlewares") { deleteDir() }
      }
      node ('ze') {
        dir ("${DELETE_LOCATION}/middlewares") { deleteDir() }
      }
      dir ("${DELETE_LOCATION}/middlewares") { deleteDir() }
    }
    // Remove the per-build workspace and its @tmp sibling on every agent.
    cleanup {
      node ('daos_head') {
        dir("${env.WORKSPACE}") { deleteDir() }
        dir("${env.WORKSPACE}@tmp") { deleteDir() }
      }
      node ('ze') {
        dir("${env.WORKSPACE}") { deleteDir() }
        dir("${env.WORKSPACE}@tmp") { deleteDir() }
      }
      dir("${env.WORKSPACE}") { deleteDir() }
      dir("${env.WORKSPACE}@tmp") { deleteDir() }
    }
  }
}