#
# Copyright (C) 2019 Intel Corporation
#
# This software and the related documents are Intel copyrighted materials, and your use of them
# is governed by the express license under which they were provided to you ("License"). Unless
# the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose
# or transmit this software or the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express or implied
# warranties, other than those that are expressly stated in the License.
#


# ------------------------------------------------------------------------------
# Floating Point Reproducibility Advising Tool
# ------------------------------------------------------------------------------

import argparse
import re
import sys
import textwrap

from collections import namedtuple

try:

    import advisor

except ImportError:

    print(
        """Import error: Python could not resolve path to Advisor's pythonapi directory.
        To fix, either manually add path to the pythonapi directory into PYTHONPATH environment
        variable, or use advixe-vars.* scripts to set up product environment variables automatically."""
    )
    sys.exit(1)


def filtered_walk(survey, loops=None):
    """Yield rows of the survey's top-down tree, optionally filtered by name.

    Every entry of ``survey.topdown`` is used as a root and its descendants
    are visited with an explicit stack. The roots themselves are never
    yielded, only rows reached through ``get_children()``.

    Args:
        survey: object exposing an iterable ``topdown`` attribute whose items
            provide ``get_children()`` and item access by field name.
        loops: optional regular expression. When given, a row is yielded only
            if its "function_call_sites_and_loops" field matches the
            expression or one of its ancestors already matched. When None or
            empty, every descendant row is yielded.

    Yields:
        The selected rows, in the order they are discovered.
    """
    # Compile once instead of handing the raw pattern to re.search per row.
    pattern = re.compile(loops) if loops else None
    for row in survey.topdown:
        # Each stack entry is (node, whether the node's subtree is selected).
        stack = [(row, pattern is None)]
        while stack:
            node, selected = stack.pop()
            for child in node.get_children():
                # Short-circuits when the subtree is already selected, so
                # pattern is never dereferenced while it is None.
                child_selected = selected or bool(
                    pattern.search(child["function_call_sites_and_loops"])
                )
                stack.append((child, child_selected))
                if child_selected:
                    yield child


def make_matcher(searchSpecs):
    """ Build a predicate that tests one field of a row against a regex.

    searchSpecs.string is the pattern and searchSpecs.field is the key
    that is looked up on each row handed to the predicate. """

    def func_matcher(x):
        """ True if the pattern occurs anywhere in the field, False if not """
        found = re.search(searchSpecs.string, x[searchSpecs.field])
        return found is not None

    return func_matcher


def _match_filter(searchSpecs):
    """ Create a survey filter bound to searchSpecs.

    The returned function walks a survey (optionally restricted by the
    loops expression) and keeps only the rows accepted by the matcher
    built from searchSpecs. """

    def func_filt(survey, loops=None):
        """ Return a lazy iterator over the rows that match searchSpecs """
        keep = make_matcher(searchSpecs)
        candidates = filtered_walk(survey, loops)
        return filter(keep, candidates)

    return func_filt


# Record describing one detected issue: the advising text (title, description,
# recommendation) plus per-report details; "blocks" holds the matched rows.
issue = namedtuple("issue", "title description recommendation verbose lines functions note blocks")


def issue_type_maker(base_issue):
    """ Bind the advising text of base_issue into an issue factory.

    The returned function creates issue records whose title, description
    and recommendation come from base_issue; all remaining fields are
    supplied per call. """

    def issue_type(blocks=None, verbose=False, lines=None, functions=None, note=None):
        """ Create an issue record for the given matched blocks """
        return issue(
            title=base_issue.title,
            description=base_issue.description,
            recommendation=base_issue.recommendation,
            verbose=verbose,
            lines=lines,
            functions=functions,
            note=note,
            blocks=blocks,
        )

    return issue_type


def create_advisor(spec):
    """ Build an advisor function from a (signature, base issue) pair.

    spec[0] is the search signature used to select survey rows and
    spec[1] supplies the advising text. The returned function takes a
    survey (and an optional loops expression) and returns an issue whose
    blocks field is the set of matching rows. """

    def new_advisor(survey, loops=None):
        make_issue = issue_type_maker(spec[1])
        find_matches = _match_filter(spec[0])
        matched = set(find_matches(survey, loops))
        return make_issue(matched)

    return new_advisor


# A signature pairs a regular expression ("string") with the name of the
# survey field ("field") in which that expression is searched.
SpecType = namedtuple("SpecType", "string field")

# Instruction, threading-runtime and loop-type signatures
fma_sig = SpecType(string=r"FMA", field="traits")
omp_sig = SpecType(string=r"kmpc_reduce", field="function_call_sites_and_loops")
tbb_sig = SpecType(string=r"start_reduce", field="mangled_name")
peel_sig = SpecType(string=r"Peel", field="type")

# Math library call signatures
libm_sig = SpecType(string=r"__libm_", field="function_call_sites_and_loops")
svml_sig = SpecType(string=r"__svml_", field="function_call_sites_and_loops")
mkl_sig = SpecType(string=r"MKL", field="function_call_sites_and_loops")


############## Issue/Advising Text ###########
# Advising text shared by every issue of one kind. The strings below are
# printed verbatim in the report, so adjacent literals must carry their own
# separating spaces (implicit concatenation adds none).
BaseIssue = namedtuple("BaseIssue", "title description recommendation")
FMA_base = BaseIssue(
    "Fused multiply-add (FMA) instruction(s) present",  # title
    "The use of a fused multiply-add (FMA) instruction over the "  # description
    "analogous series of multiply and add instructions produces "
    "a more accurate and therefore numerically different result.",  # /description
    "For repeatability:\n"  # recommendation
    "FMA instruction does not introduce non-repeatability "
    "in a single binary, but recompiling at a different optimization "
    "level or with different compiler flags is not numerically "
    "reproducible.\n"
    "For reproducibility:\nFMA was introduced in AVX2 instruction set "
    "architecture. Numerical reproducibility is possible only "
    "between pre-FMA microarchitectures and FMA microarchitectures, by "
    "limiting the binary to non-FMA with the -no-fma compiler flag.\n"
    "Note: The -no-fma flag does not apply to library calls.\n"
    "Additionally, different operating systems, compiler versions, "
    "and hardware configurations can all result in differences in FMA "
    "use in the compiled binary. Therefore, reproducibility is only "
    "possible by:\n"
    "\t Using a single binary on machines with FMA microarchitecture\n"
    "\t Disabling FMA generation for all hardware configurations. "
    "(This will result in decreased performance on systems with FMA.)",  # /recommendation
)

omp_base = BaseIssue(
    "OpenMP* reduction(s) present",  # title
    "OpenMP reductions can cause issues with "  # description
    "numerical repeatability, all stemming from differences "
    "in the distribution of sub-reductions and subsequent "
    "order of operations for the final reduction.",  # /description
    "For repeatability and reproducibility:\nThe following is "  # recommendation
    "required for reproducibility or repeatability with OpenMP "
    "reductions:\n"
    "\t Static load distribution (schedule(STATIC)) \n"
    "\t A fixed number of threads\n"
    "\t The environmental variable KMP_DETERMINISTIC_REDUCTION "
    "is set to true. This produces a deterministic tree reduction "
    "algorithm. ",  # /recommendation
)

tbb_base = BaseIssue(
    "Intel(R) Threading Building Blocks (Intel(R) TBB) reduction operation(s) present",  # title
    "TBB reduction can produce numerical non-repeatability "  # description
    "because of differences in work sharing and the order "
    "of sub-reductions in a final reduction.",  # /description
    "For repeatability and reproducibility:\n"  # recommendation
    # The TBB API is named parallel_deterministic_reduce (there is no
    # omp_deterministic_reduce).
    "Intel TBB has a parallel_deterministic_reduce feature, "
    "which allows deterministic reduction for a different number "
    "of threads. Replace the nondeterministic reduction operation with a "
    "deterministic reduction operation. ",  # /recommendation
)

libm_base = BaseIssue(
    "libm call(s) present",  # title
    "libm calls are self-consistent, but limit performance "  # description
    "as they only allow scalar transcendental math. In addition, "
    "changes in vectorization can cause issues, as "
    "different vectorization can change the balance of libm and "
    "SVML (vector transcendental math) calls. ",  # /description
    "For repeatability and reproducibility:\n"  # recommendation
    "Consistent use of either libm or SVML is suggested.\n "
    "\t -fimf-arch-consistency=true to force libm (no SIMD math, heavy "
    "performance penalty)\n"
    "\t -fimf-use-svml\t to force SVML (for both SIMD and scalar "
    "operations). Recommended for maximum performance and "
    "reproducibility."
    "\nNote: Both SVML and libm algorithmic changes between "
    "library (or compiler) versions are not ensured to be bitwise-"
    "reproducible. For maximum reproducibility, eliminate "
    "calls to external libraries. ",  # /recommendation
)

svml_base = BaseIssue(
    "SVML call(s) present",  # title
    "SVML calls are self-consistent, high-performance "  # description
    "SIMD vector versions of transcendental math. However, "
    "results are not reproducible with respect to libm library calls. "
    "This can manifest as nonreproducibility with legacy binaries.",  # /description
    "For repeatability and reproducibility:\n"  # recommendation
    "Consistent use of either libm or SVML is suggested.\n "
    "\t -fimf-arch-consistency=true to force libm (no SIMD math, heavy "
    "performance penalty)\n"
    "\t -fimf-use-svml to force SVML (for both SIMD and scalar "
    "operations). Recommended for maximum performance and "
    "reproducibility."
    "\nNote: Both SVML and libm algorithmic changes between "
    "library (or compiler) versions are not ensured to be bitwise-"
    "reproducible. For maximum reproducibility, eliminate "
    "calls to external libraries. ",  # /recommendation
)

math_base = BaseIssue(
    "SVML and libm calls present",  # title
    "SVML SIMD-vector transcendental math functions differ "  # description
    "from the analogous libm (scalar) versions. Code including both "
    "often produces non-repeatable and non-reproducible results "
    "because of different runtime code-paths implemented using libm "
    "and SVML calls. ",  # /description
    "For repeatability and reproducibility:\n"  # recommendation
    "Consistent use of either libm or SVML is suggested.\n "
    "\t -fimf-arch-consistency=true to force libm (no SIMD math, heavy "
    "performance penalty)\n"
    "\t -fimf-use-svml\t to force SVML (for both SIMD and scalar "
    "operations). Recommended for maximum performance and "
    "reproducibility."
    "\nNote: Both SVML and libm algorithmic changes between "
    "library (or compiler) versions are not ensured to be bitwise-"
    "reproducible. For maximum reproducibility, eliminate "
    "calls to external libraries. ",  # /recommendation
)

mkl_base = BaseIssue(
    "Intel(R) Math Kernel Library (Intel(R) MKL) call(s) present",  # title
    "MKL is a fast, optimized set of math routines. However "  # description
    "it is not explicitly reproducible.",  # /description
    "For reproducibility, ensure that:\n"  # recommendation
    "\t A fixed number of threads are used.\n"
    "\t Arrays are aligned in memory.\n"
    "\t The environmental variable MKL_CBWR is set to one of "
    "{COMPATIBLE, SSE2, SSE4_1, AVX}, representing the minimum machine "
    "specifications for which reproducibility is ensured by Intel MKL. "
    "Naturally, AVX will achieve faster performance than COMPATIBLE, "
    "but limits CNR to systems with an AVX instruction set.\n"
    "There are also additional options available for CNR in Intel MKL. "
    "You can refer to the following Intel publications for details:\n"
    '\t "Using the Intel MKL and Intel Compilers to Obtain Run-'
    'to-run Numerical Reproducible Results" by Rosenquist and Story [1] \n'
    '\t "Introduction to Conditional Numerical Reproducibility" '
    "by Rosenquist [2] \n"  # /recommendation
    "\n [1] https://software.intel.com/sites/products/parallelmag/singlearticles/issue11/7080_2_IN_ParallelMag_Issue11_CBWR.PDF"
    "\n [2] https://www.intel.com/content/www/us/en/developer/articles/technical/introduction-to-the-conditional-numerical-reproducibility-cnr.html",
)

peel_base = BaseIssue(
    "Peeled loop(s) present",  # title
    "The compiler automatically peels iterations from the vector "  # description
    "loop into a scalar loop to align the vector loop with a particular "
    "memory reference. Having a peeled loop can lead "
    "to differences in the results when alignment changes.",  # /description
    "For reproducibility:\n"  # recommendation
    "\t Align the data and tell the compiler the data is aligned. "
    "Use _mm_malloc and __assume_aligned for dynamic data and "
    "__declspec(align(64)) for static data.\n"
    "\t You can also use -qno-opt-dynamic-align to remove"
    " optimizations based on alignment.",  # /recommendation
)
# end


# One advisor per (search signature, advising text) pair. Each advisor takes
# a survey plus an optional loops expression and returns an issue record.
(
    fma_advisor,
    tbb_advisor,
    omp_advisor,
    libm_advisor,
    svml_advisor,
    mkl_advisor,
    peel_advisor,
) = (
    create_advisor(pair)
    for pair in (
        (fma_sig, FMA_base),
        (tbb_sig, tbb_base),
        (omp_sig, omp_base),
        (libm_sig, libm_base),
        (svml_sig, svml_base),
        (mkl_sig, mkl_base),
        (peel_sig, peel_base),
    )
)


def math_advisor(survey, loops=None):
    """ Report a mix of libm and SVML calls.

    Runs the libm and SVML advisors on the survey. When both find matching
    rows, returns an issue carrying the math_base advising text and the
    union of the two sets of rows; otherwise returns None. """
    libm = libm_advisor(survey, loops)
    svml = svml_advisor(survey, loops)
    if libm.blocks and svml.blocks:
        # The advisors collect their matches in sets, which support union
        # ("|") but not "+". The advising text must be a BaseIssue because
        # issue_type_maker reads title/description/recommendation from it.
        return issue_type_maker(math_base)(libm.blocks | svml.blocks)
    return None


def format_block_issue(issue, verbose=False):
    """ Render an issue as report text.

    Args:
        issue: record with title, description, recommendation and blocks
            attributes, or None. Each block is expected to support the "in"
            operator and item access by field name, and to expose its
            enclosing row as a "parent" attribute.
        verbose: intended for file output. When True, columns are separated
            by a tab, names are printed in full, the location column lists
            the block's location followed by those of its ancestors
            (comma separated), and the advising text is not wrapped. When
            False, the output targets an 80 character terminal: a 60 wide
            name column, a 20 wide location column and text wrapped at 80.

    Returns:
        The report text, or "" when issue is falsy or has no blocks. """
    if not issue or not issue.blocks:
        return ""

    format_spec = "{}\t{}" if verbose else "{:60}{:20}"

    def trim_line(n):
        """ Name of the block, cut to its first 56-column chunk if needed """
        name = n["function_call_sites_and_loops"]
        if verbose:
            return name
        chunks = textwrap.wrap(name, 56)
        if not chunks:
            # textwrap.wrap returns [] for an empty or blank name.
            return ""
        return chunks[0] + "..." if len(chunks) > 1 else chunks[0]

    def tail_line(text):
        """ Keep the end of a location so it fits the 20 wide column """
        if verbose or len(text) <= 20:
            # Anything that fits is printed untouched.
            return text
        # Ellipsis plus the last 17 characters is exactly 20 wide.
        return "..." + text[-17:]

    def location_stack(n):
        """ Location text for a block, following its chain of parents """
        loc_field = "location" if "location" in n else "source_location"  # compatibility
        if loc_field not in n:
            return "Unavailable"
        locations = n[loc_field]
        if locations == "Unavailable":
            return locations
        level = n
        while True:
            parent = getattr(level, "parent", None)
            # Stop at the top of the tree or at the first ancestor that has
            # no location of its own.
            if parent is None or not parent[loc_field]:
                break
            level = parent
            if verbose:
                locations = locations + "," + level[loc_field]
            else:
                # Terminal output only shows the outermost known location.
                locations = level[loc_field]
        return locations

    rows = ["---" + issue.title, format_spec.format("Name", "Location Stack")]
    rows.extend(format_spec.format(trim_line(n), tail_line(location_stack(n))) for n in issue.blocks)

    if verbose:
        rows.append(issue.description)
        rows.append(issue.recommendation)
    else:
        for text in (issue.description, issue.recommendation):
            # Wrap every original line on its own so explicit newlines and
            # tabs in the advising text are preserved.
            rows.extend(textwrap.fill(row, 80, replace_whitespace=False) for row in text.splitlines())

    return "".join(row + "\n" for row in rows)


if __name__ == "__main__":

    # Create a parser for collecting command-line arguments.
    parser = argparse.ArgumentParser(
        description="""
        This script helps to understand what parts of application
        are causing non-reproducibility in floating point computations,
        and how to address them."""
    )
    parser.add_argument(
        "project_dir", metavar="project_dir", type=str, default=None, help="path to Advisor project",
    )
    parser.add_argument("--fma", action="store_true", help="advising for FMA use")
    parser.add_argument("--omp", action="store_true", help="advising on OMP reductions")
    parser.add_argument("--tbb", action="store_true", help="advising on TBB reductions")
    parser.add_argument("--libm", action="store_true", help="alert on libm instructions")
    parser.add_argument("--mkl", action="store_true", help="alert on mkl instructions")
    parser.add_argument("--svml", action="store_true", help="alert on SVML instructions")
    parser.add_argument("--math", action="store_true", help="alert if a mix of libm and SVML is found")
    parser.add_argument("--peel", action="store_true", help="alert on peeled loops")
    parser.add_argument(
        "-o",
        "--output",
        nargs="?",
        type=argparse.FileType("w"),
        default=sys.stdout,
        # Used when -o is given without a file name.
        const="fprepro_report.txt",
        help="target output file",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="write without wrapping text and include uncropped location trace",
    )
    parser.add_argument("-a", "--all", action="store_true", help="enable all recommendations")
    parser.add_argument(
        "-s",
        "--select",
        nargs="?",
        default=None,
        const=None,
        help="will only return issues from sites whose name contains the input string",
    )
    args = parser.parse_args()

    project = advisor.open_project(args.project_dir)

    try:
        survey = project.load(advisor.SURVEY)
    except Exception as err:
        # Without a survey nothing below can run, so stop here instead of
        # failing later on an undefined name.
        print("Error loading project survey...does the advisor project contain a survey result?")
        print(err)
        sys.exit(1)

    # Command-line flag -> advisor, in the order the sections are reported.
    checks = (
        ("fma", fma_advisor),
        ("omp", omp_advisor),
        ("tbb", tbb_advisor),
        ("libm", libm_advisor),
        ("svml", svml_advisor),
        ("math", math_advisor),
        ("mkl", mkl_advisor),
        ("peel", peel_advisor),
    )

    try:
        for flag, advise in checks:
            # --all enables every check regardless of the individual flags.
            if args.all or getattr(args, flag):
                args.output.write(format_block_issue(advise(survey, args.select), args.verbose))
    finally:
        # Close a report file opened by argparse; leave stdout alone.
        if args.output is not sys.stdout:
            args.output.close()
