# 786
# Aldy source: test_minor_real.py
#   This file is subject to the terms and conditions defined in
#   file 'LICENSE', which is part of this source code package.


import pytest  # noqa
import collections

from .test_minor_synthetic import assert_minor


def test_normal(real_gene, solver):
    """Check minor-allele calling on sample NA07439/v1.

    Only one solution is currently tested for this sample.
    """
    # Observed counts keyed by (position, operation); "_" presumably marks
    # reads without the mutation at that position -- confirm in assert_minor.
    coverage = {
        (42522391, "G>A"): 353,
        (42522391, "_"): 177,
        (42522612, "C>G"): 689,
        (42522612, "_"): 0,
        (42523210, "T>C"): 347,
        (42523210, "_"): 198,
        (42523408, "T>G"): 811,
        (42523408, "_"): 0,
        (42523804, "C>T"): 160,
        (42523804, "_"): 392,
        (42523942, "G>A"): 208,
        (42523942, "_"): 394,
        (42524695, "T>C"): 489,
        (42524695, "_"): 248,
        (42524946, "C>T"): 483,
        (42524946, "_"): 303,
        (42525131, "C>G"): 640,
        (42525131, "_"): 0,
        (42525755, "G>A"): 340,
        (42525755, "_"): 174,
        (42525797, "G>C"): 0,
        (42525797, "_"): 197,
        (42525951, "A>C"): 640,
        (42525951, "_"): 0,
        (42526048, "G>C"): 431,
        (42526048, "_"): 287,
        (42526483, "C>A"): 643,
        (42526483, "_"): 0,
        (42526548, "T>C"): 284,
        (42526548, "_"): 343,
        (42526560, "T>G"): 283,
        (42526560, "_"): 303,
        (42526561, "C>G"): 288,
        (42526561, "_"): 301,
        (42526566, "A>G"): 298,
        (42526566, "_"): 322,
        (42526570, "G>C"): 302,
        (42526570, "_"): 313,
        (42526572, "G>T"): 297,
        (42526572, "_"): 324,
        (42526693, "G>A"): 665,
        (42526693, "_"): 330,
        (42527470, "C>T"): 377,
        (42527470, "_"): 351,
        (42527532, "G>A"): 259,
        (42527532, "_"): 351,
        (42527792, "C>T"): 333,
        (42527792, "_"): 200,
        (42528027, "T>C"): 46,
        (42528027, "_"): 0,
    }

    # Expected entries; presumably (minor allele, missing mutations,
    # novel mutations) -- confirm against assert_minor.
    expected = [
        (
            "4.005",
            [],
            [
                (42523408, "T>G"),
                (42525951, "A>C"),
                (42526048, "G>C"),
                (42526483, "C>A"),
                (42528027, "T>C"),
            ],
        ),
        (
            "4.005",
            [],
            [
                (42523408, "T>G"),
                (42525951, "A>C"),
                (42526483, "C>A"),
                (42528027, "T>C"),
            ],
        ),
        (
            "41.001",
            [
                (42522311, "C>T"),
                (42523002, "G>A"),
                (42523208, "C>T"),
                (42526579, "C>G"),
            ],
            [],
        ),
    ]

    case = {
        "cn": {"1": 3},
        "major": {"4C": 2, "41": 1},
        "data": coverage,
        "sol": expected,
    }
    assert_minor(real_gene, solver, case)


def test_multiple(real_gene, solver):
    """Check minor-allele calling on sample HG00436/v1."""
    # Observed counts keyed by (position, operation); "_" presumably marks
    # reads without the mutation at that position -- confirm in assert_minor.
    coverage = {
        (42522311, "C>T"): 643,
        (42522311, "_"): 143,
        (42522612, "C>G"): 634,
        (42522612, "_"): 357,
        (42522677, "G>A"): 0,
        (42522677, "_"): 1183,
        (42523002, "G>A"): 861,
        (42523002, "_"): 125,
        (42523208, "C>T"): 525,
        (42523208, "_"): 273,
        (42523408, "T>G"): 851,
        (42523408, "_"): 326,
        (42523942, "G>A"): 622,
        (42523942, "_"): 332,
        (42524217, "G>T"): 0,
        (42524217, "_"): 675,
        (42524312, "G>A"): 0,
        (42524312, "_"): 919,
        (42524322, "A>G"): 0,
        (42524322, "_"): 948,
        (42525035, "G>A"): 0,
        (42525035, "_"): 1106,
        (42525068, "G>A"): 0,
        (42525068, "_"): 1003,
        (42525131, "C>G"): 447,
        (42525131, "_"): 180,
        (42525279, "G>A"): 0,
        (42525279, "_"): 1267,
        (42525298, "A>G"): 0,
        (42525298, "_"): 1351,
        (42525755, "G>A"): 0,
        (42525755, "_"): 690,
        (42525797, "G>C"): 0,
        (42525797, "_"): 247,
        (42525951, "A>C"): 729,
        (42525951, "_"): 194,
        (42526048, "G>C"): 594,
        (42526048, "_"): 372,
        (42526483, "C>A"): 708,
        (42526483, "_"): 291,
        (42526548, "T>C"): 861,
        (42526548, "_"): 237,
        (42526560, "T>G"): 886,
        (42526560, "_"): 221,
        (42526561, "C>G"): 879,
        (42526561, "_"): 221,
        (42526566, "A>G"): 891,
        (42526566, "_"): 230,
        (42526570, "G>C"): 896,
        (42526570, "_"): 230,
        (42526572, "G>T"): 911,
        (42526572, "_"): 243,
        (42526668, "C>T"): 517,
        (42526668, "_"): 983,
        (42527470, "C>T"): 834,
        (42527470, "_"): 273,
        (42527532, "G>A"): 688,
        (42527532, "_"): 257,
        (42527541, "delTC"): 0,
        (42527541, "_"): 916,
        (42528027, "T>C"): 21,
        (42528027, "_"): 4,
        (42528095, "C>T"): 0,
        (42528095, "_"): 4,
        (42528381, "G>C"): 19,
        (42528381, "_"): 16,
    }

    # Expected entries; presumably (minor allele, missing mutations,
    # novel mutations) -- confirm against assert_minor.
    expected = [
        ("2.001", [(42526579, "C>G")], []),
        ("2.001", [(42526579, "C>G")], []),
        ("71.002", [], []),
    ]

    case = {
        "cn": {"1": 3},
        "major": {"2": 2, "71": 1},
        "data": coverage,
        "sol": expected,
    }
    assert_minor(real_gene, solver, case)


def test_fusion(real_gene, solver):
    """Check minor-allele calling on sample HG01190/v1.

    The case uses copy-number configurations "1" and "68" and is run with
    ``shallow=True``.
    """
    # Observed counts keyed by (position, operation); "_" presumably marks
    # reads without the mutation at that position -- confirm in assert_minor.
    coverage = {
        (42522391, "G>A"): 227,
        (42522391, "_"): 9,
        (42522612, "C>G"): 308,
        (42522612, "_"): 0,
        (42523210, "T>C"): 453,
        (42523210, "_"): 0,
        (42523408, "T>G"): 882,
        (42523408, "_"): 0,
        (42524695, "T>C"): 276,
        (42524695, "_"): 136,
        (42524946, "C>T"): 319,
        (42524946, "_"): 151,
        (42525131, "C>G"): 377,
        (42525131, "_"): 0,
        (42525797, "G>C"): 84,
        (42525797, "_"): 5,
        (42525810, "T>C"): 48,
        (42525810, "_"): 3,
        (42525820, "G>T"): 30,
        (42525820, "_"): 3,
        (42525951, "A>C"): 494,
        (42525951, "_"): 160,
        (42526048, "G>C"): 521,
        (42526048, "_"): 177,
        (42526483, "C>A"): 507,
        (42526483, "_"): 0,
        (42526693, "G>A"): 749,
        (42526693, "_"): 0,
        (42527792, "C>T"): 433,
        (42527792, "_"): 0,
        (42528027, "T>C"): 31,
        (42528027, "_"): 0,
        (42528223, "G>A"): 0,
        (42528223, "_"): 0,
    }

    # Expected entries; presumably (minor allele, missing mutations,
    # novel mutations) -- confirm against assert_minor.
    expected = [
        ("4.001", [(42526483, "C>A")], []),
        ("68.001", [(42528223, "G>A")], []),
    ]

    case = {
        "cn": {"1": 1, "68": 1},
        "major": {"4": 1, "68": 1},
        "data": coverage,
        "sol": expected,
    }
    assert_minor(real_gene, solver, case, shallow=True)


def test_deletion(real_gene, solver):
    """Check minor-allele calling on sample HG00276/v1.

    The case uses copy-number configurations "1" and "5".
    """
    # Observed counts keyed by (position, operation); "_" presumably marks
    # reads without the mutation at that position -- confirm in assert_minor.
    coverage = {
        (42522391, "G>A"): 212,
        (42522391, "_"): 7,
        (42522612, "C>G"): 277,
        (42522612, "_"): 0,
        (42523210, "T>C"): 248,
        (42523210, "_"): 8,
        (42523408, "T>G"): 547,
        (42523408, "_"): 0,
        (42524695, "T>C"): 306,
        (42524695, "_"): 94,
        (42524946, "C>T"): 291,
        (42524946, "_"): 86,
        (42525131, "C>G"): 357,
        (42525131, "_"): 0,
        (42525797, "G>C"): 106,
        (42525797, "_"): 5,
        (42525810, "T>C"): 73,
        (42525810, "_"): 3,
        (42525820, "G>T"): 57,
        (42525820, "_"): 0,
        (42525951, "A>C"): 271,
        (42525951, "_"): 132,
        (42526048, "G>C"): 301,
        # NOTE: the count below is probably a mapping error!
        (42526048, "_"): 151,
        (42526483, "C>A"): 232,
        (42526483, "_"): 0,
        (42526693, "G>A"): 311,
        (42526693, "_"): 19,
        (42527792, "C>T"): 186,
        (42527792, "_"): 5,
        (42528027, "T>C"): 7,
        (42528027, "_"): 0,
    }

    # Expected entries; presumably (minor allele, missing mutations,
    # novel mutations) -- confirm against assert_minor.
    expected = [
        ("4.001", [(42528223, "G>A")], []),
        ("5.001", [], []),
    ]

    case = {
        "cn": {"1": 1, "5": 1},
        "major": {"4": 1, "5": 1},
        "data": coverage,
        "sol": expected,
    }
    assert_minor(real_gene, solver, case)


def test_comparison(real_gene, solver):
    """Compare minor solutions for three candidate majors on NA10846/v1.

    Builds three major solutions over the same copy-number solution, runs
    ``estimate_minor`` on them, and checks that exactly three minor solutions
    come back, that each (in ascending score order) has the expected alleles,
    missing mutations and novel mutations, and that the scores are strictly
    increasing.
    """
    from aldy.solutions import CNSolution, SolvedAllele, MajorSolution
    from aldy.coverage import Coverage
    from aldy.minor import estimate_minor

    # Observed counts keyed by (position, operation).
    observed = {
        (42522391, "G>A"): 286,
        (42522391, "_"): 319,
        (42522397, "delCT"): 0,
        (42522397, "_"): 631,
        (42522463, "G>A"): 0,
        (42522463, "_"): 620,
        (42522549, "G>A"): 0,
        (42522549, "_"): 677,
        (42522608, "T>C"): 0,
        (42522608, "_"): 910,
        (42522612, "C>G"): 453,
        (42522612, "_"): 450,
        (42522691, "G>C"): 0,
        (42522691, "_"): 1285,
        (42522915, "C>G"): 0,
        (42522915, "_"): 1491,
        (42522964, "C>T"): 0,
        (42522964, "_"): 1073,
        (42523144, "G>A"): 0,
        (42523144, "_"): 657,
        (42523210, "T>C"): 475,
        (42523210, "_"): 352,
        (42523240, "G>A"): 0,
        (42523240, "_"): 1009,
        (42523247, "insT"): 0,
        (42523247, "_"): 1019,
        (42523357, "G>T"): 0,
        (42523357, "_"): 1314,
        (42523408, "T>G"): 1054,
        (42523408, "_"): 384,
        (42523720, "G>A"): 0,
        (42523720, "_"): 870,
        (42523787, "C>A"): 0,
        (42523787, "_"): 680,
        (42523812, "G>A"): 0,
        (42523812, "_"): 621,
        (42524032, "A>T"): 0,
        (42524032, "_"): 905,
        (42524072, "C>G"): 0,
        (42524072, "_"): 842,
        (42524129, "C>T"): 0,
        (42524129, "_"): 695,
        (42524154, "insC"): 0,
        (42524154, "_"): 638,
        (42524216, "G>A"): 0,
        (42524216, "_"): 580,
        (42524217, "G>T"): 0,
        (42524217, "_"): 576,
        (42524489, "G>A"): 0,
        (42524489, "_"): 811,
        (42524695, "T>C"): 400,
        (42524695, "_"): 641,
        (42524737, "C>T"): 0,
        (42524737, "_"): 1102,
        (42524814, "G>A"): 0,
        (42524814, "_"): 1136,
        (42524923, "A>G"): 0,
        (42524923, "_"): 1234,
        (42524934, "G>A"): 0,
        (42524934, "_"): 1403,
        (42524946, "C>T"): 612,
        (42524946, "_"): 794,
        (42524969, "G>T"): 0,
        (42524969, "_"): 1287,
        (42525044, "G>A"): 0,
        (42525044, "_"): 1082,
        (42525131, "C>G"): 656,
        (42525131, "_"): 567,
        (42525153, "C>A"): 0,
        (42525153, "_"): 1220,
        (42525238, "A>C"): 0,
        (42525238, "_"): 1387,
        (42525389, "G>A"): 0,
        (42525389, "_"): 1266,
        (42525443, "C>T"): 0,
        (42525443, "_"): 1347,
        (42525451, "G>A"): 0,
        (42525451, "_"): 1365,
        (42525499, "C>T"): 0,
        (42525499, "_"): 1330,
        (42525541, "G>C"): 0,
        (42525541, "_"): 1175,
        (42525624, "C>T"): 0,
        (42525624, "_"): 1174,
        (42525727, "A>C"): 0,
        (42525727, "_"): 857,
        (42525732, "T>C"): 0,
        (42525732, "_"): 854,
        (42525755, "G>A"): 0,
        (42525755, "_"): 661,
        (42525797, "G>C"): 161,
        (42525797, "_"): 212,
        (42525810, "T>C"): 105,
        (42525810, "_"): 172,
        (42525820, "G>T"): 61,
        (42525820, "_"): 169,
        (42525951, "A>C"): 450,
        (42525951, "_"): 543,
        (42526048, "G>C"): 433,
        (42526048, "_"): 512,
        (42526369, "G>A"): 0,
        (42526369, "_"): 1003,
        (42526483, "C>A"): 358,
        (42526483, "_"): 366,
        (42526523, "G>A"): 0,
        (42526523, "_"): 614,
        (42526548, "T>C"): 0,
        (42526548, "_"): 561,
        (42526560, "T>G"): 0,
        (42526560, "_"): 491,
        (42526561, "C>G"): 0,
        (42526561, "_"): 486,
        (42526566, "A>G"): 0,
        (42526566, "_"): 523,
        (42526570, "G>C"): 0,
        (42526570, "_"): 529,
        (42526572, "G>T"): 0,
        (42526572, "_"): 534,
        (42526579, "C>G"): 0,
        (42526579, "_"): 600,
        (42526693, "G>A"): 549,
        (42526693, "_"): 649,
        (42526811, "T>G"): 0,
        (42526811, "_"): 984,
        (42526815, "delC"): 0,
        (42526815, "_"): 957,
        (42526840, "insC"): 0,
        (42526840, "_"): 865,
        (42526878, "A>G"): 0,
        (42526878, "_"): 806,
        (42526922, "A>G"): 0,
        (42526922, "_"): 766,
        (42527075, "G>A"): 0,
        (42527075, "_"): 666,
        (42527157, "C>T"): 0,
        (42527157, "_"): 938,
        (42527223, "G>A"): 0,
        (42527223, "_"): 1478,
        (42527263, "G>A"): 0,
        (42527263, "_"): 1446,
        (42527290, "G>T"): 0,
        (42527290, "_"): 1402,
        (42527380, "C>T"): 0,
        (42527380, "_"): 1161,
        (42527470, "C>T"): 206,
        (42527470, "_"): 596,
        (42527532, "G>A"): 0,
        (42527532, "_"): 542,
        (42527632, "A>T"): 0,
        (42527632, "_"): 682,
        (42527792, "C>T"): 312,
        (42527792, "_"): 330,
        (42527895, "insT"): 0,
        (42527895, "_"): 271,
        (42528027, "T>C"): 20,
        (42528027, "_"): 12,
        (42528223, "G>A"): 2,
        (42528223, "_"): 3,
        (42528381, "G>C"): 0,
        (42528381, "_"): 40,
    }

    # Two copies of copy-number configuration "1".
    copy_number = CNSolution(real_gene, 0, ["1", "1"])

    # Regroup the flat (position, operation) counts as position -> {operation: count}.
    per_position = collections.defaultdict(dict)
    for (position, operation), count in observed.items():
        per_position[position][operation] = count

    # One MajorSolution per candidate major-allele assignment.
    candidates = [{"1": 1, "4": 1}, {"10": 1, "4M": 1}, {"39": 1, "4J": 1}]
    major_sols = []
    for candidate in candidates:
        allele_counts = collections.defaultdict(int)
        for name, copies in candidate.items():
            allele_counts[SolvedAllele(real_gene, name)] = copies
        major_sols.append(MajorSolution(0, allele_counts, copy_number, []))

    sols = estimate_minor(
        real_gene, Coverage(per_position, 0.5, {}), major_sols, solver
    )
    assert len(sols) == 3
    sols.sort(key=lambda candidate_sol: candidate_sol.score)

    def check_solution(sol, expected):
        """Compare a solution with (minor, missing, novel) expectations as sets."""
        want_alleles, want_missing, want_novel = set(), set(), set()
        for name, missing, novel in expected:
            want_alleles.add(name)
            want_missing.update(missing)
            want_novel.update(novel)

        got_alleles, got_missing, got_novel = set(), set(), set()
        for allele in sol.solution:
            got_alleles.add(allele.minor)
            got_missing.update((m.pos, m.op) for m in allele.missing)
            got_novel.update((m.pos, m.op) for m in allele.added)

        assert want_alleles == got_alleles, "Alleles"
        assert want_missing == got_missing, "Missing mutations"
        assert want_novel == got_novel, "Novel mutations"

    # Expectations listed in ascending score order, matching the sorted solutions.
    expectations = [
        [("1.001", [], [(42527470, "C>T")]), ("4.001", [], [])],
        [
            ("10.001", [(42525131, "C>G")], []),
            (
                "4.012",
                [],
                [
                    (42526483, "C>A"),
                    (42527792, "C>T"),
                    (42527470, "C>T"),
                    (42528223, "G>A"),
                ],
            ),
        ],
        [
            ("39.002", [], []),
            (
                "4.009",
                [],
                [
                    (42522391, "G>A"),
                    (42523210, "T>C"),
                    (42523408, "T>G"),
                    (42524695, "T>C"),
                    (42525951, "A>C"),
                    (42526048, "G>C"),
                    (42526483, "C>A"),
                    (42527470, "C>T"),
                    (42527792, "C>T"),
                    (42528027, "T>C"),
                    (42528223, "G>A"),
                ],
            ),
        ],
    ]
    for sol, expected in zip(sols, expectations):
        check_solution(sol, expected)

    best, middle, worst = sols
    assert best.score < middle.score < worst.score


def test_major_novel(real_gene, solver):
    """Check minor-allele calling when the major entry carries an extra mutation.

    The "major" value is a tuple of the major-allele counts and the mutation
    (42525810, "T>C"); the case is run with ``shallow=True``.
    """
    # Observed counts keyed by (position, operation); "_" presumably marks
    # reads without the mutation at that position -- confirm in assert_minor.
    coverage = {
        (42522311, "C>T"): 279,
        (42522311, "_"): 147,
        (42522391, "G>A"): 315,
        (42522391, "_"): 317,
        (42522612, "C>G"): 618,
        (42522612, "_"): 0,
        (42523002, "G>A"): 477,
        (42523002, "_"): 245,
        (42523208, "C>T"): 316,
        (42523208, "_"): 410,
        (42523210, "T>C"): 419,
        (42523210, "_"): 316,
        (42523408, "T>G"): 858,
        (42523408, "_"): 0,
        (42523942, "G>A"): 356,
        (42523942, "_"): 326,
        (42524695, "T>C"): 301,
        (42524695, "_"): 322,
        (42524946, "C>T"): 362,
        (42524946, "_"): 387,
        (42525131, "C>G"): 304,
        (42525131, "_"): 0,
        (42525755, "G>A"): 0,
        (42525755, "_"): 359,
        (42525797, "G>C"): 31,
        (42525797, "_"): 56,
        (42525810, "T>C"): 16,
        (42525810, "_"): 19,
        (42525951, "A>C"): 831,
        (42525951, "_"): 0,
        (42526048, "G>C"): 850,
        (42526048, "_"): 0,
        (42526483, "C>A"): 660,
        (42526483, "_"): 0,
        (42526693, "G>A"): 417,
        (42526693, "_"): 311,
        (42526762, "C>T"): 535,
        (42526762, "_"): 460,
        (42526930, "T>C"): 0,
        (42526930, "_"): 563,
        (42527470, "C>T"): 573,
        (42527470, "_"): 257,
        (42527532, "G>A"): 354,
        (42527532, "_"): 222,
        (42527792, "C>T"): 273,
        (42527792, "_"): 263,
        (42528027, "T>C"): 14,
        (42528027, "_"): 1,
        (42528381, "G>C"): 6,
        (42528381, "_"): 12,
    }

    # Expected entries; presumably (minor allele, missing mutations,
    # novel mutations) -- confirm against assert_minor.
    expected = [
        (
            "35.001",
            [
                (42526579, "C>G"),
                (42526572, "G>T"),
                (42526548, "T>C"),
                (42526570, "G>C"),
                (42526566, "A>G"),
                (42526560, "T>G"),
                (42526561, "C>G"),
            ],
            [],
        ),
        (
            "4.011",
            [],
            [
                (42525810, "T>C"),
                (42526048, "G>C"),
                (42523408, "T>G"),
                (42525951, "A>C"),
                (42528027, "T>C"),
                (42526483, "C>A"),
            ],
        ),
    ]

    case = {
        "cn": {"1": 2},
        "major": ({"35": 1, "4C": 1}, (42525810, "T>C")),
        "data": coverage,
        "sol": expected,
    }
    assert_minor(real_gene, solver, case, shallow=True)


def test_minor_novel(real_gene, solver):
    """Check that a single observed mutation is reported on both 1.001 copies.

    The same mutation appears in the "major" tuple, as the only coverage
    entry, and in the third slot of both expected "1.001" entries.
    """
    novel = (42522957, "T>G")
    case = {
        "cn": {"1": 2},
        "major": ({"1": 2}, novel),
        "data": {novel: 100},
        "sol": [
            ("1.001", [], [novel]),
            ("1.001", [], [novel]),
        ],
    }
    assert_minor(real_gene, solver, case)
