"""UIDs.

Entity-related generators:

.. autofunction:: ontology
.. autofunction:: source

"""

import hashlib
import secrets
import string


def base62(n_char: int) -> str:
    """Random Base62 string.

    Args:
        n_char: Number of characters to generate; values <= 0 give an empty string.

    Returns:
        A string of ``n_char`` characters, each drawn with ``secrets.choice``
        from the digits, uppercase and lowercase ASCII letters.
    """
    # same alphabet and ordering as `digits + ascii_letters.swapcase()`
    alphabet = string.digits + string.ascii_uppercase + string.ascii_lowercase
    return "".join(secrets.choice(alphabet) for _ in range(n_char))


def encode_base62(s: str) -> str:
    """Return the base62 encoding of the MD5 digest of ``s``.

    ``s`` is converted to bytes with ``str.encode()``, hashed with MD5, and the
    raw digest is handed to ``lamin_utils._base62.encodebytes``.

    MD5 only serves to derive an identifier here, not to protect anything, so
    it is requested with ``usedforsecurity=False``. The digest is unchanged,
    but the call no longer fails on builds that block MD5 for security use.
    """
    from lamin_utils._base62 import encodebytes

    digest = hashlib.md5(s.encode(), usedforsecurity=False).digest()
    return encodebytes(digest)


def hash_id(input_id: str | None = None, *, n_char: int) -> str:
    """Return a base62 uid truncated to ``n_char`` characters.

    Args:
        input_id: String to derive the uid from. When None, a random uid is
            generated with ``base62`` instead.
        n_char: Length of a random uid, or the number of leading characters
            kept from ``encode_base62(input_id)``.
    """
    if input_id is not None:
        encoded = encode_base62(input_id)
        return encoded[:n_char]
    return base62(n_char=n_char)


def gene(input_id: str | None = None) -> str:
    """Gene uid: 12 base62.

    Delegates to ``hash_id`` with ``n_char=12``: random when ``input_id`` is
    None, otherwise derived from ``input_id``.
    """
    return hash_id(input_id=input_id, n_char=12)


def protein(input_id: str | None = None) -> str:
    """Protein uid: 12 base62.

    Delegates to ``hash_id`` with ``n_char=12``: random when ``input_id`` is
    None, otherwise derived from ``input_id``.
    """
    return hash_id(input_id=input_id, n_char=12)


def cellmarker(input_id: str | None = None) -> str:
    """Cell marker uid: 12 base62.

    Delegates to ``hash_id`` with ``n_char=12``: random when ``input_id`` is
    None, otherwise derived from ``input_id``.
    """
    return hash_id(input_id=input_id, n_char=12)


def ontology(input_id: str | None = None) -> str:
    """14 base62.

    Delegates to ``hash_id`` with ``n_char=14``: random when ``input_id`` is
    None, otherwise derived from ``input_id``.
    """
    return hash_id(input_id, n_char=14)


def source(input_id: str | None = None) -> str:
    """8 base62.

    Delegates to ``hash_id`` with ``n_char=8``: random when ``input_id`` is
    None, otherwise derived from ``input_id``.
    """
    return hash_id(input_id, n_char=8)


def encode_uid(registry, kwargs: dict):
    """Add a hash-derived ``uid`` to ``kwargs`` unless one is already set.

    The type passed needs to be a subclass of BioRecord.

    The string that gets hashed depends on the registry name:

    - ``bionty.Source``: the ``entity``, ``name``, ``organism`` and ``version``
      kwargs concatenated, hashed with ``source``.
    - ``bionty.Gene``: the ontology id field, else ``stable_id``, else
      name field + organism, hashed with ``ontology``.
    - anything else: the ontology id field, else name field + organism,
      hashed with ``ontology``.

    The ontology id and name field names come from ``registry._ontology_id_field``
    and ``registry._name_field``, defaulting to ``"ontology_id"`` and ``"name"``.

    Args:
        registry: Registry class the record belongs to.
        kwargs: Field values of the record; modified in place.

    Returns:
        The same ``kwargs`` dict, with ``uid`` set if a non-empty string to
        encode was found.

    Raises:
        AssertionError: If, for a non-Source registry, every candidate string
            is empty.
    """
    from lamindb.models import SQLRecord

    # an explicitly passed uid is kept as is, no encoding is needed
    if kwargs.get("uid") is not None:
        return kwargs

    registry_name = registry.__get_name_with_module__()

    # organism only contributes for BioRecord registries that require one
    needs_organism = (
        registry.__base__.__name__ == "BioRecord" and registry.require_organism()
    )
    if not needs_organism:
        organism = ""
    else:
        organism = kwargs.get("organism")
        if isinstance(organism, SQLRecord):
            organism = organism.name
        elif organism is None and kwargs.get("organism_id") is not None:
            from .models import Organism

            organism = Organism.get(id=kwargs["organism_id"]).name
        # NOTE: if neither organism nor organism_id is given, organism stays
        # None and is formatted as "None" in the name-based fallback below

    ontology_id_field = getattr(registry, "_ontology_id_field", "ontology_id")
    name_field = getattr(registry, "_name_field", "name")

    is_source = registry_name == "bionty.Source"
    if is_source:
        # NOTE: uses the raw `organism` kwarg, not the resolved organism name
        str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}"
    else:
        is_gene = registry_name == "bionty.Gene"  # gene has multiple id fields
        # default to encode ontology_id
        str_to_encode = kwargs.get(ontology_id_field, "")
        if is_gene and not str_to_encode:
            str_to_encode = kwargs.get("stable_id", "")
        if not str_to_encode:
            str_to_encode = f"{kwargs.get(name_field, '')}{organism}"  # name + organism
        if not str_to_encode:
            if is_gene:
                raise AssertionError(
                    f"must provide {ontology_id_field}, stable_id or {name_field}"
                )
            raise AssertionError(f"must provide {ontology_id_field} or {name_field}")

    # only reachable with an empty string for Source records without fields
    if not str_to_encode:
        return kwargs

    id_encoder = source if is_source else ontology
    kwargs["uid"] = id_encoder(str_to_encode)
    return kwargs


def encode_uid_for_hub(registry_name: str, registry_schema_json: dict, kwargs: dict):
    """Encode the uid for the hub.

    Note that `organism` record must be passed in kwargs instead of `organism_id`.

    The string that gets hashed depends on the lower-cased ``registry_name``:

    - ``"source"``: the ``entity``, ``name``, ``organism`` and ``version``
      kwargs concatenated, hashed with ``source``.
    - ``"gene"``: the ontology id field, else ``stable_id``, else
      name field + organism name, hashed with ``ontology``.
    - anything else: the ontology id field, else name field + organism name,
      hashed with ``ontology``.

    Args:
        registry_name: Name of the registry; compared case-insensitively.
        registry_schema_json: Mapping that may define ``_ontology_id_field``
            and ``_name_field`` (defaults: ``"ontology_id"`` and ``"name"``).
        kwargs: Field values of the record; modified in place. A truthy
            ``organism`` value must support ``.get("name", "")``.

    Returns:
        The same ``kwargs`` dict, with ``uid`` set if a non-empty string to
        encode was found.

    Raises:
        AssertionError: If, for a non-source registry, every candidate string
            is empty.
    """
    # an explicitly passed uid is kept as is, no encoding is needed
    if kwargs.get("uid") is not None:
        return kwargs

    name = registry_name.lower()

    # here we need the organism record, not organism_id; falsy values are
    # kept unchanged and only a truthy record is reduced to its name
    organism_record = kwargs.get("organism", "")
    organism = organism_record.get("name", "") if organism_record else organism_record

    # default to ontology_id
    ontology_id_field = registry_schema_json.get("_ontology_id_field", "ontology_id")
    name_field = registry_schema_json.get("_name_field", "name")

    is_source = name == "source"
    if is_source:
        # NOTE: uses the raw `organism` kwarg, not the extracted organism name
        str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}"
    else:
        is_gene = name == "gene"  # gene has multiple id fields
        str_to_encode = kwargs.get(ontology_id_field, "")
        if is_gene and not str_to_encode:
            str_to_encode = kwargs.get("stable_id", "")
        if not str_to_encode:
            str_to_encode = f"{kwargs.get(name_field, '')}{organism}"  # name + organism
        if not str_to_encode:
            if is_gene:
                raise AssertionError(
                    f"must provide {ontology_id_field}, stable_id or {name_field}"
                )
            raise AssertionError(f"must provide {ontology_id_field} or {name_field}")

    # only reachable with an empty string for source records without fields
    if not str_to_encode:
        return kwargs

    id_encoder = source if is_source else ontology
    kwargs["uid"] = id_encoder(str_to_encode)
    return kwargs
