"""
kir-mapper to allele table conversion module.
This module reads the reports of kir-mapper and generates
the allele table. Some filtering based on depth and allele
mismatch can be performed.
Author: Nicolás Mendoza Mejía (2025)
"""
from typing import List
import pandas as pd
from alleleTools.allele import AlleleParser
from alleleTools.format.alleleTable import AlleleTable
from alleleTools.format.from_ikmb_hla import ConsensusGene
from ..argtypes import add_out_altable_args, csv_file, file_path, output_path
[docs]
def setup_parser(subparsers):
"""
Set up the argument parser for the kir-mapper command.
Args:
subparsers: The subparsers object to add this command to.
Returns:
argparse.ArgumentParser: The configured parser for kir-mapper.
"""
parser = subparsers.add_parser(
name="from_kirmapper",
help="Convert kir-mapper reports to allele table format",
description="Convert kir-mapper reports to allele table format",
epilog="Author: Nicolás Mendoza Mejía (2025)",
)
# Input/output arguments
parser.add_argument(
"input",
metavar="path",
type=file_path,
nargs="+",
help="Report files from kir-mapper",
)
parser = add_out_altable_args(parser)
parser.set_defaults(func=call_function)
return parser
[docs]
def call_function(args):
"""
Main function to execute the kir-mapper report to allele table conversion.
Args:
args: Parsed command line arguments
"""
reports = read_reports(args.input)
reports = exclude_high_missings(reports, threshold=5)
reports = parse_alleles(reports)
all_consensus = dict()
for sample, row in reports.iterrows():
report = row.to_dict()
parser = AlleleParser(
gene_family=args.gene_family, config_file=args.config_file
)
gene = ConsensusGene(
name=report["Gene"], calls=report["Calls"], allele_parser=parser
)
gene.set_consensus_settings(
normalize_weight=False, max_support=len(report["Calls"])
)
consensus = gene.consensus_dict(min_support=0.6)
consensus["original_calls"] = report["Calls"]
all_consensus[sample] = consensus
allele_table = pd.DataFrame.from_dict(all_consensus, orient="index")
allele_table.index.name = "SampleID"
gene = reports["Gene"].unique()[0]
allele_table[[gene + "_1", gene + "_2"]
] = allele_table["alleles"].apply(lambda x: pd.Series(x))
alt = AlleleTable()
alt.alleles = allele_table.drop(
columns=["alleles", "original_calls", "coverage", "gene", "support"])
alt.load_phenotype(args.phenotype)
print(alt.alleles.head())
if args.remove_pheno_zero:
alt.remove_phenotype_zero()
alt.to_csv(args.output)
[docs]
def read_reports(files: List[str]) -> pd.DataFrame:
all_dfs = list()
for file in files:
df = pd.read_csv(file, sep="\t", header=0, index_col=0)
df["Gene"] = file.split(".")[0]
all_dfs.append(df)
return pd.concat(all_dfs)
[docs]
def split_alleles(x: str):
genotypes = x.split(";")
# Add algorithm name (numbers) and filter alleles
ret = dict()
for idx, key in enumerate(genotypes):
alleles = key.split("+")
# Rename null to 000 alleles and unresolved to empty string
alleles = [allele.replace("null", "000") for allele in alleles]
alleles = [
allele if "unresolved" not in allele else "" for allele in alleles]
ret["kir-mapper" + str(idx)] = alleles
return ret
[docs]
def parse_alleles(df: pd.DataFrame) -> pd.DataFrame:
alleles = df["Calls"].apply(split_alleles)
df["Calls"] = alleles
return df
[docs]
def exclude_high_missings(df: pd.DataFrame, threshold: float = 5):
# Get minimum missings
df["MinMiss"] = df["Missings"].apply(get_min_number)
# Filter alleles with high missings
df = df[(df["MinMiss"] < threshold) | (df["MinMiss"].isna())]
df.drop("MinMiss", axis=1)
return df
[docs]
def get_min_number(input: str):
if not input or not isinstance(input, str):
return input
nums = [int(i) for i in input.split(";")]
return min(nums)