Source code for constrain.lab.PCR

#!/usr/bin/env python
# MIT License
# Copyright (c) 2022, Technical University of Denmark (DTU)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

""" This part of the lab module is used for simulating and calculating PCR reactions."""

# standard libraries
import textwrap as _textwrap
import math
import csv
import json

# Extra
import pandas as pd
from pydna._pretty import pretty_str as _pretty_str
import requests


[docs]def primer_tm_neb(primer, conc=0.5, prodcode="q5-0"):
    """Calculates a single primers melting temp from NEB.

    Parameters
    ----------
    primer1 : str
    conc : float
    prodcode : str
        find product codes on nebswebsite: https://tmapi.neb.com/docs/productcodes

    Returns
    -------
    tm : int
        primer melting temperature

    """

    url = "https://tmapi.neb.com/tm/batch"
    seqpairs = [[primer]]

    input = {"seqpairs": seqpairs, "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}
    res = requests.post(url, data=json.dumps(input), headers=headers)

    r = json.loads(res.content)

    if r["success"]:
        for row in r["data"]:
            return row["tm1"]
    else:
        print("request failed")
        print(r["error"][0])


[docs]def primer_ta_neb(primer1, primer2, conc=0.5, prodcode="q5-0"):
    """Calculates primer pair melting temp TA,  from NEB.

    Parameters
    ----------
    primer1 : str
        first primer to be used for finding the optimal ta
    primer2 : str
        second primer to be used for finding the optimal ta
    conc : float
    prodcode : str
        find product codes on nebswebsite: https://tmapi.neb.com/docs/productcodes

    Returns
    -------
    ta : int
        primer pair annealing temp

    """

    url = "https://tmapi.neb.com/tm/batch"
    seqpairs = [[primer1, primer2]]

    input = {"seqpairs": seqpairs, "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}
    res = requests.post(url, data=json.dumps(input), headers=headers)

    r = json.loads(res.content)

    if r["success"]:
        for row in r["data"]:
            return row["ta"]

    else:
        print("request failed")
        print(r["error"][0])


[docs]def grouper(iterable, max_diff):
    """Groups objects into distinct groups based on differences"""
    prev = None
    group = []
    for item in iterable:
        if not prev or item - prev <= max_diff:
            group.append(item)
        else:
            yield group
            group = [item]
        prev = item
    if group:
        yield group


[docs]def calculate_volumes(
    vol_p_reac=0, no_of_reactions=1, standard_reagents=[], standard_volumes=[]
):
    """Can make a reaction scheme for PCR master mixes.
    Parameters
    ----------
    vol_p_reac : int
    no_of_reactions : int
    standard_reagents : list
    standard_volumes : list

    Returns
    -------
    pd.DataFrame

    Examples
    --------
    calculate_volumes(vol_p_reac = 10,
                no_of_reactions = 6,
                standard_reagents = ["DNA","Buffer, Cutsmart","H20","Enz, USER"],
                standard_volumes = [1,1,7,1])

     The following reaction scheme will be made:
     -------------------------
                     vol_p_reac	vol_p_x_reac
     DNA	            1.0	        6.0
     Buffer, Cutsmart	1.0	        6.0
     H20	            7.0	        42.0
     Enz, USER	        1.0	        6.0
     Total	            10.0      	60.0
     -------------------------

    """
    standard_total_volume = sum(standard_volumes)
    volumes_p_x = [val / standard_total_volume * vol_p_reac for val in standard_volumes]
    volumes_p_x_p_y_reactions = [val * no_of_reactions for val in volumes_p_x]

    volumes_p_x_plus_total = volumes_p_x + [sum(volumes_p_x)]
    volumes_p_x_p_y_reactions_plus_total = volumes_p_x_p_y_reactions + [
        sum(volumes_p_x_p_y_reactions)
    ]
    reagents_plus_total = standard_reagents + ["Total"]

    volumes_df = pd.DataFrame(
        data={
            "vol_p_reac": volumes_p_x_plus_total,
            "vol_p_"
            + str(no_of_reactions)
            + "_reac": volumes_p_x_p_y_reactions_plus_total,
        },
        index=reagents_plus_total,
    )
    return volumes_df


[docs]def calculate_processing_speed(amplicon):
    """Determines process speed based on the which polymerase is used.

    Parameters
    ----------
    amplicon : pydna.amplicon

    Returns
    -------
    Adds annotations to the amplicon object dependent on which polymerase was used

    Notes
    -----
    The amplicon needs to have the following dict incorporated:
    amplicon.annotations["polymerase"]
    """

    if "proc_speed" in amplicon.forward_primer.annotations:
        print("proc_speed already set")
        return amplicon

    # proc_speed units are seconds/kb
    if amplicon.annotations["polymerase"] == "OneTaq Hot Start":
        proc_speed = 60
    elif amplicon.annotations["polymerase"] == "Q5 Hot Start":
        proc_speed = 30
    elif amplicon.annotations["polymerase"] == "Phusion":
        proc_speed = 30

    amplicon.annotations["proc_speed"] = proc_speed

    return amplicon


[docs]def calculate_elongation_time(amplicon):

    """Determines elongation time for an amplicon
    and add the elongation time to the amplicon annotations

    Parameters
    ----------
    amplicon : pydna.amplicon

    Returns
    -------
    Adds the elongation time to the amplicon annotations

    Notes
    -----
    The amplicon needs to have a dict called proc_speed shown as follows:
    amplicon.annotations["proc_speed"]
    This dict within the annotations can be made with the function proc_speed.

    """

    if "elongation_time" in amplicon.forward_primer.annotations:
        print("elongation_time already set")
        return amplicon

    # elongation_time units are seconds
    elongation_time = amplicon.annotations["proc_speed"] * len(amplicon) / 1000
    amplicon.annotations["elongation_time"] = math.ceil(elongation_time)

    return amplicon


[docs]def calculate_required_thermal_cyclers(amplicons: list, polymerase: str, elong_time_max_diff=15):
    """Determines the number of thermalcyclers that is needed
    based on elongation time differences

    Parameters
    ----------
    amplicons : list
        of pydna.amplicon objects
    polymerase : str

    Returns
    -------
    pd.DataFrame
        dataframe of grouped amplicons

    """

    amp_names = [amplicon.name for amplicon in amplicons]
    elong_times = [amplicon.annotations["elongation_time"] for amplicon in amplicons]
    tas = [amplicon.annotations["ta " + polymerase] for amplicon in amplicons]
    order = list(range(0, len(amplicons)))

    list_of_tuples = list(zip(amp_names, tas, elong_times, order))

    list_of_tuples.sort()

    groups = dict(enumerate(grouper(elong_times, elong_time_max_diff), 1))

    list_of_lists = [list(elem) for elem in list_of_tuples]

    for gNo, gTimes in groups.items():
        # print(gNo, gTimes)
        for idx, lst in enumerate(list_of_lists):
            if lst[2] in gTimes:
                list_of_lists[idx][2] = max(gTimes)

    thermal_cyclers = pd.DataFrame(
        list_of_lists, columns=["amplicons", "tas", "elong_times", "order"]
    )
    thermal_cyclers = thermal_cyclers.sort_values(["order"])
    thermal_cyclers = (
        thermal_cyclers.groupby(["tas", "elong_times"])["amplicons"]
        .apply(", ".join)
        .reset_index()
    )

    return thermal_cyclers


[docs]def pcr_locations(amplicons: list):
    """Obtain information annotation information from amplicons.

    Parameters
    ----------
    amplicon : list
        List of amplicon objects `pydna.amplicon`() # check this

    Returns
    -------
    pd.DataFrame
        Pandas dataframe with locations of your amplicons
    """
    # initialization
    product_loc = []
    product_names = []
    template_loc = []
    fw_loc = []
    rv_loc = []

    for i in range(0, len(amplicons)):
        product_names.append(amplicons[i].name)

        # Test if batches is present
        if (
            "batches" in amplicons[i].template.annotations.keys()
            and len(amplicons[i].template.annotations["batches"]) != 0
        ):
            product_loc.append(
                amplicons[i].template.annotations["batches"][0]["location"]
            )
            template_loc.append(
                amplicons[i].template.annotations["batches"][0]["location"]
            )
        elif (
            "batches" in amplicons[i].annotations.keys()
            and len(amplicons[i].annotations["batches"]) != 0
        ):
            product_loc.append(amplicons[i].annotations["batches"][0]["location"])
            template_loc.append(amplicons[i].annotations["batches"][0]["location"])
        else:
            product_loc.append("Empty")
            template_loc.append("Empty")
            print(
                "No batches were found for "
                + str(amplicons[i].name)
                + ". Please check the object."
            )

        # Save primer locations
        if (
            "batches" in amplicons[i].forward_primer.annotations.keys()
            and len(amplicons[i].forward_primer.annotations["batches"]) != 0
        ):
            fw_loc.append(
                amplicons[i].forward_primer.annotations["batches"][0]["location"]
            )
        else:
            fw_loc.append("Empty")
            print(str(amplicons[i].name) + ": Foward primer location was not found")
        if (
            "batches" in amplicons[i].reverse_primer.annotations.keys()
            and len(amplicons[i].reverse_primer.annotations["batches"]) != 0
        ):
            rv_loc.append(
                amplicons[i].reverse_primer.annotations["batches"][0]["location"]
            )
        else:
            rv_loc.append("Empty")
            print(str(amplicons[i].name) + ": Reverse primer location was not found")

    # Save information as dataframe
    df_pcr = pd.DataFrame(
        list(zip(product_loc, product_names, template_loc, fw_loc, rv_loc)),
        columns=["location", "name", "template", "fw", "rv"],
    )

    return df_pcr


[docs]def nanophotometer_concentrations(
    path="",
):

    """Reads a CSV file with nanophotometer concentraions
    and returns the concentrations in a list

    Parameters
    ----------
    path : str
        path to file
    Returns
    -------
    concentrations : list
        list of concentrations from the file as floats
    """
    concentrations = []
    with open(path, encoding="Latin1") as tsvfile:
        reader = csv.reader(tsvfile, delimiter="\t")
        next(reader)[4]
        for row in reader:
            conc = float(row[4].replace(",", "."))
            concentrations.append(conc)

    return concentrations


[docs]def amplicon_by_name(name: str, amplicons_lst: list):
    """Returns amplicon with specified name

    Parameters
    ----------
    name : str
    amplicons_lst : list

    Returns
    -------
    amplicon : pydna.amplicon

    """
    for amplicon in amplicons_lst:
        if amplicon.name == name:
            return amplicon


[docs]def Q5_NEB_PCR_program(amplicon):

    """Simple PCR program designed to give a quick visual representations.

    Parameters
    ----------
    amplicon : pydna.amplicon
        pydna amplicon object

    Returns
    -------
    str
        schematic representation of a Q5 program
    """
    # Determine elongation time and process speed.
    amplicon = calculate_elongation_time(amplicon)
    amplicon = calculate_processing_speed(amplicon)

    # ta
    amplicon.annotations["ta Q5 Hot Start"] = primer_ta_neb(
        str(amplicon.forward_primer.seq), str(amplicon.reverse_primer.seq)
    )

    # tm forward and reverse
    amplicon.forward_primer.annotations["tm Q5 Hot Start"] = primer_tm_neb(
        str(amplicon.forward_primer.seq)
    )
    amplicon.reverse_primer.annotations["tm Q5 Hot Start"] = primer_tm_neb(
        str(amplicon.reverse_primer.seq)
    )

    r"""Returns a string containing a text representation of a suggested
    PCR program using Taq or similar polymerase.
    ::
     |98°C|98°C               |    |tmf:59.5
     |____|_____          72°C|72°C|tmr:59.7
     |30s |10s  \ 59.1°C _____|____|30s/kb
     |    |      \______/ 0:32|5min|GC 51%
     |    |       30s         |    |1051bp
    """

    formated = _textwrap.dedent(
        r"""
                            |98°C|98°C               |    |tmf:{tmf:.1f}
                            |____|_____          72°C|72°C|tmr:{tmr:.1f}
                            |30 s|10s  \ {ta:.1f}°C _____|____|{rate}s/kb
                            |    |      \______/{0:2}:{1:2}|2min|GC {GC_prod}%
                            |    |       20s         |    |{size}bp
                            """[
            1:-1
        ].format(
            rate=amplicon.annotations["proc_speed"],
            size=len(amplicon.seq),
            ta=amplicon.annotations["ta Q5 Hot Start"],
            tmf=amplicon.forward_primer.annotations["tm Q5 Hot Start"],
            tmr=amplicon.reverse_primer.annotations["tm Q5 Hot Start"],
            GC_prod=round(amplicon.gc() * 100, 2),
            *map(int, divmod(amplicon.annotations["elongation_time"], 60)),
        )
    )

    return _pretty_str(formated)



[docs]def set_plate_locations(amplicons:list): 
    '''Makes a dataframe from amplicons
    
    Parameters
    ----------
    
    amplicons : list 
        list of pydna.amplicon objects
        
    Returns
    -------
    pd.DataFrame 
        with overview of plate locations'''
    
    plate_locations = []
    for amplicon in amplicons:
        plate_locations.append([amplicon.name, 
                                amplicon.annotations['batches'][0]['location'],
                                amplicon.annotations['template_name'],
                                amplicon.template.annotations['batches'][0]['location'],
                                amplicon.forward_primer.id,
                                amplicon.forward_primer.annotations['batches'][0]['location'],
                                amplicon.reverse_primer.id,
                                amplicon.reverse_primer.annotations['batches'][0]['location']
                               ])
    amplicon_df = pd.DataFrame(plate_locations, columns=['name', 'location','template_name', 'template_location','fw_name','fw_location','rv_name','rv_location'])
    amplicon_df = amplicon_df.set_index('name')
    
    return amplicon_df


[docs]def update_amplicon_annotations(amplicon_names:list,amplicons:list,  locations:list, concentrations:list,volumes:list )->None: 
    """Updates the annotations of amplicons in the amplicon list.
    
    Parameters
    ----------
    amplicon_names : list
        List of amplicon names.
    locations : list
        List of locations for each amplicon.
    concentrations : list
        List of concentrations for each amplicon.
    volumes : list
        List of volumes for each amplicon.

    Returns
    -------
    None
    """
    for i in range(len(amplicon_names)): 
        amplicon_by_name(amplicon_names[i], amplicons).annotations['batches'][0]['location'] = locations[i]
        amplicon_by_name(amplicon_names[i], amplicons).annotations['batches'][0]['concentration'] =concentrations[i]
        amplicon_by_name(amplicon_names[i], amplicons).annotations['batches'][0]['volume'] = volumes[i]
        

## Maybe redundant
#def get_amplicons_by_row(row, amplicon_df, amplicons):
#    """Returns a list of amplicons in a given gel row.
#
#    Parameters
#    ----------
#    row : str
#        Name of the gel row.
#    amplicon_df : pandas DataFrame
#        DataFrame with amplicon information, including the column 'prow' indicating the gel row.
#    amplicons : list of Amplicon
#        List of Amplicon objects.
#
#    Returns
#    -------
#    list of Amplicon
#        List of Amplicon objects in the given gel row.
#    """
#    row_names = amplicon_df[amplicon_df['prow']==row][['name']]['name'].tolist()
#
#    row_amplicons = []
#    for name in row_names:
#        for amplicon in amplicons:
#            if amplicon.name == name:
#                row_amplicons.append([amplicon])
#
#    return(row_amplicons)
#
#
#
#def get_amplicons_by_column(col, amplicon_df, amplicons):
#    """
#    Returns a list of amplicons in a given gel column.
#
#    Parameters
#    ----------
#    col : str
#        Name of the gel column.
#    amplicon_df : pandas DataFrame
#        DataFrame with amplicon information, including the column 'pcol' indicating the gel column.
#    amplicons : list of Amplicon
#        List of Amplicon objects.
#
#    Returns
#    -------
#    list of Amplicon
#        List of Amplicon objects in the given gel column.
#    """
#    col_names = amplicon_df[amplicon_df['pcol']==col][['name']]['name'].tolist()
#    
#    col_amplicons = []
#    for name in col_names:
#        for amplicon in amplicons:
#            if amplicon.name == name:
#                col_amplicons.append([amplicon])
#                
#    return(col_amplicons)