Source code for constrain.lab.PCR

#!/usr/bin/env python
# MIT License
# Copyright (c) 2022, Technical University of Denmark (DTU)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

""" This part of the lab module is used for simulating and calculating PCR reactions."""

# standard libraries
import textwrap as _textwrap
import math
import csv
import json

# Extra
import pandas as pd
from pydna._pretty import pretty_str as _pretty_str
import requests


def primer_tm_neb(primer, conc=0.5, prodcode="q5-0"):
    """Look up the melting temperature of a single primer with the NEB Tm API.

    Parameters
    ----------
    primer : str
        primer sequence
    conc : float
        value sent to the API as ``conc``
    prodcode : str
        find product codes on nebswebsite:
        https://tmapi.neb.com/docs/productcodes

    Returns
    -------
    tm
        the ``tm1`` field of the first result row. ``None`` is returned when
        the API reports a failure (the first error is printed) or when it
        returns no rows.
    """
    url = "https://tmapi.neb.com/tm/batch"
    payload = {"seqpairs": [[primer]], "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}

    # Without a timeout an unresponsive server would block the caller forever.
    res = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
    r = json.loads(res.content)

    if r["success"]:
        # Only one primer was submitted, so the first row is the answer.
        for row in r["data"]:
            return row["tm1"]
    else:
        print("request failed")
        print(r["error"][0])
    return None
def primer_ta_neb(primer1, primer2, conc=0.5, prodcode="q5-0"):
    """Look up the annealing temperature of a primer pair with the NEB Tm API.

    Parameters
    ----------
    primer1 : str
        first primer to be used for finding the optimal ta
    primer2 : str
        second primer to be used for finding the optimal ta
    conc : float
        value sent to the API as ``conc``
    prodcode : str
        find product codes on nebswebsite:
        https://tmapi.neb.com/docs/productcodes

    Returns
    -------
    ta
        the ``ta`` field of the first result row. ``None`` is returned when
        the API reports a failure (the first error is printed) or when it
        returns no rows.
    """
    url = "https://tmapi.neb.com/tm/batch"
    payload = {"seqpairs": [[primer1, primer2]], "conc": conc, "prodcode": prodcode}
    headers = {"content-type": "application/json"}

    # Without a timeout an unresponsive server would block the caller forever.
    res = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
    r = json.loads(res.content)

    if r["success"]:
        # Only one primer pair was submitted, so the first row is the answer.
        for row in r["data"]:
            return row["ta"]
    else:
        print("request failed")
        print(r["error"][0])
    return None
def grouper(iterable, max_diff):
    """Split a sequence of numbers into runs of neighbouring values.

    An item joins the current group when it exceeds the previous item by at
    most ``max_diff``; a larger jump closes the group and starts a new one.
    Items are consumed in the order given and only adjacent items are
    compared, so the input should be sorted ascending.

    Parameters
    ----------
    iterable : iterable of numbers
    max_diff : number
        largest allowed step between two consecutive members of a group

    Yields
    ------
    list
        one list per group, in input order
    """
    prev = None
    group = []
    for item in iterable:
        # Test against None explicitly: a previous value of 0 is falsy and
        # must not be mistaken for "there is no previous item yet".
        if prev is None or item - prev <= max_diff:
            group.append(item)
        else:
            yield group
            group = [item]
        prev = item
    if group:
        yield group
def calculate_volumes(
    vol_p_reac=0, no_of_reactions=1, standard_reagents=None, standard_volumes=None
):
    """Make a reaction scheme for PCR master mixes.

    The standard recipe is rescaled so that one reaction has a total volume
    of ``vol_p_reac``, and then multiplied by ``no_of_reactions``.

    Parameters
    ----------
    vol_p_reac : int
        total volume of a single reaction
    no_of_reactions : int
        number of reactions the master mix has to cover
    standard_reagents : sequence of str, optional
        reagent names; defaults to no reagents
    standard_volumes : sequence of numbers, optional
        volume of each reagent in the standard recipe, in the same order as
        ``standard_reagents``; defaults to no volumes

    Returns
    -------
    pd.DataFrame
        indexed by reagent name plus a final "Total" row, with the columns
        "vol_p_reac" and "vol_p_<no_of_reactions>_reac"

    Raises
    ------
    ZeroDivisionError
        if volumes are given but they sum to zero

    Examples
    --------
    calculate_volumes(vol_p_reac = 10, no_of_reactions = 6,
                      standard_reagents = ["DNA","Buffer, Cutsmart","H20","Enz, USER"],
                      standard_volumes = [1,1,7,1])

    The following reaction scheme will be made:
    ------------------------------------------------
                      vol_p_reac    vol_p_6_reac
    DNA                      1.0             6.0
    Buffer, Cutsmart         1.0             6.0
    H20                      7.0            42.0
    Enz, USER                1.0             6.0
    Total                   10.0            60.0
    ------------------------------------------------
    """
    # None defaults avoid sharing one list object between calls; list() lets
    # callers pass tuples or other sequences as well.
    reagents = [] if standard_reagents is None else list(standard_reagents)
    recipe_volumes = [] if standard_volumes is None else list(standard_volumes)

    recipe_total = sum(recipe_volumes)
    per_reaction = [vol / recipe_total * vol_p_reac for vol in recipe_volumes]
    per_batch = [vol * no_of_reactions for vol in per_reaction]

    return pd.DataFrame(
        data={
            "vol_p_reac": per_reaction + [sum(per_reaction)],
            "vol_p_" + str(no_of_reactions) + "_reac": per_batch + [sum(per_batch)],
        },
        index=reagents + ["Total"],
    )
def calculate_processing_speed(amplicon):
    """Set the processing speed of an amplicon from the polymerase in use.

    Parameters
    ----------
    amplicon : pydna.amplicon

    Returns
    -------
    amplicon
        the same object, with ``amplicon.annotations["proc_speed"]`` set
        (seconds per kb). An amplicon that already carries a "proc_speed"
        annotation is returned unchanged.

    Raises
    ------
    KeyError
        if ``amplicon.annotations["polymerase"]`` is missing
    ValueError
        if the polymerase is not one of the supported names

    Notes
    -----
    The amplicon needs to have the following dict incorporated:
    amplicon.annotations["polymerase"]
    """
    # The guard has to look at the dict that is written below, otherwise it
    # can never detect a value set by a previous call.
    if "proc_speed" in amplicon.annotations:
        print("proc_speed already set")
        return amplicon

    # proc_speed units are seconds/kb
    speeds = {
        "OneTaq Hot Start": 60,
        "Q5 Hot Start": 30,
        "Phusion": 30,
    }
    polymerase = amplicon.annotations["polymerase"]
    if polymerase not in speeds:
        raise ValueError(
            "Unknown polymerase "
            + repr(polymerase)
            + "; expected one of: "
            + ", ".join(speeds)
        )

    amplicon.annotations["proc_speed"] = speeds[polymerase]
    return amplicon
def calculate_elongation_time(amplicon):
    """Compute the elongation time of an amplicon and store it as an annotation.

    The time is ``proc_speed * len(amplicon) / 1000`` rounded up to whole
    seconds.

    Parameters
    ----------
    amplicon : pydna.amplicon

    Returns
    -------
    amplicon
        the same object, with ``amplicon.annotations["elongation_time"]`` set
        (seconds). An amplicon that already carries an "elongation_time"
        annotation is returned unchanged.

    Raises
    ------
    KeyError
        if ``amplicon.annotations["proc_speed"]`` is missing

    Notes
    -----
    The amplicon needs to have a dict called proc_speed shown as follows:
    amplicon.annotations["proc_speed"]
    This annotation can be made with the function
    calculate_processing_speed.
    """
    # The guard has to look at the dict that is written below, otherwise it
    # can never detect a value set by a previous call.
    if "elongation_time" in amplicon.annotations:
        print("elongation_time already set")
        return amplicon

    # proc_speed is seconds per kb, len(amplicon) is bp -> seconds
    seconds = amplicon.annotations["proc_speed"] * len(amplicon) / 1000
    amplicon.annotations["elongation_time"] = math.ceil(seconds)
    return amplicon
def calculate_required_thermal_cyclers(amplicons: list, polymerase: str, elong_time_max_diff=15):
    """Group amplicons that can share a thermal cycler program.

    Elongation times that lie within ``elong_time_max_diff`` of their
    neighbour (after sorting) are merged into one group and every member is
    assigned the longest time of that group. Amplicons are then grouped by
    annealing temperature and merged elongation time.

    Parameters
    ----------
    amplicons : list of pydna.amplicon objects
        each needs ``annotations["elongation_time"]`` and
        ``annotations["ta " + polymerase]``
    polymerase : str
        polymerase name used to pick the annealing temperature annotation
    elong_time_max_diff : number
        largest allowed step between neighbouring elongation times inside one
        group

    Returns
    -------
    pd.DataFrame
        one row per (tas, elong_times) combination with the columns "tas",
        "elong_times" and "amplicons", the latter holding the comma separated
        names of the amplicons in that group
    """
    names = [amplicon.name for amplicon in amplicons]
    elong_times = [amplicon.annotations["elongation_time"] for amplicon in amplicons]
    tas = [amplicon.annotations["ta " + polymerase] for amplicon in amplicons]

    # grouper only compares each value with the one before it, so the times
    # must be sorted first; unsorted input would merge unrelated times.
    longest_in_group = {}
    for group in grouper(sorted(elong_times), elong_time_max_diff):
        longest = max(group)
        for time in group:
            longest_in_group[time] = longest

    # Rows stay in input order, which is the order the names are joined in.
    thermal_cyclers = pd.DataFrame(
        {
            "amplicons": names,
            "tas": tas,
            "elong_times": [longest_in_group[time] for time in elong_times],
        }
    )
    thermal_cyclers = (
        thermal_cyclers.groupby(["tas", "elong_times"])["amplicons"]
        .apply(", ".join)
        .reset_index()
    )
    return thermal_cyclers
def _has_batches(annotations):
    """Tell whether an annotations dict holds a non-empty "batches" entry."""
    return "batches" in annotations.keys() and len(annotations["batches"]) != 0


def pcr_locations(amplicons: list):
    """Collect the batch locations of amplicons, their templates and primers.

    For every amplicon the location of the first batch is looked up. The
    template's batches are preferred; the amplicon's own batches are the
    fallback, and the value found is used for both the "location" and the
    "template" column. Anything without batches is reported on stdout and
    recorded as "Empty".

    Parameters
    ----------
    amplicons : list
        List of amplicon objects `pydna.amplicon`

    Returns
    -------
    pd.DataFrame
        one row per amplicon with the columns "location", "name",
        "template", "fw" and "rv"
    """
    rows = []
    for amplicon in amplicons:
        # Product / template location: template batches first, then the
        # amplicon's own batches.
        if _has_batches(amplicon.template.annotations):
            product_where = amplicon.template.annotations["batches"][0]["location"]
        elif _has_batches(amplicon.annotations):
            product_where = amplicon.annotations["batches"][0]["location"]
        else:
            product_where = "Empty"
            print(f"No batches were found for {amplicon.name!s}. Please check the object.")

        # Primer locations
        if _has_batches(amplicon.forward_primer.annotations):
            fw_where = amplicon.forward_primer.annotations["batches"][0]["location"]
        else:
            fw_where = "Empty"
            print(f"{amplicon.name!s}: Foward primer location was not found")

        if _has_batches(amplicon.reverse_primer.annotations):
            rv_where = amplicon.reverse_primer.annotations["batches"][0]["location"]
        else:
            rv_where = "Empty"
            print(f"{amplicon.name!s}: Reverse primer location was not found")

        rows.append((product_where, amplicon.name, product_where, fw_where, rv_where))

    return pd.DataFrame(rows, columns=["location", "name", "template", "fw", "rv"])
def nanophotometer_concentrations(
    path="",
):
    """Read the concentrations from a nanophotometer export file.

    The file is read as tab-separated text in Latin-1 encoding. The first
    line is treated as a header and skipped, blank lines are ignored, and the
    fifth column of every remaining row is parsed as a number. A decimal
    comma is accepted.

    Parameters
    ----------
    path : str
        path to file

    Returns
    -------
    concentrations : list
        list of concentrations from the file as floats; empty when the file
        holds no data rows

    Raises
    ------
    IndexError
        if a non-blank data row has fewer than five columns
    ValueError
        if a value in the fifth column is not a number
    """
    # newline="" is what the csv module expects from the file object.
    with open(path, encoding="Latin1", newline="") as tsvfile:
        reader = csv.reader(tsvfile, delimiter="\t")
        # Skip the header; the default keeps an empty file from raising.
        next(reader, None)
        # csv.reader yields [] for blank lines, which carry no measurement.
        return [float(row[4].replace(",", ".")) for row in reader if row]
def amplicon_by_name(name: str, amplicons_lst: list):
    """Find the first amplicon in a list that carries the given name.

    Parameters
    ----------
    name : str
        name to look for
    amplicons_lst : list
        amplicons to search, compared through their ``name`` attribute

    Returns
    -------
    amplicon : pydna.amplicon
        the first match, or None when no amplicon has that name
    """
    matching = (candidate for candidate in amplicons_lst if candidate.name == name)
    return next(matching, None)
def Q5_NEB_PCR_program(amplicon):
    r"""Simple PCR program designed to give a quick visual representation.

    The processing speed and elongation time are calculated first. The
    annealing temperature of the primer pair and the melting temperature of
    each primer are then requested from NEB and stored as "ta Q5 Hot Start"
    / "tm Q5 Hot Start" annotations on the amplicon and its primers.

    Parameters
    ----------
    amplicon : pydna.amplicon
        pydna amplicon object; needs ``annotations["polymerase"]``

    Returns
    -------
    str
        schematic representation of a Q5 program, for example (values are
        illustrative)
        ::

            |98°C|98°C               |    |tmf:59.5
            |____|_____          72°C|72°C|tmr:59.7
            |30 s|10s  \ 59.1°C _____|____|30s/kb
            |    |      \______/ 0:32|2min|GC 51%
            |    |       20s         |    |1051bp
    """
    # The processing speed has to be known before the elongation time can be
    # derived from it, so the order of these two calls matters.
    amplicon = calculate_processing_speed(amplicon)
    amplicon = calculate_elongation_time(amplicon)

    # ta
    amplicon.annotations["ta Q5 Hot Start"] = primer_ta_neb(
        str(amplicon.forward_primer.seq), str(amplicon.reverse_primer.seq)
    )

    # tm forward and reverse
    amplicon.forward_primer.annotations["tm Q5 Hot Start"] = primer_tm_neb(
        str(amplicon.forward_primer.seq)
    )
    amplicon.reverse_primer.annotations["tm Q5 Hot Start"] = primer_tm_neb(
        str(amplicon.reverse_primer.seq)
    )

    # Elongation time is shown as minutes:seconds in the diagram.
    minutes, seconds = map(int, divmod(amplicon.annotations["elongation_time"], 60))

    formated = _textwrap.dedent(
        r"""
    |98°C|98°C               |    |tmf:{tmf:.1f}
    |____|_____          72°C|72°C|tmr:{tmr:.1f}
    |30 s|10s  \ {ta:.1f}°C _____|____|{rate}s/kb
    |    |      \______/{0:2}:{1:2}|2min|GC {GC_prod}%
    |    |       20s         |    |{size}bp
    """[
            1:-1
        ].format(
            minutes,
            seconds,
            rate=amplicon.annotations["proc_speed"],
            size=len(amplicon.seq),
            ta=amplicon.annotations["ta Q5 Hot Start"],
            tmf=amplicon.forward_primer.annotations["tm Q5 Hot Start"],
            tmr=amplicon.reverse_primer.annotations["tm Q5 Hot Start"],
            GC_prod=round(amplicon.gc() * 100, 2),
        )
    )

    return _pretty_str(formated)
def set_plate_locations(amplicons:list):
    """Tabulate where amplicons, their templates and primers are stored.

    The location is read from the first entry of the "batches" annotation of
    the amplicon, its template and both primers.

    Parameters
    ----------
    amplicons : list
        list of pydna.amplicon objects

    Returns
    -------
    pd.DataFrame
        indexed by amplicon name, with the columns "location",
        "template_name", "template_location", "fw_name", "fw_location",
        "rv_name" and "rv_location"
    """
    header = [
        'name',
        'location',
        'template_name',
        'template_location',
        'fw_name',
        'fw_location',
        'rv_name',
        'rv_location',
    ]
    records = [
        [
            amp.name,
            amp.annotations['batches'][0]['location'],
            amp.annotations['template_name'],
            amp.template.annotations['batches'][0]['location'],
            amp.forward_primer.id,
            amp.forward_primer.annotations['batches'][0]['location'],
            amp.reverse_primer.id,
            amp.reverse_primer.annotations['batches'][0]['location'],
        ]
        for amp in amplicons
    ]
    return pd.DataFrame(records, columns=header).set_index('name')
def update_amplicon_annotations(amplicon_names:list,amplicons:list, locations:list, concentrations:list,volumes:list )->None:
    """Write location, concentration and volume into amplicon batch annotations.

    Each name is looked up in ``amplicons`` and the first entry of that
    amplicon's "batches" annotation is updated with the values found at the
    same position in the other lists.

    Parameters
    ----------
    amplicon_names : list
        List of amplicon names.
    amplicons : list
        List of amplicon objects the names refer to.
    locations : list
        List of locations for each amplicon.
    concentrations : list
        List of concentrations for each amplicon.
    volumes : list
        List of volumes for each amplicon.

    Returns
    -------
    None
    """
    for position, amplicon_name in enumerate(amplicon_names):
        first_batch = amplicon_by_name(amplicon_name, amplicons).annotations['batches'][0]
        first_batch['location'] = locations[position]
        first_batch['concentration'] = concentrations[position]
        first_batch['volume'] = volumes[position]
## Maybe redundant #def get_amplicons_by_row(row, amplicon_df, amplicons): # """Returns a list of amplicons in a given gel row. # # Parameters # ---------- # row : str # Name of the gel row. # amplicon_df : pandas DataFrame # DataFrame with amplicon information, including the column 'prow' indicating the gel row. # amplicons : list of Amplicon # List of Amplicon objects. # # Returns # ------- # list of Amplicon # List of Amplicon objects in the given gel row. # """ # row_names = amplicon_df[amplicon_df['prow']==row][['name']]['name'].tolist() # # row_amplicons = [] # for name in row_names: # for amplicon in amplicons: # if amplicon.name == name: # row_amplicons.append([amplicon]) # # return(row_amplicons) # # # #def get_amplicons_by_column(col, amplicon_df, amplicons): # """ # Returns a list of amplicons in a given gel column. # # Parameters # ---------- # col : str # Name of the gel column. # amplicon_df : pandas DataFrame # DataFrame with amplicon information, including the column 'pcol' indicating the gel column. # amplicons : list of Amplicon # List of Amplicon objects. # # Returns # ------- # list of Amplicon # List of Amplicon objects in the given gel column. # """ # col_names = amplicon_df[amplicon_df['pcol']==col][['name']]['name'].tolist() # # col_amplicons = [] # for name in col_names: # for amplicon in amplicons: # if amplicon.name == name: # col_amplicons.append([amplicon]) # # return(col_amplicons)