#!/usr/bin/env python
# MIT License
# Copyright (c) 2022, Technical University of Denmark (DTU)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
""" This part of the design module is used for making combinatorial libraries from DNA fragments."""
# standard libraries
import itertools
import numpy as np
import pandas as pd
# Pydna for the molecular bio
from pydna.design import primer_design
from pydna.design import assembly_fragments
from pydna.assembly import Assembly
from pydna.tm import tm_default as _tm_default
def combinatorial_list_maker(
    listOflist_that_is_being_made_into_all_combinations: list,
) -> list:
    """Return the Cartesian product of a list of lists.

    Parameters
    ----------
    listOflist_that_is_being_made_into_all_combinations: list[list[any_type]]
        one inner list per position; the elements can be of any type

    Returns
    -------
    combinations: list[tuple(any_type)]
        every way of picking one element from each inner list, in the
        order produced by ``itertools.product``
    """
    return [*itertools.product(*listOflist_that_is_being_made_into_all_combinations)]
def systematic_names_function(List_of_list_parts: list) -> list:
    """Return 1-based index combinations for a list of part lists.

    Each inner list is replaced by the indexes 1..len(inner list), and the
    Cartesian product of those index ranges is returned, e.g.
    (1, 1, 1), (1, 1, 2), ...

    Parameters
    ----------
    List_of_list_parts: list of list
        one inner list per position; only the length of each inner list is used

    Returns
    -------
    combinatorial_list_of_indexes
        list of tuples with the systematic names eg. [(1,1,1),(1,2,1)]
    """
    # One 1-based index range per position, sized by the number of parts there.
    index_ranges = [range(1, len(parts) + 1) for parts in List_of_list_parts]
    return list(itertools.product(*index_ranges))
def empty_list_maker(list_of_sequences: list):
    """Return a list of independent empty lists, one per input element.

    Parameters
    ----------
    list_of_sequences: list
        any list; only its length is used

    Returns
    -------
    EmptyList:list
        ``len(list_of_sequences)`` distinct empty lists
    """
    placeholders = []
    for _ in range(len(list_of_sequences)):
        # A fresh list every time so the slots never alias each other.
        placeholders.append([])
    return placeholders
def simple_amplicon_maker(
    list_of_seqs: list, list_of_names: list, target_tm=56.0, limit=13
):
    """Design primers for every sequence and return the named amplicons.

    Every sequence is passed to pydna's ``primer_design`` (with
    ``tm_func=_tm_default``); the resulting amplicon is renamed to the
    matching entry of ``list_of_names``.

    Parameters
    ----------
    list_of_seqs : list[list[pydna.dseqrecord.Dseqrecord]]
        the sequences to turn into amplicons, grouped per position
    list_of_names : list[list[str]]
        names for the amplicons, with the same nesting as ``list_of_seqs``
    target_tm : float
        forwarded to ``primer_design`` as ``target_tm``
    limit : int
        forwarded to ``primer_design`` as ``limit``

    Returns
    -------
    list_of_amplicons : list[list[pydna.amplicon.Amplicon]]
        the designed amplicons, nested like ``list_of_seqs``
    list_of_amplicon_primers : list[list[(pydna.seq.Seq, pydna.seq.Seq)]]
        (forward primer sequence, reverse primer sequence) per amplicon
    list_of_amplicon_primer_temps : list[list[(float, float)]]
        (forward tm, reverse tm) per amplicon, computed with ``_tm_default``
    """
    all_amplicons = []
    all_primer_pairs = []
    all_primer_temps = []

    for group_index, sequence_group in enumerate(list_of_seqs):
        group_amplicons = []
        group_primer_pairs = []
        group_primer_temps = []
        all_amplicons.append(group_amplicons)
        all_primer_pairs.append(group_primer_pairs)
        all_primer_temps.append(group_primer_temps)

        for seq_index, sequence in enumerate(sequence_group):
            amplicon = primer_design(
                sequence,
                tm_func=_tm_default,
                target_tm=target_tm,
                limit=limit,
            )
            # primer_design hands back its own name; restore the caller's.
            amplicon.name = list_of_names[group_index][seq_index]
            group_amplicons.append(amplicon)

            forward_seq = amplicon.forward_primer.seq
            reverse_seq = amplicon.reverse_primer.seq
            group_primer_pairs.append((forward_seq, reverse_seq))

            # Melting temperatures use the same function as the design step.
            group_primer_temps.append(
                (
                    _tm_default(amplicon.forward_primer.seq),
                    _tm_default(amplicon.reverse_primer.seq),
                )
            )

    return all_amplicons, all_primer_pairs, all_primer_temps
def get_primers(
    List_of_assemblies: list,
    combinatorial_list_of_names: list,
    combinatorial_list_of_primer_tm: list,
):
    """Collect and annotate the primers of every assembly.

    For each amplicon the function (in place):

    * sets ``amplicon.name`` from ``combinatorial_list_of_names``,
    * writes a description on both primers stating which template they
      anneal to and, where a neighbouring fragment exists, which fragment
      they overlap,
    * sets both primers' ``name`` to the amplicon name,
    * stores the rounded melting temperature on the primers' ``features``
      attribute.

    Parameters
    ----------
    List_of_assemblies : list[list[pydna.amplicon.Amplicon]]
        one list of amplicons per assembly; objects are modified in place
    combinatorial_list_of_names : list[(str)]
        ``combinatorial_list_of_names[i][j]`` is the name of amplicon ``j``
        in assembly ``i``
    combinatorial_list_of_primer_tm : list[(float, float),..)...]
        ``combinatorial_list_of_primer_tm[i][j]`` is
        ``(forward_tm, reverse_tm)`` for amplicon ``j`` in assembly ``i``

    Returns
    -------
    primers : list[list[[pydna.primer.Primer, pydna.primer.Primer]]
        for every assembly, a list of ``[forward_primer, reverse_primer]``
        pairs in amplicon order
    """
    primers = []
    for i, assembly in enumerate(List_of_assemblies):
        # Rename the whole assembly first: the descriptions below refer to
        # the *next* fragment's name, which would otherwise still be stale.
        for j, amplicon in enumerate(assembly):
            amplicon.name = combinatorial_list_of_names[i][j]

        last_index = len(assembly) - 1
        assembly_primers = []
        for j, amplicon in enumerate(assembly):
            forward = amplicon.forward_primer
            reverse = amplicon.reverse_primer

            anneals_to = "Anneals to " + str(amplicon.name)
            forward_description = anneals_to
            reverse_description = anneals_to
            # Only mention an overlap when that neighbour exists; this also
            # makes a single-fragment assembly safe (no j + 1 lookup).
            if j > 0:
                forward_description += ", overlaps to " + str(assembly[j - 1].name)
            if j < last_index:
                reverse_description += ", overlaps to " + str(assembly[j + 1].name)
            forward.description = forward_description
            reverse.description = reverse_description

            # Both primers are named after the template they anneal to.
            forward.name = str(amplicon.name)
            reverse.name = str(amplicon.name)

            # The melting temperature is kept on ``features``.
            forward_tm, reverse_tm = (
                combinatorial_list_of_primer_tm[i][j][0],
                combinatorial_list_of_primer_tm[i][j][1],
            )
            forward.features = round(float(forward_tm), 2)
            reverse.features = round(float(reverse_tm), 2)

            assembly_primers.append([forward, reverse])
        primers.append(assembly_primers)
    return primers
def assembly_maker(combinatorial_list_of_amplicons: list, overlap=35):
    """Run pydna's ``assembly_fragments`` on every combination of amplicons.

    Parameters
    ----------
    combinatorial_list_of_amplicons : list[[pydna.amplicon.Amplicon]]
        one list of fragments per combination; each list is passed as a
        whole to ``assembly_fragments``
    overlap : int = 35
        forwarded to ``assembly_fragments`` as the overlap length

    Returns
    -------
    List_of_assemblies : list[[pydna.amplicon.Amplicon]]
        the result of ``assembly_fragments`` for every combination, in
        input order. ``maxlink`` is fixed at 40.
    """
    return [
        assembly_fragments(fragments, overlap, maxlink=40)
        for fragments in combinatorial_list_of_amplicons
    ]
def unique_primers(primers: list, list_of_assemblies):
    """Deduplicate primers, give them IDs and re-attach them to the amplicons.

    Forward and reverse primers are deduplicated separately (using ``in``,
    i.e. the primers' own equality). Unique forward primers receive IDs
    ``F001, F002, ...``; the running number is *not* reset for the reverse
    primers, so they continue as ``R<n+1>, ...``. Finally every amplicon in
    ``list_of_assemblies`` gets its forward/reverse primer replaced by the
    unique primer with the same sequence, so all amplicons share the
    ID-carrying objects.

    Parameters
    ----------
    primers : list[list[[pydna.primer.Primer, pydna.primer.Primer]]
        ``[forward, reverse]`` pairs per assembly
    list_of_assemblies: list[[pydna.amplicon.Amplicon]]
        amplicons whose primers are replaced in place

    Returns
    -------
    unique_primers : list[list(ID,Anneals_to,Sequence,Annealing_temp,Length,Price(DKK))]
        one row per unique primer, forward primers first. The price is
        ``length * 1.8``.
    """
    forward_pool = []
    reverse_pool = []
    for assembly_primers in primers:
        for pair in assembly_primers:
            if pair[0] not in forward_pool:
                forward_pool.append(pair[0])
            if pair[1] not in reverse_pool:
                reverse_pool.append(pair[1])

    # Assign IDs and build the report rows; one shared running number.
    primer_rows = []
    serial = 0
    for id_template, pool in (
        ("F{number:03}", forward_pool),
        ("R{number:03}", reverse_pool),
    ):
        for primer in pool:
            serial += 1
            primer.id = id_template.format(number=serial)
            primer_rows.append(
                [
                    primer.id,
                    primer.name,
                    primer.seq,
                    primer.features,  # annealing temperature
                    len(primer.seq),  # length
                    len(primer.seq) * 1.8,  # price
                ]
            )

    # Point every amplicon at the unique (ID-carrying) primer objects.
    for assembly in list_of_assemblies:
        for amplicon in assembly:
            for candidate in forward_pool:
                if amplicon.forward_primer.seq == candidate.seq:
                    amplicon.forward_primer = candidate
            for candidate in reverse_pool:
                if amplicon.reverse_primer.seq == candidate.seq:
                    amplicon.reverse_primer = candidate

    return primer_rows
def unique_amplicons(list_of_assemblies: list):
    """Return every distinct amplicon found in a list of assemblies.

    Parameters
    ----------
    list_of_assemblies: list[[pydna.amplicon.Amplicon]]
        the assemblies to scan

    Returns
    -------
    unique_amplicons: list[pydna.amplicon.Amplicon]
        the amplicons in first-seen order, with duplicates (as decided by
        ``in``, i.e. the objects' own equality) left out
    """
    distinct = []
    for assembly in list_of_assemblies:
        for amplicon in assembly:
            if amplicon in distinct:
                continue
            distinct.append(amplicon)
    return distinct
def making_assembly_objects(list_of_assemblies: list):
    """Wrap every list of amplicons in a pydna ``Assembly`` object.

    Parameters
    ----------
    list_of_assemblies: list[[pydna.amplicon.Amplicon]]
        one list of fragments per assembly

    Returns
    -------
    list_of_assembly_objects: list[pydna.assembly.Assembly]
        one ``Assembly`` per input list, each created with ``limit=35``
    """
    return [Assembly(fragments, limit=35) for fragments in list_of_assemblies]
def making_assembled_contigs(list_of_assembly_objects: list):
    """Assemble every assembly object into linear contigs.

    Parameters
    ----------
    list_of_assembly_objects : list[pydna.assembly.Assembly]
        objects exposing ``assemble_linear()``

    Returns
    -------
    contigs_assembled : list
        the result of ``assemble_linear()`` for every input object, in
        input order
    """
    # The previous version built this list and then returned its input
    # unchanged, so the contigs were computed and thrown away.
    contigs_assembled = [
        assembly_object.assemble_linear()
        for assembly_object in list_of_assembly_objects
    ]
    return contigs_assembled
class DesignAssembly:
    """Class able to make a combinatorial library from DNA fragments.

    On construction the class designs primers for every sequence, builds
    every combination of the resulting amplicons, inserts the pad into each
    combination, creates overlapping primers for assembly, and collects the
    unique primers and amplicons.

    Parameters
    ----------
    list_of_seqs : list
        A list of list of a constructs of choice.
    list_of_names : list
        A list of list of the names wanted for the construct of choice.
    pad : pydna.Dseqrecord
        A nucleotide sequence to be incorporated into the primers
        (fragments are assembled with ``maxlink=40``).
    position_of_pad : int
        the position in the list of seqs where the pad is incorporated (zero indexed)
    target_tm : float
        forwarded to ``simple_amplicon_maker``
    limit : int
        forwarded to ``simple_amplicon_maker``
    overlap : int
        forwarded to ``assembly_maker``

    Returns
    -------
    constrain.design.combinatorial_design.DesignAssembly object
        Information can be retrieved through the methods, e.g. the PCRs
        and primers needed for the library via ``PCR_list_to_dataframe``
        and ``primer_list_to_dataframe``.
    """

    def __init__(
        self,
        list_of_seqs: list,
        list_of_names: list,
        pad: str,
        position_of_pad: int,
        target_tm=56.0,
        limit=13,
        overlap=35,
    ):
        ### 1. Inputs
        self.list_of_seqs = list_of_seqs
        self.list_of_names = list_of_names
        self.pad = pad
        self.position_of_pad = position_of_pad
        # Kept so Assembly objects can later be built with a matching limit.
        self.overlap = overlap

        ### 2. Amplicons, primers, and their temperatures
        (
            self.list_of_amplicons,
            self.list_of_amplicon_primers,
            self.list_of_amplicon_primer_temps,
        ) = simple_amplicon_maker(
            self.list_of_seqs, self.list_of_names, target_tm=target_tm, limit=limit
        )

        # Systematic names
        self.systematic_names = systematic_names_function(self.list_of_seqs)

        ### 3. Combinatorial lists
        # The combinations come back as tuples; they are turned into lists
        # because the pad is inserted into each of them below.
        self.combinatorial_list_of_amplicons = [
            list(combination)
            for combination in combinatorial_list_maker(self.list_of_amplicons)
        ]
        self.combinatorial_list_of_names = combinatorial_list_maker(self.list_of_names)
        self.combinatorial_list_of_primer_tm = combinatorial_list_maker(
            self.list_of_amplicon_primer_temps
        )

        ### 4. Adding the pad
        for combination in self.combinatorial_list_of_amplicons:
            combination.insert(self.position_of_pad, self.pad)

        ### 5. Assembling and making overlapping primers
        self.list_of_assemblies = assembly_maker(
            self.combinatorial_list_of_amplicons, overlap=overlap
        )

        ### 6. Getting all primers, annotating, adding features
        self.primers = get_primers(
            self.list_of_assemblies,
            self.combinatorial_list_of_names,
            self.combinatorial_list_of_primer_tm,
        )

        ### 7. Unique primers; also re-attaches them to list_of_assemblies
        self.unique_primers = unique_primers(self.primers, self.list_of_assemblies)

        ### 8. Unique amplicons
        self.unique_amplicons = unique_amplicons(self.list_of_assemblies)

    def ShowContigs(self):
        """Print the template names of every assembly, grouped per contig.

        Only the first 15 characters of each template name are printed.
        Nothing is returned.
        """
        print("Template, Primer, tm")
        for i, assembly in enumerate(self.list_of_assemblies):
            print("\nContig" + str(self.systematic_names[i]))
            for amplicon in assembly:
                print("Template: ", amplicon.name[0:15])
        return

    def ShowVariantsLibDF(self):
        """Return a dataframe with one row per variant of the library.

        The columns hold the part names of the variant, followed by
        ``Systematic_name`` and a running ``Variant`` number starting at 0.
        """
        combinatorial_lib_variants_df = pd.DataFrame(self.combinatorial_list_of_names)
        combinatorial_lib_variants_df["Systematic_name"] = self.systematic_names
        combinatorial_lib_variants_df["Variant"] = np.arange(
            len(combinatorial_lib_variants_df)
        )
        return combinatorial_lib_variants_df

    def print_primer_list(self):
        """Print one row per unique primer."""
        for primers in self.unique_primers:
            print(primers)

    def primer_list(self):
        """Return a new list holding the rows of the unique primers."""
        return list(self.unique_primers)

    def primer_list_to_dataframe(self):
        """Return a pandas dataframe with the unique primers.

        An empty primer list gives an empty dataframe with the usual columns.
        """
        # Passing the headers to the constructor (instead of assigning
        # df.columns afterwards) also works when there are no rows.
        return pd.DataFrame(
            self.unique_primers,
            columns=[
                "ID",
                "Anneals to",
                "Sequence",
                "Annealing temperature",
                "Length",
                "Price(DKK)",
            ],
        )

    def print_PCR_list(self):
        """Print one comma-separated line per unique amplicon (PCR)."""
        print("PCR#, Template,forward_primer, reverse primer, F_tm, R_tm")
        for number, amplicon in enumerate(self.unique_amplicons, start=1):
            print(
                "PCR{number}".format(number=number),
                ",",
                amplicon.name,
                ",",
                amplicon.forward_primer.id,
                ",",
                amplicon.reverse_primer.id,
                ",",
                amplicon.forward_primer.features,
                ",",
                amplicon.reverse_primer.features,
            )

    def PCR_list(self):
        """Return one row per unique amplicon.

        Each row is ``[PCR#, template name, forward primer id,
        reverse primer id, forward tm, reverse tm]``; the melting
        temperatures are read from the primers' ``features`` attribute.
        """
        return [
            [
                "PCR{number}".format(number=number),
                amplicon.name,
                amplicon.forward_primer.id,
                amplicon.reverse_primer.id,
                amplicon.forward_primer.features,
                amplicon.reverse_primer.features,
            ]
            for number, amplicon in enumerate(self.unique_amplicons, start=1)
        ]

    def PCR_list_to_dataframe(self):
        """Return the rows of ``PCR_list`` as a pandas dataframe.

        An empty PCR list gives an empty dataframe with the usual columns.
        """
        return pd.DataFrame(
            self.PCR_list(),
            columns=[
                "PCR#",
                "Template",
                "forward_primer",
                "reverse_primer",
                "F_tm",
                "R_tm",
            ],
        )

    def graphical_representation_of_assemblies(self):
        """Return the figure of the first linear contig of every assembly.

        ``__init__`` never creates ``self.assembly_object``, so it is built
        here on first use from ``self.list_of_assemblies`` and cached. An
        ``assembly_object`` attribute set by the caller beforehand is used
        as is.
        """
        assembly_objects = getattr(self, "assembly_object", None)
        if assembly_objects is None:
            # NOTE(review): the Assembly limit is set to the overlap used
            # when the fragments were designed (35 if unknown) — confirm.
            homology_limit = getattr(self, "overlap", 35)
            assembly_objects = [
                Assembly(fragments, limit=homology_limit)
                for fragments in self.list_of_assemblies
            ]
            self.assembly_object = assembly_objects

        return [
            assembly_object.assemble_linear()[0].figure()
            for assembly_object in assembly_objects
        ]
def count_unique_parts(df, max_combinations: int):
    """Walk through the predictions and record every new part encountered.

    Rows are read by label ``0, 1, 2, ...`` from the columns ``G8H``,
    ``pG8H``, ``CPR`` and ``pCPR``. At the start of every step the number
    of combinations (product of the four unique-part counts so far) and the
    number of rows read so far are recorded; the walk stops once that
    number of combinations reaches ``max_combinations`` or when all rows
    have been read.

    Because the totals are recorded *before* the row of the same step is
    read, the part lists can contain one more row than 'Sum of parts' and
    'Predictions' account for when the threshold is reached.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing predictions, with the columns named above.
        Label-based access is used, so a default 0..n-1 index is expected.
    max_combinations : int
        threshold for the number of combinations at which the walk stops

    Returns:
    --------
    parts_encounteres : dict
        A dictionary containing the unique parts encountered in 'G8H','pG8H', 'pCPR', 'CPR' columns,
        the recorded number of combinations in 'Sum of parts' and the recorded number of rows in
        'Predictions' (both as strings; empty strings if the loop never ran).
    """
    part_columns = ("G8H", "pG8H", "CPR", "pCPR")
    parts_encounteres = {'G8H':[],'pG8H':[], 'pCPR': [], 'CPR': [], 'Sum of parts':'', 'Predictions': ''}
    n_rows = len(df)
    sum_of_parts = 0
    i = 0

    while sum_of_parts < max_combinations:
        # Combinations available from the parts seen in rows 0..i-1.
        sum_of_parts = 1
        for column in part_columns:
            sum_of_parts *= len(parts_encounteres[column])
        parts_encounteres['Sum of parts'] = str(sum_of_parts)
        parts_encounteres['Predictions'] = str(i)

        # Stop cleanly when the predictions run out before the threshold
        # is reached, instead of indexing past the last row.
        if i >= n_rows:
            break

        # Read the whole row first, then register the parts that are new.
        row_parts = [(column, df[column][i]) for column in part_columns]
        for column, part in row_parts:
            if part not in parts_encounteres[column]:
                parts_encounteres[column].append(part)
        i += 1

    return parts_encounteres