# Source code for constrain.design.combinatorial_design

#!/usr/bin/env python
# MIT License
# Copyright (c) 2022, Technical University of Denmark (DTU)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

""" This part of the design module is used for making combinatorial libraries from DNA fragments."""

# Standard library (itertools) and scientific stack (numpy, pandas)
import itertools
import numpy as np
import pandas as pd

# Pydna for the molecular bio
from pydna.design import primer_design
from pydna.design import assembly_fragments
from pydna.assembly import Assembly
from pydna.tm import tm_default as _tm_default

def combinatorial_list_maker(
    listOflist_that_is_being_made_into_all_combinations: list,
) -> list:
    """Build the Cartesian product of a list of lists.

    Parameters
    ----------
    listOflist_that_is_being_made_into_all_combinations : list[list[any_type]]
        One inner list per position; the elements may be of any type.

    Returns
    -------
    combinations : list[tuple(any_type)]
        One tuple per combination, each taking exactly one element from
        every inner list, in the order ``itertools.product`` yields them.

    """
    option_groups = listOflist_that_is_being_made_into_all_combinations
    return [*itertools.product(*option_groups)]


def systematic_names_function(List_of_list_parts: list) -> list:
    """Return systematic (1-based index) names for every combination of parts.

    Each inner list is replaced by the indexes ``1..len(inner list)`` and the
    Cartesian product of those index lists is returned.

    Parameters
    ----------
    List_of_list_parts : list[list[any_type]]
        One inner list per position; only the length of each inner list is
        used, so the elements may be of any type.

    Returns
    -------
    combinatorial_list_of_indexes : list[tuple[int]]
        The systematic names, e.g. ``[(1, 1, 1), (1, 2, 1), ...]``.

    """
    # One run of 1-based indexes per group of parts
    index_groups = [
        list(range(1, len(parts) + 1)) for parts in List_of_list_parts
    ]

    # itertools combines the index groups in the same order as the parts
    return list(itertools.product(*index_groups))


def empty_list_maker(list_of_sequences: list):
    """Return a list holding one new empty list per element of the input.

    Parameters
    ----------
    list_of_sequences : list
        Any list; only its length is used.

    Returns
    -------
    EmptyList : list[list]
        ``len(list_of_sequences)`` independent empty lists.

    """
    size = len(list_of_sequences)

    return [list() for _ in range(size)]


def simple_amplicon_maker(
    list_of_seqs: list, list_of_names: list, target_tm=56.0, limit=13
):
    """Design primers for every sequence and collect amplicons, primers and tm.

    Each sequence is passed to ``primer_design`` (with ``_tm_default`` as the
    melting temperature function) and the resulting amplicon is renamed with
    the matching entry of ``list_of_names``.

    Parameters
    ----------
    list_of_seqs : list[list[pydna.dseqrecord.Dseqrecord]]
        The sequences to turn into amplicons, grouped in inner lists.

    list_of_names : list[list[str]]
        Names for the sequences, indexed like ``list_of_seqs``; they are
        assigned to the amplicons because pydna renames them.

    target_tm : float = 56.0
        Forwarded to ``primer_design`` as ``target_tm``.

    limit : int = 13
        Forwarded to ``primer_design`` as ``limit``.

    Returns
    -------
    list_of_amplicons : list[list[pydna.amplicon.Amplicon]]
        The amplicons, grouped like ``list_of_seqs``.

    list_of_amplicon_primers : list[list[(pydna.seq.Seq, pydna.seq.Seq)]]
        Per amplicon a tuple where index0 = forward primer sequence and
        index1 = reverse primer sequence.

    list_of_amplicon_primer_temps : list[list[(float, float)]]
        Per amplicon a tuple where index0 = forward primer melting temp and
        index1 = reverse primer melting temp, both from ``_tm_default``.

    """
    list_of_amplicons = []
    list_of_amplicon_primers = []
    list_of_amplicon_primer_temps = []

    for group_index, seq_group in enumerate(list_of_seqs):
        group_amplicons = []
        group_primers = []
        group_temps = []

        for seq_index, template in enumerate(seq_group):
            # Another tm calculator (e.g. NEB) could be plugged in via tm_func
            amplicon = primer_design(
                template,
                tm_func=_tm_default,
                target_tm=target_tm,
                limit=limit,
            )

            # Restore the user supplied name
            amplicon.name = list_of_names[group_index][seq_index]
            group_amplicons.append(amplicon)

            forward_seq = amplicon.forward_primer.seq
            reverse_seq = amplicon.reverse_primer.seq
            group_primers.append((forward_seq, reverse_seq))

            # Melting temperatures use the same function as the design step
            group_temps.append(
                (_tm_default(forward_seq), _tm_default(reverse_seq))
            )

        list_of_amplicons.append(group_amplicons)
        list_of_amplicon_primers.append(group_primers)
        list_of_amplicon_primer_temps.append(group_temps)

    return list_of_amplicons, list_of_amplicon_primers, list_of_amplicon_primer_temps


def get_primers(
    List_of_assemblies: list,
    combinatorial_list_of_names: list,
    combinatorial_list_of_primer_tm: list,
):
    """Return ALL primers from the combinatorial library and annotate them.

    The amplicons are modified in place: each amplicon gets its name from
    ``combinatorial_list_of_names`` and both of its primers get

    * ``description`` - which fragment they anneal to and, when there is a
      neighbouring fragment on that side, which fragment they overlap to,
    * ``name`` - the name of the amplicon they anneal to,
    * ``features`` - the melting temperature rounded to two decimals (the
      attribute is used as plain storage for the tm).

    Parameters
    ----------
    List_of_assemblies : list[list[pydna.amplicon.Amplicon]]
        One inner list of amplicons per assembly.
    combinatorial_list_of_names : list[(str)]
        Names for the amplicons, indexed like ``List_of_assemblies``.
    combinatorial_list_of_primer_tm : list[(float, float),..)...]
        (forward tm, reverse tm) per amplicon, indexed like
        ``List_of_assemblies``.

    Returns
    -------
    primers : list[list[[pydna.primer.Primer, pydna.primer.Primer]]
        Per assembly, a ``[forward_primer, reverse_primer]`` pair for every
        amplicon.
    """
    primers = []

    for i, assembly in enumerate(List_of_assemblies):
        names = combinatorial_list_of_names[i]
        melting_temps = combinatorial_list_of_primer_tm[i]

        # Rename every fragment first, so that the "overlaps to" text below
        # always shows the final name of the neighbouring fragment.
        for j, amplicon in enumerate(assembly):
            amplicon.name = names[j]

        last = len(assembly) - 1
        assembly_primers = []
        for j, amplicon in enumerate(assembly):
            forward_primer = amplicon.forward_primer
            reverse_primer = amplicon.reverse_primer
            anneals_to = "Anneals to " + str(amplicon.name)

            # The forward primer only overlaps when there is a fragment before
            # this one, the reverse primer only when there is one after it.
            # A single fragment assembly therefore has no overlap text at all.
            if j == 0:
                forward_primer.description = anneals_to
            else:
                forward_primer.description = (
                    anneals_to + ", overlaps to " + str(assembly[j - 1].name)
                )
            if j == last:
                reverse_primer.description = anneals_to
            else:
                reverse_primer.description = (
                    anneals_to + ", overlaps to " + str(assembly[j + 1].name)
                )

            # Template the primers anneal to
            forward_primer.name = str(amplicon.name)
            reverse_primer.name = str(amplicon.name)

            # Primer tm
            forward_primer.features = round(float(melting_temps[j][0]), 2)
            reverse_primer.features = round(float(melting_temps[j][1]), 2)

            assembly_primers.append([forward_primer, reverse_primer])

        primers.append(assembly_primers)

    return primers


def assembly_maker(combinatorial_list_of_amplicons: list, overlap=35):
    """Run ``assembly_fragments`` on every combination of fragments.

    Parameters
    ----------
    combinatorial_list_of_amplicons : list[[pydna.amplicon.Amplicon]]
        One inner list of fragments per combination; each inner list is
        handed to ``assembly_fragments`` to get overlapping primers.

    overlap : int = 35
        How many basepairs the fragments should overlap; forwarded to
        ``assembly_fragments`` together with ``maxlink=40``.

    Returns
    -------
    List_of_assemblies : list[[pydna.amplicon.Amplicon]]
        The result of ``assembly_fragments`` for every combination, in the
        same order as the input.

    """
    return [
        assembly_fragments(fragments, overlap, maxlink=40)
        for fragments in combinatorial_list_of_amplicons
    ]


def unique_primers(primers: list, list_of_assemblies):
    """Find the unique primers of a combinatorial library and give them ids.

    Forward and reverse primers are de-duplicated separately (using ``in``,
    i.e. the primers' own equality). The unique forward primers get ids
    ``F001, F002, ...``; the reverse primers continue the same numbering with
    an ``R`` prefix. Afterwards every amplicon in ``list_of_assemblies`` whose
    primer sequence equals that of a unique primer is pointed at that unique
    primer object, so the amplicons carry the new ids.

    Parameters
    ----------
    primers : list[list[[pydna.primer.Primer, pydna.primer.Primer]]
        All [forward, reverse] primer pairs made for the library.

    list_of_assemblies : list[[pydna.amplicon.Amplicon]]
        Modified in place: ``forward_primer`` / ``reverse_primer`` are
        replaced by the matching unique primer.

    Returns
    -------
    unique_primers : list[list(ID,Anneals_to,Sequence,Annealing_temp,Length,Price(DKK))]
        One row per unique primer (forward primers first) built from the
        primer's ``id``, ``name``, ``seq``, ``features``, the sequence length
        and the length multiplied by 1.8.

    """

    def as_row(primer):
        # id, name, sequence, stored tm, length, price
        length = len(primer.seq)
        return [
            primer.id,
            primer.name,
            primer.seq,
            primer.features,
            length,
            length * 1.8,
        ]

    # Collect each primer once, keeping the order of first appearance
    forward_pool = []
    reverse_pool = []
    for assembly_primers in primers:
        for pair in assembly_primers:
            forward, reverse = pair[0], pair[1]
            if forward not in forward_pool:
                forward_pool.append(forward)
            if reverse not in reverse_pool:
                reverse_pool.append(reverse)

    # Number the primers; the reverse ids carry on where the forward ids stop
    primer_list = []
    for number, primer in enumerate(forward_pool, start=1):
        primer.id = "F{number:03}".format(number=number)
        primer_list.append(as_row(primer))
    for number, primer in enumerate(reverse_pool, start=len(forward_pool) + 1):
        primer.id = "R{number:03}".format(number=number)
        primer_list.append(as_row(primer))

    # Let the amplicons share the unique (renamed) primer objects
    for assembly in list_of_assemblies:
        for amplicon in assembly:
            for candidate in forward_pool:
                if amplicon.forward_primer.seq == candidate.seq:
                    amplicon.forward_primer = candidate
            for candidate in reverse_pool:
                if amplicon.reverse_primer.seq == candidate.seq:
                    amplicon.reverse_primer = candidate

    return primer_list


def unique_amplicons(list_of_assemblies: list):
    """Collect every distinct amplicon found in a list of assemblies.

    Parameters
    ----------
    list_of_assemblies : list[[pydna.amplicon.Amplicon]]
        The combinatorial library; one inner list of amplicons per assembly.

    Returns
    -------
    unique_amplicons : list[pydna.amplicon.Amplicon]
        The amplicons in order of first appearance; an amplicon is skipped
        when an equal one (as decided by ``in``) was already collected.
    """
    distinct = []
    for amplicon in itertools.chain.from_iterable(list_of_assemblies):
        if amplicon in distinct:
            continue
        distinct.append(amplicon)

    return distinct


def making_assembly_objects(list_of_assemblies: list):
    """Wrap every list of overlapping amplicons in a pydna ``Assembly``.

    Parameters
    ----------
    list_of_assemblies : list[[pydna.amplicon.Amplicon]]
        The combinatorial library with overlapping ends; one inner list of
        amplicons per assembly.

    Returns
    -------
    list_of_assembly_objects : list[pydna.assembly.Assembly]
        One ``Assembly`` per inner list, each created with ``limit=35``.

    """
    return [Assembly(fragments, limit=35) for fragments in list_of_assemblies]


def making_assembled_contigs(list_of_assembly_objects: list):
    """Call ``assemble_linear()`` on every assembly object.

    NOTE(review): the contigs returned by ``assemble_linear()`` are not kept;
    the function hands back the list it was given. Whether callers rely on
    that is not visible from here, so the behaviour is left as it is -
    confirm before changing the return value to the contigs.

    Parameters
    ----------
    list_of_assembly_objects : list[pydna.assembly.Assembly]
        The assembly objects to call ``assemble_linear()`` on.

    Returns
    -------
    list_of_assembly_objects : list[pydna.assembly.Assembly]
        The very same list that was passed in.
    """
    for assembly_object in list_of_assembly_objects:
        # The result is dropped on purpose, see the note above
        assembly_object.assemble_linear()

    return list_of_assembly_objects


class DesignAssembly:
    """Class able to make a combinatorial library from DNA fragments.

    On construction the class designs primers for every sequence, builds all
    combinations of the resulting amplicons, inserts the pad into every
    combination, lets pydna create overlapping primers and finally collects
    the unique primers and unique amplicons of the whole library.

    Parameters
    ----------
    list_of_seqs : list
        A list of list of a constructs of choice.
    list_of_names : list
        A list of list of the names wanted for the construct of choice.
    pad : pydna.Dseqrecord
        A nucleotide sequence to be incorporated into the primers (Max is 40 bp)
    position_of_pad : int
        the position in the list of seqs where the pad is incorporated (zero indexed)
    target_tm : float = 56.0
        Passed on to ``simple_amplicon_maker`` for the primer design.
    limit : int = 13
        Passed on to ``simple_amplicon_maker`` for the primer design.
    overlap : int = 35
        Passed on to ``assembly_maker``; also used as the ``limit`` of the
        ``Assembly`` objects built for the graphical representation.

    Returns
    -------
    constrain.design.combinatorial_design.DesignAssembly object
        A powerfull class and a lot of information can be retrieved.
        Such as: showing all the amplicons needed to construct a combinatorial library
        with the simple method --> PCR_list_to_dataframe or Primer_list_to_dataframe.

    """

    # Column headers shared by the dataframe helpers
    _PRIMER_COLUMNS = [
        "ID",
        "Anneals to",
        "Sequence",
        "Annealing temperature",
        "Length",
        "Price(DKK)",
    ]
    _PCR_COLUMNS = [
        "PCR#",
        "Template",
        "forward_primer",
        "reverse_primer",
        "F_tm",
        "R_tm",
    ]

    def __init__(
        self,
        list_of_seqs: list,
        list_of_names: list,
        pad,
        position_of_pad: int,
        target_tm=56.0,
        limit=13,
        overlap=35,
    ):

        ###  1.INITIALIZING ##
        self.list_of_seqs = list_of_seqs
        self.list_of_names = list_of_names
        self.pad = pad
        self.position_of_pad = position_of_pad
        # Kept so that Assembly objects can later be built with the same value
        self.overlap = overlap

        ### 2. Amplicons, primers, and their temperatures
        (
            self.list_of_amplicons,
            self.list_of_amplicon_primers,
            self.list_of_amplicon_primer_temps,
        ) = simple_amplicon_maker(
            self.list_of_seqs, self.list_of_names, target_tm=target_tm, limit=limit
        )

        # Systematic names
        self.systematic_names = systematic_names_function(self.list_of_seqs)

        ### 3. COMBINATORIAL LISTS
        # The combinations come back as tuples; they are turned into lists
        # because the pad has to be inserted below.
        self.combinatorial_list_of_amplicons = [
            list(combination)
            for combination in combinatorial_list_maker(self.list_of_amplicons)
        ]
        self.combinatorial_list_of_names = combinatorial_list_maker(self.list_of_names)
        self.combinatorial_list_of_primer_tm = combinatorial_list_maker(
            self.list_of_amplicon_primer_temps
        )

        #### 4. Adding PAD ###
        for combination in self.combinatorial_list_of_amplicons:
            combination.insert(self.position_of_pad, self.pad)

        ### 5. Assembling and making overlapping primers
        self.list_of_assemblies = assembly_maker(
            self.combinatorial_list_of_amplicons, overlap=overlap
        )

        ### 6. GETTING all primers, annotating, adding features
        self.primers = get_primers(
            self.list_of_assemblies,
            self.combinatorial_list_of_names,
            self.combinatorial_list_of_primer_tm,
        )

        ### 7. Getting Unique primers and re-annotating list_assemblies to get right names
        self.unique_primers = unique_primers(self.primers, self.list_of_assemblies)

        ### 8. Unique amplicons
        self.unique_amplicons = unique_amplicons(self.list_of_assemblies)

    def ShowContigs(self):
        """Print, per contig, the (truncated) names of its templates.

        Nothing is returned; the first 15 characters of every template name
        are printed under the systematic name of the contig.
        """
        print("Template, Primer, tm")
        for i, assembly in enumerate(self.list_of_assemblies):
            print("\nContig" + str(self.systematic_names[i]))
            for amplicon in assembly:
                print("Template: ", amplicon.name[0:15])
        return

    def ShowVariantsLibDF(self):
        """Return a dataframe of all the variants.

        The dataframe has one row per combination of names plus the columns
        ``Systematic_name`` and ``Variant`` (a running number from 0).
        """
        combinatorial_lib_variants_df = pd.DataFrame(self.combinatorial_list_of_names)
        combinatorial_lib_variants_df["Systematic_name"] = self.systematic_names
        combinatorial_lib_variants_df["Variant"] = np.arange(
            len(combinatorial_lib_variants_df)
        )

        return combinatorial_lib_variants_df

    def print_primer_list(self):
        """Print every unique primer row on its own line."""
        for primers in self.unique_primers:
            print(primers)

    def primer_list(self):
        """Return a new list holding the unique primer rows."""
        return list(self.unique_primers)

    def primer_list_to_dataframe(self):
        """Return a pandas dataframe with list of primers.

        The column headers are set in the constructor call, so an empty
        primer list gives an empty dataframe with the right headers.
        """
        return pd.DataFrame(self.unique_primers, columns=self._PRIMER_COLUMNS)

    def print_PCR_list(self):
        """Print the PCR list, one PCR per line, fields separated by ' , '."""
        print("PCR#, Template,forward_primer, reverse primer, F_tm, R_tm")
        for row in self.PCR_list():
            print(" , ".join(str(value) for value in row))

    def PCR_list(self):
        """Return the PCR list.

        Returns
        -------
        pcr_list : list[list]
            Per unique amplicon: the PCR number (``PCR1``, ``PCR2``, ...), the
            amplicon name, the ids of its forward and reverse primer and the
            ``features`` (the stored tm) of both primers.
        """
        pcr_list = []
        for number, amplicon in enumerate(self.unique_amplicons, start=1):
            pcr_list.append(
                [
                    "PCR{number}".format(number=number),
                    amplicon.name,
                    amplicon.forward_primer.id,
                    amplicon.reverse_primer.id,
                    amplicon.forward_primer.features,
                    amplicon.reverse_primer.features,
                ]
            )

        return pcr_list

    def PCR_list_to_dataframe(self):
        """Return the PCR list as a pandas dataframe.

        The column headers are set in the constructor call, so an empty PCR
        list gives an empty dataframe with the right headers.
        """
        return pd.DataFrame(self.PCR_list(), columns=self._PCR_COLUMNS)

    def graphical_representation_of_assemblies(self):
        """Return the figure of the first linear contig of every assembly.

        If an ``assembly_object`` attribute (a list of pydna ``Assembly``
        objects) has been set on the instance it is used. The constructor
        never sets it, so otherwise the ``Assembly`` objects are built here
        from ``list_of_assemblies`` with ``limit=self.overlap``.
        """
        assembly_objects = getattr(self, "assembly_object", None)
        if assembly_objects is None:
            assembly_objects = [
                Assembly(fragments, limit=self.overlap)
                for fragments in self.list_of_assemblies
            ]

        graphical_representation = [
            assembly.assemble_linear()[0].figure() for assembly in assembly_objects
        ]

        return graphical_representation


def count_unique_parts(df, max_combinations: int):
    """Iterate through the list of predictions and save new encountered parts.

    Rows are read one at a time (``df.<column>[row]`` with row = 0, 1, ...)
    and every part not seen before is stored. The number of combinations is
    the product of the number of unique parts in the four part columns.
    Reading stops as soon as that number reaches ``max_combinations`` or when
    there are no more rows, so the stored parts always agree with the
    reported 'Sum of parts' and 'Predictions'.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing predictions, with the columns 'G8H', 'pG8H',
        'CPR' and 'pCPR'.
    max_combinations : int
        The number of combinations at which to stop reading predictions.

    Returns:
    --------
    parts_encounteres : dict
        A dictionary containing the unique parts encountered in 'G8H','pG8H', 'pCPR', 'CPR' columns,
        total number of unique combinations encountered in 'Sum of parts' and total predictions
        encountered in 'Predictions' (both as strings; they stay empty
        strings when ``max_combinations`` is not positive).

    """
    part_columns = ("G8H", "pG8H", "CPR", "pCPR")
    parts_encountered = {
        "G8H": [],
        "pG8H": [],
        "pCPR": [],
        "CPR": [],
        "Sum of parts": "",
        "Predictions": "",
    }
    number_of_rows = len(df)
    sum_of_parts = 0
    row = 0

    while sum_of_parts < max_combinations:
        # Combinations that can be made from the parts stored so far
        sum_of_parts = 1
        for column in part_columns:
            sum_of_parts *= len(parts_encountered[column])

        parts_encountered["Sum of parts"] = str(sum_of_parts)
        parts_encountered["Predictions"] = str(row)

        # Stop BEFORE reading another row: either the target is reached or
        # the predictions are used up.
        if sum_of_parts >= max_combinations or row >= number_of_rows:
            break

        for column in part_columns:
            part = getattr(df, column)[row]
            if part not in parts_encountered[column]:
                parts_encountered[column].append(part)
        row += 1

    return parts_encountered