Source code for fluxpy.utils.utils

""" Functionalities to handle metabolic modeling analysis objects"""

import numpy as np
import pandas as pd
import cobra
from typing import Union, Optional
from ..constants import *

[docs]NestedDataFrameType = pd.DataFrame


[docs]def extract_model_namespace(model):
    """
    Returns a model's namespace considering only ModelSEED and BiGG

    Args:
        model (cobra.Model, mandatory) -- the model to extract its namespace from

    Returns:
        namespace (str) -- a Literal["modelseed", "bigg"]
    """
    return "modelseed" if model.metabolites[0].id.startswith("cpd") else "bigg"

# %% Util functions: parsing

[docs]def nonzero_fluxes(fluxes:  Union[cobra.Solution, pd.Series]) -> None:
    """
    Returns nonzero fluxes from a pandas Series.

    This function takes a pandas Series containing flux values and returns a new Series with only the nonzero fluxes.

    Args:
        sol (pd.Series, mandatory): A pandas Series representing flux values.

    Returns:
        pd.Series: A pandas Series containing only the nonzero flux values from the input Series.
    """
    if not isinstance(fluxes, cobra.Solution) and not isinstance(fluxes, pd.Series):
        return ValueError("Provide a cobra.Solution or a pandas.Series element as fluxes.")
    if isinstance(fluxes, cobra.Solution):
        fluxes = fluxes.fluxes
    mask = fluxes.to_numpy().nonzero()
    return fluxes.iloc[mask]



[docs]def get_rxns_producing_consuming_met(met_id, model: cobra.Model = None, flux_vector: pd.Series = None):
    """
    Returns a DataFrame with the reaction IDs of those consuming and producing a given metabolite in a model.

    This function optimizes the given metabolic model to identify reactions involving a specified metabolite.
    It categorizes reactions into those producing and those consuming the metabolite and returns this information
    in a DataFrame.

    Args:
        model (cobra.Model, mandatory): The metabolic model to be analyzed.
        met_id (str, optional): The ID of the metabolite of interest.
        flux_vector (pd.Series, optional):

    Returns:
        pd.DataFrame: A DataFrame with two columns:
                    - `consuming_{met_id}`: Reactions consuming the specified metabolite in flux vector provided
                    - `producing_{met_id}`: Reactions producing the specified metabolite in flux vector provided

    Examples:

        >>> import cobra
        >>> model = cobra.io.read_sbml_model('e_coli_core.xml')
        >>> df = get_reactions_producing_met(model, 'met_id')
        >>> print(df)
            consuming_met_id producing_met_id
        0  reaction1          reaction2

    Notes:
        - The function assumes the model is properly optimized and that each reaction's flux can be accessed via the solution object.
        - Reactions with a flux less than 0 are considered to be consuming reactants and producing products in the reverse direction.
        - Reactions with a flux greater than 0 are considered to be producing reactants and consuming products in the forward direction.
        - This applies for the specific medium provided with the model
    """
    if model is None or not isinstance(model, cobra.Model):
        return TypeError("model needs to be a cobra.Model object.")

    if flux_vector is not None and not isinstance(flux_vector, pd.Series):
        return TypeError("flux_vector needs to be a pd.Series object.")

    if flux_vector is None:
        flux_vector = model.optimize()

    reactions_of_interest = model.metabolites.get_by_id(met_id).reactions

    reactions_producing_met = []
    reactions_consuming_met = []

    for reaction in reactions_of_interest:

        if flux_vector[reaction.id] < 0:

            for reactant in reaction.reactants:
                if met_id == reactant.id:
                    reactions_producing_met.append(reaction)
                    break

            for product in reaction.products:
                if met_id == product.id:
                    reactions_consuming_met.append(reaction)
                    break

        if flux_vector[reaction.id] > 0:

            for reactant in reaction.reactants:
                if met_id == reactant.id:
                    reactions_consuming_met.append(reaction)
                    break
            for product in reaction.products:
                if met_id == product.id:
                    reactions_producing_met.append(reaction)
                    break

    cons_df = pd.DataFrame({
        f'consuming {met_id}': reactions_consuming_met,
    })
    prod_df = pd.DataFrame({
        f'producing {met_id}': reactions_producing_met
    })
    return (cons_df, prod_df)


[docs]def track_metabolite_pathways(
    metabolite,
    model,
    skip_objective=True,
    _namespace=None,  # Literal["modelseed", "bigg"]
    _cofactors=None,
    _visited_metabolites=None,
    _inner_reactions=None,
    ):
    """
    Returns pathways that are initiated from a nutrient.

    """
    if _namespace is None:
        _namespace = extract_model_namespace(model)
    if _cofactors is None:
        _cofactors = MODELSEED_COFACTORS if _namespace == "modelseed" else BIGG_COFACTORS if _namespace == "bigg" else \
                    (_ for _ in ()).throw(ValueError("Invalid namespace. Only 'modelseed' or 'bigg' are allowed."))

    if skip_objective:
        for r in model.reactions:
            if r.objective_coefficient != 0 :
                model.remove_reactions([r])
        skip_objective = False

    _inner_reactions = _inner_reactions if _inner_reactions is not None else set()
    _visited_metabolites = _visited_metabolites if _visited_metabolites is not None else set()

    # Mark metabolite as visited
    _visited_metabolites.add(metabolite.id)

    reactions_with_the_met = model.metabolites.get_by_id(metabolite.id).reactions
    has_irreversible_reaction = any(not reaction.reversibility for reaction in reactions_with_the_met)

    print("\n\n============\n\n", "Metabolite under study:", metabolite.name, metabolite.id)
    print("Metabolite's total related reactions to be parsed:", reactions_with_the_met)

    # Check reactions involving this metabolite
    for reaction in reactions_with_the_met:

        # Add reaction to _inner_reactions if it's not already there
        if reaction not in _inner_reactions:
            _inner_reactions.add(reaction)

            print("\nReaction", reaction.id, "added in inner_reactions and is now under proc.")

            # ----
            # Traverse through metabolites in the reaction
            # ----

            # In case of a irreversible reaction..
            if not reaction.reversibility:
                if metabolite in reaction.reactants:
                    for in_metabolite in reaction.products:
                        # [NOTE] If it's an exchange reaction, then products are empty
                        print("1.Run recursive for IRREVERSIBLE met", in_metabolite.id, "from reaction in proc:", reaction.id)
                        _perform_recursive(in_metabolite, reaction, model, _visited_metabolites, _cofactors, _inner_reactions)
                else:
                    print("2.Metabolite:", metabolite.id, " not as reactant in:", reaction.build_reaction_string(),
                          "from reaction in proc:", reaction.id, ". Not new recursive.")

            # In case of a reversible reaction..
            else:
                # where the metabolite under study either is found only in this reaction or there is at least 1 reaction in those it participates that is reversible
                if len(reactions_with_the_met) == 1 or has_irreversible_reaction is False:

                    # then, parse through the reaction's products if the met under study is among its reactants
                    if metabolite in reaction.reactants:
                        for in_metabolite in reaction.products:
                            print("3.New recursive for REVERSIBLE reaction in proc", reaction.id, "in products, for met:", in_metabolite.id)
                            _perform_recursive(in_metabolite, reaction, model, _visited_metabolites, _cofactors, _inner_reactions)
                    # or the other way around
                    else:
                        for in_metabolite in reaction.reactants:
                            print("4.New recursive for REVERSIBLE reaction in proc", reaction.id, "in reactants, for met:", in_metabolite.id)
                            _perform_recursive(in_metabolite, reaction, model, _visited_metabolites, _cofactors, _inner_reactions)
                # where the met under study
                else:
                    print("5.Reaction", reaction.id, "is irreversible and there are more than 1 reactions with the met under study (", metabolite.id,
                          ") among which there is at least one reversible reaction. Thus, we skip further parsing for reac.")

    return(_inner_reactions, _visited_metabolites)




"""
Internal routines
"""
[docs]def _convert_list_to_binary(lst):
    """
    Converts an ndarray to a binary decision based on the presence of at least one nonzero/True value.

    This function checks if the input `lst` is a NumPy ndarray. If so, it evaluates whether there is
    at least one nonzero (or True) element in the array. If such an element exists, the function returns 1;
    otherwise, it returns 0. If `lst` is not a NumPy ndarray, it returns the original `lst` unchanged.

    Parameters:
    lst (any): The input that might be a NumPy ndarray.

    Returns:
    int or any: Returns 1 if `lst` is a NumPy ndarray with at least one nonzero/True value;
                returns 0 if `lst` is a NumPy ndarray with all elements zero/False;
                returns the original `lst` if it is not a NumPy ndarray.
    """
    if isinstance(lst, np.ndarray):
        if any(lst):
            return 1
        else:
            return 0
    else:
        return lst


[docs]def _convert_single_element_set(cell):
    """
    Converts a set with a single element to that element.

    This function checks if the input `cell` is a set containing exactly one element.
    If so, it converts the set to that single element. If `cell` is not a set with
    a single element, it returns the original `cell` unchanged.

    Parameters:
    cell (any): The input that might be a set with a single element.

    Returns:
    any: The single element if `cell` is a set with one element; otherwise, the original `cell`.
    """
    if isinstance(cell, set) and len(cell) == 1:
        return next(iter(cell))  # Convert set to string
    return cell


[docs]def _perform_recursive(in_met, reaction, model, visited_metabolites, cofactors, inner_reactions):
    # Check if metabolite is not in BIGG_COFACTORS and not visited
    if (
        in_met.id not in cofactors and
        in_met.id not in visited_metabolites
    ):
        print(">>", in_met.id, "from", reaction.build_reaction_string())
        # Recursively find inner reactions for this metabolite
        track_metabolite_pathways(
            metabolite=in_met,
            model=model,
            _cofactors = cofactors,
            _visited_metabolites=visited_metabolites,
            _inner_reactions=inner_reactions
        )


[docs]def find_connected_components(graph):
    # Define a function to find connected components using Depth-First Search (DFS)

    # A set to keep track of visited nodes
    visited = set()
    # A list to store the connected components
    components = []

    # Helper function to perform a recursive DFS
    def dfs(node, component):
        # Mark the current node as visited
        visited.add(node)
        # Add the node to the current component
        component.append(node)
        # Visit all unvisited neighbors of the current node
        for neighbor in graph[node]:
            if neighbor not in visited:
                dfs(neighbor, component)

    # Iterate through all nodes in the graph
    for node in graph:
        # If the node has not been visited, it's the start of a new connected component
        if node not in visited:
            # Create a new component (list) to hold connected nodes
            component = []
            # Perform DFS starting from this node
            dfs(node, component)
            # Add the completed component to the list of components
            components.append(component)

    # Return the list of connected components
    return components
        

[docs]def order_cofactors_by_abundance(sorted_counts_data, specific_reaction_cofactors):           
    """
    Function that takes the cofactors of a single reaction and sorts them 
    based on their overall abundance in the model

    Example input:
    
    sorted_counts_data = [('h_c', 35), ('h2o_c', 18), ('h_e', 17), ('atp_c', 13), 
                        ('adp_c', 12), ('nad_c', 12), ('nadh_c', 12), ('pi_c', 12)]
                        
    specific_reaction_cofactors = ['h_e', 'h_c', 'adp_c', 'nadh_c']
    
    Example output:
    
    [35, 17, 12, 12]
    """

    # Convert the list of tuples to a dictionary for fast lookup
    abundance_dict = dict(sorted_counts_data)
    
    # Sort the items based on their abundance in the dictionary
    sorted_cofactors = sorted(specific_reaction_cofactors, key=lambda x: abundance_dict.get(x, 0), reverse=True)
    sorted_abundances = [abundance_dict.get(item, 0) for item in sorted_cofactors]
    
    return sorted_abundances

        
[docs]def find_first_winning_sublist_with_losers(sublists):
    """
    Find the "winning" sublist and indices of non-winning sublists.
    
    Parameters:
        sublists (list of lists): Input list of sublists to compare.

    Returns:
        tuple: (winning_sublist, losers_indices)

    Example input:
    
    sublists = [
        [9, 3, 5, 7],
        [9, 3, 4, 6],
        [0, 5, 3, 3],
        [4, 2, 1, 6],
        [7, 3, 2, 0]
    ]
    
    Example output:
    
    [9, 3, 5, 7]
    
    """
    
    winning_sublist = max(sublists, key=lambda x: x)
    losers_indices = [i for i, sublist in enumerate(sublists) if sublist != winning_sublist]
    
    return winning_sublist, losers_indices