Source code for CompSensePack.dictionaries.MOD

"""
MOD.py - Method of Optimal Directions (MOD) algorithm for dictionary learning.

This module implements the MOD algorithm, which is used for learning a dictionary for sparse coding.
It includes utility functions to measure the distance between dictionaries and to learn a dictionary
that can sparsely represent a set of signals using the OMP algorithm.

Example usage:
--------------
>>> from MOD import MOD, I_findDistanceBetweenDictionaries
>>> data = np.random.randn(100, 200)  # Random signals
>>> params = {'K': 50, 'num_iterations': 10, 'initialization_method': 'DataElements', 'L': 5}
>>> dictionary, coefficients = MOD(data, params)

>>> # Distance between original and updated dictionary
>>> ratio, total_distance = I_findDistanceBetweenDictionaries(dictionary, new_dictionary)
"""

# system imports
import numpy as np
import scipy.sparse as sp
from scipy.linalg import solve
from .OMP import OMP  # Import OMP algorithm from the OMP module


[docs]
def I_findDistanceBetweenDictionaries(original, new):
    """
    Calculates the distance between two dictionaries.

    This function compares two dictionaries by calculating the number of columns 
    that have a very low error between the original and the new dictionary.

    Parameters
    ----------
    original : numpy.ndarray
        The original dictionary.
    
    new : numpy.ndarray
        The new dictionary.

    Returns
    -------
    ratio : float
        The ratio of dictionary elements with a small error (i.e., those that 
        satisfy the condition `errorOfElement < 0.01`).
    
    totalDistances : float
        The sum of all error values between the original and new dictionary elements.
    
    Notes
    -----
    - The function modifies the new dictionary to ensure that each column starts 
      with a positive value (consistent with the original dictionary).
    
    Example
    -------
    >>> original_dict = np.random.randn(100, 50)
    >>> new_dict = np.random.randn(100, 50)
    >>> ratio, total_distance = I_findDistanceBetweenDictionaries(original_dict, new_dict)
    """
    
    catchCounter = 0
    totalDistances = 0

    # Ensure columns in new dictionary start with positive values
    for i in range(new.shape[1]):
        new[:, i] = np.sign(new[0, i]) * new[:, i]

    for i in range(original.shape[1]):
        d = np.sign(original[0, i]) * original[:, i]
        distances = np.sum(new - np.tile(d, (1, new.shape[1])), axis=0)
        index = np.argmin(distances)
        errorOfElement = 1 - np.abs(new[:, index].T @ d)
        totalDistances += errorOfElement
        catchCounter += errorOfElement < 0.01

    ratio = catchCounter / original.shape[1]
    return ratio, totalDistances




[docs]
def MOD(data, parameters):
    """
    Method of Optimal Directions (MOD) algorithm for dictionary learning.

    The MOD algorithm is a method for learning a dictionary for sparse representation of signals.
    It iteratively updates the dictionary to best represent the input data with sparse coefficients
    using the Orthogonal Matching Pursuit (OMP) algorithm.

    Parameters
    ----------
    data : numpy.ndarray
        An (n x N) matrix containing N signals, each of dimension n.
    
    parameters : dict
        A dictionary containing the parameters for the MOD algorithm:
            - K : int
                The number of dictionary elements (columns) to train.
            
            - num_iterations : int
                The number of iterations to perform for dictionary learning.
            
            - initialization_method : str
                Method to initialize the dictionary. Options are:
                * 'DataElements' - Initializes the dictionary using the first K data signals.
                * 'GivenMatrix' - Initializes the dictionary using a provided matrix 
                  (requires 'initial_dictionary' key).

            - initial_dictionary : numpy.ndarray, optional
                The initial dictionary matrix to use if 'initialization_method' is 
                set to 'GivenMatrix'. It should be of size (n x K).

            - L : int
                The number of non-zero coefficients to use in OMP for sparse
                representation of each signal.

    Returns
    -------
    dictionary : numpy.ndarray
        The trained dictionary of size (n x K), where each column is a dictionary element.

    coef_matrix : numpy.ndarray
        The coefficient matrix of size (K x N), representing the sparse representation
        of the input data using the trained dictionary.
    
    Notes
    -----
    - The dictionary is initialized either from the data signals or from a provided matrix.
    - The OMP algorithm is used to solve the sparse coding step at each iteration.
    - The algorithm iteratively refines the dictionary by solving a least squares problem.
    - The columns of the dictionary are normalized to unit norm and positive first elements.
    
    Example
    -------
    >>> data = np.random.randn(100, 200)
    >>> params = {'K': 50, 'num_iterations': 10, 'initialization_method': 'DataElements', 'L': 5}
    >>> dictionary, coefficients = MOD(data, params)
    """
    
    # Check if the number of signals is smaller than the dictionary size
    if data.shape[1] < parameters['K']:
        print("MOD: number of training signals is smaller than the dictionary size. Returning trivial solution...")
        dictionary = data[:, :data.shape[1]]
        coef_matrix = np.eye(data.shape[1])  # Trivial coefficients
        return dictionary, coef_matrix

    # Initialize dictionary based on the specified method
    if parameters['initialization_method'] == 'DataElements':
        dictionary = data[:, :parameters['K']]
    elif parameters['initialization_method'] == 'GivenMatrix':
        if 'initial_dictionary' not in parameters:
            raise ValueError("initial_dictionary parameter is required when "
                             "initialization_method is set to 'GivenMatrix'.")
        dictionary = parameters['initial_dictionary']
    else:
        raise ValueError(
            "Invalid value for initialization_method. Choose 'DataElements' or 'GivenMatrix'.")

    # Convert to float64 for precision
    dictionary = dictionary.astype(np.float64)

    # Normalize dictionary columns and avoid division by zero
    column_norms = np.linalg.norm(dictionary, axis=0)
    column_norms[column_norms < 1e-10] = 1  # Prevent division by zero
    dictionary /= column_norms

    # Ensure positive first elements
    dictionary *= np.sign(dictionary[0, :])

    prev_dictionary = dictionary.copy()

    # Run MOD algorithm
    for iter_num in range(parameters['num_iterations']):
        # Step 1: Sparse coding using OMP
        coef_matrix = OMP(dictionary, data, parameters['L'])

        # Step 2: Update the dictionary
        regularization_term = 1e-7 * sp.eye(coef_matrix.shape[0])
        matrix_a = coef_matrix @ coef_matrix.T + regularization_term.toarray()

        # Use pinv for numerical stability: "lstsq" or "regularizations" could also work
        dictionary = data @ coef_matrix.T @ np.linalg.pinv(matrix_a)

        # Normalize dictionary columns and avoid division by zero
        column_norms = np.linalg.norm(dictionary, axis=0)
        column_norms[column_norms < 1e-10] = 1  # Prevent division by zero
        dictionary /= column_norms

        # Ensure positive first elements
        dictionary *= np.sign(dictionary[0, :])

        # Convergence check
        if np.linalg.norm(dictionary - prev_dictionary) < 1e-5:
            print(f"MOD converged after {iter_num + 1} iterations.")
            break

        prev_dictionary = dictionary.copy()

    return dictionary, coef_matrix