
    Li)                        d dl mZ d dlmZ d dlmZmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZ  G d d	e	j                        Zy)
    )annotations)Iterable)AnyLiteralN)Tensornn)SentenceTransformer)cos_simc                       e Zd Zedddf	 	 	 	 	 	 	 	 	 d	 fdZ	 d
	 	 	 	 	 ddZ	 d
	 	 	 	 	 ddZddZddZe	dd       Z
 xZS )"GlobalOrthogonalRegularizationLoss      ?meanc                    t         |           || _        || _        || _        || _        |s|st        d      |dvrt        d| d      || _        y)a	  
        Global Orthogonal Regularization (GOR) Loss that encourages embeddings to be well-distributed
        in the embedding space by penalizing high mean similarities and high second moments of similarities
        across unrelated inputs.

        The loss consists of two terms:

        1. Mean term: Penalizes when the mean similarity across unrelated embeddings is high
        2. Second moment term: Penalizes when the second moment of similarities is high

        A high second moment indicates that some embeddings have very high similarities, suggesting clustering
        or concentration in certain regions of the embedding space. A low second moment indicates that
        similarities are more uniformly distributed.

        The loss is called independently on each input column (e.g., queries and passages) and combines the results
        using either mean or sum aggregation. This is why the loss can be used on any dataset configuration
        (e.g., single inputs, pairs, triplets, etc.).

        It's recommended to combine this loss with a primary loss function, such as :class:`MultipleNegativesRankingLoss`.

        Args:
            model: SentenceTransformer model
            similarity_fct: Function to compute similarity between embeddings (default: cosine similarity)
            mean_weight: Weight for the mean term loss component. None or 0 can be used to disable this term (default: 1.0)
            second_moment_weight: Weight for the second moment term loss component. None or 0 can be used to disable this term (default: 1.0)
            aggregation: How to combine losses across input columns. Either "mean" or "sum" (default: "mean").
                The EmbeddingGemma paper uses "sum".

        References:
            - For further details, see: https://arxiv.org/abs/1708.06320 or https://arxiv.org/abs/2509.20354.
              The latter paper uses the equivalent of GOR with ``mean_weight=0.0`` and ``aggregation="sum"``.

        Inputs:
            +-------+--------+
            | Texts | Labels |
            +=======+========+
            | any   | none   |
            +-------+--------+

        Example:
            ::

                import torch
                from datasets import Dataset
                from torch import Tensor
                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
                from sentence_transformers.losses import GlobalOrthogonalRegularizationLoss, MultipleNegativesRankingLoss
                from sentence_transformers.util import cos_sim

                model = SentenceTransformer("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                })

                class InfoNCEGORLoss(torch.nn.Module):
                    def __init__(self, model: SentenceTransformer, similarity_fct=cos_sim, scale=20.0) -> None:
                        super().__init__()
                        self.model = model
                        self.info_nce_loss = MultipleNegativesRankingLoss(model, similarity_fct=similarity_fct, scale=scale)
                        self.gor_loss = GlobalOrthogonalRegularizationLoss(model, similarity_fct=similarity_fct)

                    def forward(self, sentence_features: list[dict[str, Tensor]], labels: Tensor | None = None) -> Tensor:
                        embeddings = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
                        info_nce_loss: dict[str, Tensor] = {
                            "info_nce": self.info_nce_loss.compute_loss_from_embeddings(embeddings, labels)
                        }
                        gor_loss: dict[str, Tensor] = self.gor_loss.compute_loss_from_embeddings(embeddings, labels)
                        return {**info_nce_loss, **gor_loss}

                loss = InfoNCEGORLoss(model)
                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

            Alternatively, you can use multi-task learning to train with both losses:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
                from sentence_transformers.losses import GlobalOrthogonalRegularizationLoss, MultipleNegativesRankingLoss
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                })
                mnrl_loss = MultipleNegativesRankingLoss(model)
                gor_loss = GlobalOrthogonalRegularizationLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset={"main": train_dataset, "gor": train_dataset},
                    loss={"main": mnrl_loss, "gor": gor_loss},
                )
                trainer.train()
        zDAt least one of mean_weight or second_moment_weight must be non-zero)r   sumz*aggregation must be 'mean' or 'sum', got ''N)super__init__modelsimilarity_fctmean_weightsecond_moment_weight
ValueErroraggregation)selfr   r   r   r   r   	__class__s         /var/www/html/lcp-python-backend/venv/lib/python3.12/site-packages/sentence_transformers/losses/GlobalOrthogonalRegularizationLoss.pyr   z+GlobalOrthogonalRegularizationLoss.__init__   sh    X 	
,&$8!#7cddo-I+VWXYY&    c                p    |D cg c]  }| j                  |      d    }}| j                  |      S c c}w )Nsentence_embedding)r   compute_loss_from_embeddings)r   sentence_featureslabelssentence_feature
embeddingss        r   forwardz*GlobalOrthogonalRegularizationLoss.forward   sA     bssM]djj!123GHs
s00<< ts   3c                   t        |D cg c]  }| j                  |       c} \  }}i }| j                  rVt        j                  |      }| j
                  dk(  r|j                         n|j                         }| j                  |z  |d<   | j                  rVt        j                  |      }	| j
                  dk(  r|	j                         n|	j                         }
| j                  |
z  |d<   |S c c}w )aZ  
        Compute the GOR loss from pre-computed embeddings.

        Args:
            embeddings: List of embedding tensors, one for each input column (e.g., [queries, passages])
            labels: Not used, kept for compatibility

        Returns:
            Dictionary containing the weighted mean term and second moment term losses
        r   gor_meangor_second_moment)	zipcompute_gorr   torchstackr   r   r   r   )r   r$   r"   	embedding
mean_termssecond_moment_termsresultsstacked_meanaggregated_meanstacked_second_momentaggregated_second_moments              r   r    z?GlobalOrthogonalRegularizationLoss.compute_loss_from_embeddings   s     +.]g/hPY0@0@0K/h*i'
' ;;z2L484D4D4Ml..0S_SdSdSfO"&"2"2_"DGJ$$$)KK0C$D!/3/?/?5/H%))+NcNhNhNj % ,0+D+DG_+_G'( 0is   C1c                b   |j                  d      }|j                  d      }| j                  ||      }|j                  d       ||dz
  z  }|j                         |z  j	                  d      }|j	                  d      j                         |z  }t        j                  |d|z  z
        }||fS )a%  
        Compute the Global Orthogonal Regularization terms for a batch of embeddings.

        The GOR loss encourages embeddings to be well-distributed by:
        1. Mean term (M_1^2): Penalizes high mean similarity, pushing embeddings apart
        2. Second moment term (M_2 - 1/d): Penalizes when the second moment exceeds 1/d, encouraging uniform distribution

        Args:
            embeddings: Tensor of shape (batch_size, embedding_dim)

        Returns:
            Tuple of (mean_term, second_moment_term) losses (unweighted)
        r      g           r   )sizer   fill_diagonal_r   powr+   relu)	r   r$   
batch_size
hidden_dim
sim_matrixnum_off_diagonal	mean_termsecond_momentsecond_moment_terms	            r   r*   z.GlobalOrthogonalRegularizationLoss.compute_gor   s      __Q'
__Q'
 ((Z@
!!#&%a8  ^^%(88==a@	 #q)--/2BB"ZZz9I(JK,,,r   c                t    | j                   j                  | j                  | j                  | j                  dS )N)r   r   r   r   )r   __name__r   r   r   r   s    r   get_config_dictz2GlobalOrthogonalRegularizationLoss.get_config_dict   s6    "11::++$($=$=++	
 	
r   c                     y)NaO  
@misc{zhang2017learningspreadoutlocalfeature,
      title={Learning Spread-out Local Feature Descriptors},
      author={Xu Zhang and Felix X. Yu and Sanjiv Kumar and Shih-Fu Chang},
      year={2017},
      eprint={1708.06320},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/1708.06320},
}
 rE   s    r   citationz+GlobalOrthogonalRegularizationLoss.citation   s    
r   )
r   r	   r   float | Noner   rJ   r   zLiteral['mean', 'sum']returnNone)N)r!   zIterable[dict[str, Tensor]]r"   Tensor | NonerK   dict[str, Tensor])r$   zlist[Tensor]r"   rM   rK   rN   )r$   r   rK   ztuple[Tensor, Tensor])rK   zdict[str, Any])rK   str)rD   
__module____qualname__r
   r   r%   r    r*   rF   propertyrI   __classcell__)r   s   @r   r   r      s     $'-0.4u'"u' "	u'
 +u' ,u' 
u'p W[=!<=FS=	= AE&0=	6-B
  r   r   )
__future__r   collections.abcr   typingr   r   r+   r   r   )sentence_transformers.SentenceTransformerr	   sentence_transformers.utilr
   Moduler   rH   r   r   <module>rZ      s,    " $    I .N Nr   