
    LioX                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ 	 d dlmZ d dlZd d	lmZmZmZmZmZmZ d d
lmZ d dlmZ d dl m!Z! er e       rd dl"m#Z# 	 d dlm$Z$m%Z% 	 d dlm&Z&  ejN                  e(      Z)ed        Z*ddZ+ G d de!      Z,y# e$ r	 d dlmZ Y |w xY w# e$ r  G d d      Z$ G d d      Z%Y ]w xY w# e$ r  G d d      Z&Y kw xY w)    )annotationsN)Callable)contextmanager)Path)TYPE_CHECKINGAny)load_onnx_modelload_openvino_model)Self)
AutoConfig	AutoModelAutoTokenizer	MT5ConfigPretrainedConfigT5Config)is_peft_available)find_adapter_config_file)InputModule
PeftConfig)T5Gemma2ConfigT5Gemma2TextConfigc                      e Zd Zy)r   N__name__
__module____qualname__     n/var/www/html/lcp-python-backend/venv/lib/python3.12/site-packages/sentence_transformers/models/Transformer.pyr   r           r   r   c                      e Zd Zy)r   Nr   r   r   r    r   r   #   r!   r   r   )T5GemmaConfigc                      e Zd Zy)r#   Nr   r   r   r    r#   r#   +   r!   r   r#   c           	   +  >  K   |D ci c]  }|t        | |d        }}	 |j                         D ]  \  }}t        | ||        d  |j                         D ]  \  }}t        | ||        y c c}w # |j                         D ]  \  }}t        | ||        w xY ww)N)getattritemssetattr)cls	overridesname	originalsvalues        r    set_temporary_class_attrsr.   2   s     <EFDwsD$//FIF&$??, 	&KD%Cu%	&$??, 	&KD%Cu%	& G %??, 	&KD%Cu%	&s&   BA.B)A3 +B3'BBc                     d fd}|S )Nc                t    t        j                  t        |       z  d        t        |       z  fi |S )NT)exist_ok)osmakedirsr   )save_directorykwargs_save_pretrained_fn	subfolders     r    wrapperz)_save_pretrained_wrapper.<locals>.wrapper?   s5    
D(94tD"4#7)#CNvNNr   )r4   z
str | PathreturnNoner   )r6   r7   r8   s   `` r    _save_pretrained_wrapperr;   >   s    O Nr   c                      e Zd ZU dZdZded<   ddgZded<   d	Zd
ed<   	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ		 	 	 	 	 	 	 	 	 	 	 	 ddZ
ddZddZddZ	 d	 	 	 	 	 ddZdd dZe	 	 	 	 	 	 	 	 	 	 d!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"d       Ze	 	 	 	 	 	 	 	 	 	 d!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d#d       Ze	 	 	 	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d% fd       Z xZS )&Transformera  Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
    sentence_bert_config.jsonstrconfig_file_namemax_seq_lengthdo_lower_casez	list[str]config_keysTboolsave_in_rootc
                J   t         |           || _        |	| _        |i }|i }|i }| j	                  |||	|      \  }
} | j
                  ||
||	|fi | t        t        j                  | j                  j                        j                        }t        |      h dz  | _        |	d|vr||d<   t        j                  ||n|fd|i|| _        |t#        | j                  d      rtt#        | j                  j$                  d      rTt#        | j                   d      r>t'        | j                  j$                  j(                  | j                   j*                        }|| _        |:| j                   j.                  j0                  | j                  j$                  _        y y )N>   	input_idsinputs_embedsattention_masktoken_type_idsmodel_max_length	cache_dirconfigmax_position_embeddings)super__init__rB   backend_load_config_load_modellistinspect	signature
auto_modelforward
parameterssetmodel_forward_paramsr   from_pretrained	tokenizerhasattrrM   minrN   rK   rA   	__class__r   tokenizer_class)selfmodel_name_or_pathrA   
model_argstokenizer_argsconfig_argsrL   rB   tokenizer_name_or_pathrQ   rM   is_peft_modelr[   r`   s                r    rP   zTransformer.__init__b   s    	*J!NK $ 1 12DiQXZe f+VYeZde  $G$5$5doo6M6M$N$Y$YZ$'(<$= A
 %
! %*<N*R1?N-.&66&<&H"N`

 
 !2DOO224MNDNN,>?!$T__%;%;%S%SUYUcUcUtUt!u,!-59^^5M5M5V5VDOO""2 .r   c           
     @   t        |||j                  d      |j                  d      |j                  dd            	 Dt               st        d      |dk7  rt	        d      d	d
lm}  |j                  |fi |d|idfS t        j                  |fi |d|idfS )a  Loads the transformers or PEFT configuration

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            config_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers config.

        Returns:
            tuple[PretrainedConfig, bool]: The model configuration and a boolean indicating whether the model is a PEFT model.
        tokenrevisionlocal_files_onlyF)rL   rj   rk   rl   zgLoading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`.torcha  PEFT models can currently only be loaded with the `torch` backend. To use other backends, load the model with `backend="torch"`, call `model.transformers_model.merge_and_unload()`, save that model with `model.save_pretrained()` and then load the model with the desired backend.r   r   rL   T)	r   getr   	Exception
ValueErrorpeftr   r\   r   )rb   rc   rL   rQ   rf   r   s         r    rR   zTransformer._load_config   s      %"#!oog.$4!,1CU!K  %&}  '! w 
 (-:--.@eKe[degkkk))*<aaW`achhhr   c                8   |dk(  r|rdD ]  }|j                  |d        t        |t              r9ddlm} t        |dg      5   |j                  |f||d|| _        ddd       yt        |t              r9dd	lm	}	 t        |	dg      5   |	j                  |f||d|| _        ddd       yt        |t              r@dd
lm}
 d|_        t        |
dg      5   |
j                  |f||d|| _        ddd       yt        |t              rDddlm} t        |ddg      5   |j                  |f|j                   |d|| _        ddd       yt        |t"              r"ddlm}  |j                  |f||d|| _        yt%        j                  |f||d|| _        y|dk(  rt'        d||dd|| _        y|dk(  rt)        d||dd|| _        yt+        d| d      # 1 sw Y   yxY w# 1 sw Y   yxY w# 1 sw Y   yxY w# 1 sw Y   yxY w)a  Loads the transformers or PEFT model into the `auto_model` attribute

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            config ("PeftConfig" | PretrainedConfig): The model configuration.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            is_peft_model (bool): Whether the model is a PEFT model.
            model_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers model.
        rm   )rk   Nr   )T5EncoderModelz	decoder.*)"_keys_to_ignore_on_load_unexpected)rM   rL   )MT5EncoderModel)T5GemmaEncoderModelF)T5Gemma2Encoderzmodel.encoder)base_model_prefixrt   onnxzfeature-extraction)rc   rM   	task_nameopenvinozUnsupported backend 'z6'. `backend` should be `torch`, `onnx`, or `openvino`.r   )pop
isinstancer   transformersrs   r.   r\   rW   r   ru   r#   rv   is_encoder_decoderr   .transformers.models.t5gemma2.modeling_t5gemma2rw   encoderr   r   r	   r
   rp   )rb   rc   rM   rL   rQ   rh   rd   adapter_only_kwargrs   ru   rv   rw   s               r    rS   zTransformer._load_model   s   ( g *6 =&NN#5t<= &(+7.~cnbop &Dn&D&D*'39Y'R\'DO  FI.8.docpq &Eo&E&E*'39Y'R\'DO  FM2<,1)./Bhsgtu &I&9&I&I*'39Y'R\'DO  FN3Z.#&58C} 
 'Fo&E&E*'39>>Y'Zd'DO  F$67Z"A/"A"A&#/5#NX# #,";";&#/5#NX# - #5. 	DO 
"1 #5. 	DO 4WI=stuuy    s0   G,G8H)&H,G58HHHc                |    dt        | j                         | j                  j                  j                         dS )NzTransformer()architecture))dictget_config_dictrW   r`   r   )rb   s    r    __repr__zTransformer.__repr__"  s3    d4#7#7#9HaHaHjHjkllmnnr   c                   |j                         D ci c]  \  }}|| j                  v s|| }}} | j                  di ||ddi}|d   }||d<   t               rddlm} t        | j                  |      r| j                  j                  j                  rr|j                  d      }	|d   }
t        j                  |	| j                  j                  j                  |
j                        }t        j                  ||
fd	      |d<   | j                  j                  j                   rd
|v r|d
   |d<   |S c c}}w )a^  
        Forward pass through the transformer model.

        This method processes the input features through the underlying transformers model
        and returns the token embeddings along with any other relevant outputs.

        Notes:
            - Only passes arguments that are expected by the underlying transformer model

        Args:
            features (dict[str, torch.Tensor]): Input features dictionary containing at least
                'input_ids' and 'attention_mask'. May also contain other tensors required by
                the underlying transformer model.
            **kwargs: Additional keyword arguments to pass to the underlying transformer model.

        Returns:
            dict[str, torch.Tensor]: Updated features dictionary containing the input features, plus:
                - 'token_embeddings': Token-level embeddings from the transformer model
                - 'attention_mask': Possibly modified attention mask if using PeftModel with prompt learning
                - 'all_layer_embeddings': If the model outputs hidden states, contains embeddings from all layers
        return_dictTr   token_embeddings)PeftModelForFeatureExtractionrI   )device   )dimhidden_statesall_layer_embeddingsr   )r'   r[   rW   r   rq   r   r}   active_peft_configis_prompt_learningsizerm   onesnum_virtual_tokensr   catrM   output_hidden_states)rb   featuresr5   keyr-   trans_featuresoutputsr   r   
batch_sizerI   prefix_attention_masks               r    rX   zTransformer.forward%  s2   , 8@~~7Gle3RVRkRkKk#u*ll!$//ONOfO$O"1:'7#$ : 4??,IJOO66II-2215
!)*:!;(-

 B B U U^l^s^s)% .3YY8M~7^de-f)*??!!66?g;U/6/GH+,3 ms
   D;D;c                    	 | j                   j                  j                         }|j                  S # t        $ r$ | j                   j                  }Y |j                  S w xY w)a  Get the output embedding dimension from the transformer model.

        Returns:
            int: The hidden dimension size of the model's embeddings.

        Raises:
            AttributeError: If the embedding dimension cannot be determined from the model config.
        )rW   rM   get_text_configAttributeErrorhidden_size)rb   text_configs     r    get_word_embedding_dimensionz(Transformer.get_word_embedding_dimensionV  sY    	1//00@@BK &&&  	1//00K&&&	1s   $2 AAc           
        i }t        |d   t              r|g}nt        |d   t              r\g }g |d<   |D ]L  }t        t	        |j                                     \  }}|j                  |       |d   j                  |       N |g}n7g g }	}|D ]*  }
|j                  |
d          |	j                  |
d          , ||	g}|D cg c])  }|D cg c]  }t        |      j                          c}+ }}}| j                  r-|D cg c]   }|D cg c]  }|j                          c}" }}}|j                   | j                  ||dd| j                  d       |S c c}w c c}}w c c}w c c}}w )z-Tokenizes a text and maps tokens to token-idsr   	text_keysr   longest_firstpt)padding
truncationreturn_tensors
max_length)r}   r?   r   nextiterr'   appendstriprB   lowerupdater]   rA   )rb   textsr   outputto_tokenizelookuptext_keytextbatch1batch2
text_tuplecolss                r    tokenizezTransformer.tokenizeh  sr    eAh$ 'Ka$'K"$F; 5!%d6<<>&:!;$""4({#**845 '-KFF# -
jm,jm,- "6*K ALL41A4LL ?JKs3!AGGI3KKKDNN*#..	
  5L 4Ks0   	E$ E.E$	E/E*'E/E$*E/c                    | j                   j                  ||       | j                  j                  |       | j                  |       y )N)safe_serialization)rW   save_pretrainedr]   save_config)rb   output_pathr   r5   s       r    savezTransformer.save  s:    ''HZ'[&&{3%r   c                N    | j                  |||||||||	|
|      } | dd|i|S )N)rc   r7   rj   cache_folderrk   rl   trust_remote_codemodel_kwargstokenizer_kwargsconfig_kwargsrQ   rc   r   )_load_init_kwargs)r)   rc   r7   rj   r   rk   rl   r   r   r   r   rQ   r5   init_kwargss                 r    loadzTransformer.load  sP    $ ++1%-/%-' , 
 H&8HKHHr   c                   | j                  ||||||      }|||||d}d|vri |d<   d|vri |d<   d|vri |d<   |d   j                  |       |d   j                  |       |d   j                  |       |r|d   j                  |       |	r|d   j                  |	       |
r|d   j                  |
       i |||dS )N)rc   r7   rj   r   rk   rl   )r7   rj   rk   rl   r   rd   re   rf   )rL   rQ   )load_configr   )r)   rc   r7   rj   r   rk   rl   r   r   r   r   rQ   r5   rM   
hub_kwargss                  r    r   zTransformer._load_init_kwargs  s   $ 1%- ! 
 #  0!2

 v%#%F< 6)')F#$&$&F=! 	|##J/ ''
3}$$Z0 < ''5#$++,<==!((7H&H|HHr   c           
        |r|gng d}|D ]  }t         
|   |||||||      }	|	s n d	v rd|	d   v r|	d   j                  d       d|	v rd|	d   v r|	d   j                  d       d|	v rd|	d   v r|	d   j                  d       |	S )N)r>   zsentence_roberta_config.jsonzsentence_distilbert_config.jsonzsentence_camembert_config.jsonzsentence_albert_config.jsonz sentence_xlm-roberta_config.jsonzsentence_xlnet_config.json)rc   r7   config_filenamerj   r   rk   rl   rd   r   re   rf   )rO   r   r|   )r)   rc   r7   r   rj   r   rk   rl   config_filenamesrM   r`   s             r    r   zTransformer.load_config  s       	  0 	OW(#5# /)!!1 ) F 	 6!&9VL=Q&Q< $$%89v%*=HXAY*Y#$(()<=F"':f]>S'S=!%%&9:r   )NNNNNFNrm   )rc   r?   rA   z
int | Nonerd   dict[str, Any] | Nonere   r   rf   r   rL   
str | NonerB   rD   rg   r   rQ   r?   r9   r:   )
rc   r?   rL   r   rQ   r?   rf   dict[str, Any]r9   z*tuple[PeftConfig | PretrainedConfig, bool])rc   r?   rM   zPeftConfig | PretrainedConfigrL   r?   rQ   r?   rh   rD   r9   r:   )r9   r?   )r   dict[str, torch.Tensor]r9   r   )r9   int)T)r   z.list[str] | list[dict] | list[tuple[str, str]]r   z
str | boolr9   r   )r   r?   r   rD   r9   r:   )
 NNNFFNNNrm   )rc   r?   r7   r?   rj   bool | str | Noner   r   rk   r   rl   rD   r   rD   r   r   r   r   r   r   rQ   r?   r9   r   )rc   r?   r7   r?   rj   r   r   r   rk   r   rl   rD   r   rD   r   r   r   r   r   r   rQ   r?   r9   r   )r   NNNNF)rc   r?   r7   r?   r   r   rj   r   r   r   rk   r   rl   rD   r9   r   )r   r   r   __doc__r@   __annotations__rC   rE   rP   rR   rS   r   rX   r   r   r   classmethodr   r   r   __classcell__)r`   s   @r    r=   r=   F   s`   . 8c7.@K@L$
 &*,004-1 $#-17W7W #7W *	7W
 .7W +7W 7W 7W !+7W 7W 
7Wr(i"%(i2<(iGJ(iYg(i	3(iT[v[v .[v 	[v
 [v [v 
[vzo/b'& \`&C&NX&	 &P&
 
 #'#'#!&"'.226/3II 	I
 !I !I I I  I ,I 0I -I I  
!I I@ 
 #'#'#!&"'.226/37I7I 	7I
 !7I !7I 7I 7I  7I ,7I 07I -7I 7I  
!7I 7Ir  &*#'#'#!&++ + $	+
 !+ !+ + + 
+ +r   r=   )r6   r   r7   r?   r9   zCallable[..., None])-
__future__r   rU   loggingr2   collections.abcr   
contextlibr   pathlibr   typingr   r   sentence_transformers.backendr	   r
   r   ImportErrortyping_extensionsrm   r~   r   r   r   r   r   r   transformers.utils.import_utilsr   transformers.utils.peft_utilsr   (sentence_transformers.models.InputModuler   rq   r   r   r   r#   	getLoggerr   loggerr.   r;   r=   r   r   r    <module>r      s    "   	 $ %  % N'  d d = B @&(?* 
		8	$ & &V+ Vo  '&'       s5   B+ 2B< ;C +B98B9<CCC-,C-