
    {-jj                        d dl Z d dlZd dlmZmZ d dlmZ d dlmZ ddl	m
Z
mZ ddlmZ  ed          rd d	lmZ  ed
          rd dlmZ  ed          rd dlmZ d dlmZ  e
dd
d           G d dee                      ZdS )    N)ABCabstractmethod)List)logging   )class_requires_depsis_dep_available)AutoRegisterABCMetaClass	langchainDocumentzlangchain-text-splitters)RecursiveCharacterTextSplitterzlangchain-community)vectorstores)FAISSc                        e Zd ZdZdZdZ fdZed             Zed             Z	de
fdZd	e
defd
Zdede
fdZde
defdZdg dfdee
         dedee
         ddfdZddde
fdZde
ddfdZ	 	 	 d!dee
         dddededede
fd Z	 xZS )"BaseRetrieverzBase RetrieverTPADDLEX_VECTOR_STOREc                 d    t                                                       d| _        d| _        dS )z*Initializes an instance of base retriever.N)super__init__
model_name	embedding)self	__class__s    u/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddlex/inference/pipelines/components/retriever/base.pyr   zBaseRetriever.__init__)   s+        c                      t          d          )z
        Declaration of an abstract method. Subclasses are expected to
        provide a concrete implementation of generate_vector_database.
        zCThe method `generate_vector_database` has not been implemented yet.NotImplementedErrorr   s    r   generate_vector_databasez&BaseRetriever.generate_vector_database/   s     "Q
 
 	
r   c                      t          d          )z
        Declaration of an abstract method. Subclasses are expected to
        provide a concrete implementation of similarity_retrieval.
        z?The method `similarity_retrieval` has not been implemented yet.r   r    s    r   similarity_retrievalz"BaseRetriever.similarity_retrieval9   s     "M
 
 	
r   returnc                     | j         S )zt
        Get the model name used for generating vectors.

        Returns:
            str: The model name.
        )r   r    s    r   get_model_namezBaseRetriever.get_model_nameC   s     r   sc                 6    |                     | j                  S )z
        Check if the given string starts with the vector store prefix.

        Args:
            s (str): The input string to check.

        Returns:
            bool: True if the string starts with the vector store prefix, False otherwise.
        )
startswithVECTOR_STORE_PREFIX)r   r'   s     r   is_vector_storezBaseRetriever.is_vector_storeL   s     ||D4555r   vector_store_bytesc                 `    | j         t          j        |                              d          z   S )z
        Encode the vector store bytes into a base64 string prefixed with a specific prefix.

        Args:
            vector_store_bytes (bytes): The bytes to encode.

        Returns:
            str: The encoded string with the prefix.
        ascii)r*   base64	b64encodedecode)r   r,   s     r   encode_vector_storez!BaseRetriever.encode_vector_storeX   s5     '&*:;M*N*N*U*U+
 +
 
 	
r   vector_store_strc                 ^    t          j        |t          | j                  d                   S )a  
        Decodes the vector store string by removing the prefix and decoding the base64 encoded string.

        Args:
            vector_store_str (str): The vector store string with a prefix.

        Returns:
            bytes: The decoded vector store data.
        N)r/   	b64decodelenr*   )r   r3   s     r   decode_vector_storez!BaseRetriever.decode_vector_storef   s,      0T5M1N1N1P1P QRRRr   i,  )	
u   。z

 	text_list
block_size
separatorsr   c                     t          |d|          }|                    d                    |                    }d |D             }	 t          j        || j                  }n# t          $ r d}Y nw xY w|S )a  
        Generates a vector database from a list of texts.

        Args:
            text_list (list[str]): A list of texts to generate the vector database from.
            block_size (int): The size of each chunk to split the text into.
            separators (list[str]): A list of separators to use when splitting the text.

        Returns:
            FAISS: The generated vector database.

        Raises:
            ValueError: If an unsupported API type is configured.
           )
chunk_sizechunk_overlapr=   r8   c                 .    g | ]}t          |           S )page_contentr   ).0texts     r   
<listcomp>z:BaseRetriever.generate_vector_database.<locals>.<listcomp>   s#    DDDdhD111DDDr   )	documentsr   N)r   
split_textjoinr   from_documentsr   
ValueError)r   r;   r<   r=   text_splittertexts
all_splitsvectorstores           r   r!   z&BaseRetriever.generate_vector_databaser   s    ( 7!

 
 
 ((9)=)=>>DDeDDD
	.$  KK  	 	 	KKK	 s   A$ $A32A3rP   c                 h    || j         }n'|                     |                                          }|S )z
        Encode the vector store serialized to bytes.

        Args:
            vectorstore (FAISS): The vector store to be serialized and encoded.

        Returns:
            str: The encoded vector store.
        )r*   r2   serialize_to_bytes)r   rP   s     r   encode_vector_store_to_bytesz*BaseRetriever.encode_vector_store_to_bytes   s9     2KK22;3Q3Q3S3STTKr   c                     |                      |          st          d          |                     |          }|dk    rt          j        d           dS t
          j                            || j        d          }|S )a  
        Decode a vector store from bytes according to the specified API type.

        Args:
            vectorstore (str): The serialized vector store string.

        Returns:
            FAISS: Deserialized vector store object.

        Raises:
            ValueError: If the retrieved vector store is not for PaddleX
            or if an unsupported API type is specified.
        z-The retrieved vectorstore is not for PaddleX.r   z5The retrieved vectorstore is empty,will empty vector.NT)
embeddingsallow_dangerous_deserialization)	r+   rL   r7   r   warningr   r   deserialize_from_bytesr   )r   rP   vectors      r   decode_vector_store_from_bytesz,BaseRetriever.decode_vector_store_from_bytes   s     ##K00 	NLMMM..{;;#OSTTT4#::~,0 ; 
 

 r         ?     query_text_list
sleep_timetopkmin_charactersc                 4   d}||S |D ]}|}t          j        |           |                    ||          }	d |	D             }
t          |
d           }
|
ddd         D ]5\  }}|dk    r*t	          |          t	          |          z   |k    r n||z  }6|S )	a  
        Retrieve similar contexts based on a list of query texts.

        Args:
            query_text_list (list[str]): A list of query texts to search for similar contexts.
            vectorstore (FAISS): The vector store where to perform the similarity search.
            sleep_time (float): The time to sleep between each query, in seconds. Default is 0.5.
            topk (int): The number of results to retrieve per query. Default is 2.
            min_characters (int): The minimum number of characters required for text processing, defaults to 3500.
        Returns:
            str: A concatenated string of all unique contexts found.
        r:   N)kc                 &    g | ]\  }}|j         |fS  rC   )rE   documentscores      r   rG   z6BaseRetriever.similarity_retrieval.<locals>.<listcomp>   s$    RRR/(E-u5RRRr   c                     | d         S )N   re   )xs    r   <lambda>z4BaseRetriever.similarity_retrieval.<locals>.<lambda>   s
    AaD r   )keyg)timesleep'similarity_search_with_relevance_scoressortedr6   )r   r^   rP   r_   r`   ra   all_C
query_textQUESTIONdocscontextrF   rg   s                r   r#   z"BaseRetriever.similarity_retrieval   s    ( L) 
	" 
	"J!HJz"""FFxSWFXXDRRTRRRGW..999G&ttt} " "eD==5zzCII->>TMEr   )r[   r\   r]   )__name__
__module____qualname____doc___BaseRetriever__is_baser*   r   r   r!   r#   strr&   boolr+   bytesr2   r7   r   intrS   rZ   float__classcell__)r   s   @r   r   r   !   s       I0     
 
 ^
 
 
 ^
    
6 
6 
6 
6 
6 
6
e 
 
 
 
 

SC 
SE 
S 
S 
S 
S  ? ? ?	! !9! ! I	!
 
! ! ! !F C     # '    D  "" "c" " 	"
 " " 
" " " " " " " "r   r   )	metaclass)r/   rn   abcr   r   typingr   paddlex.utilsr   
utils.depsr   r	   utils.subclass_registerr
   langchain_core.documentsr   langchain_text_splittersr   langchain_communityr    langchain_community.vectorstoresr   r   re   r   r   <module>r      si     # # # # # # # #       ! ! ! ! ! ! A A A A A A A A A A A A A AK   2111111.// HGGGGGG)** 7000000666666 ["<>STTC C C C CC#; C C C UTC C Cr   