
    }-jC                         d dl Z d dlZd dlZddlmZ ddlmZmZm	Z	 ddl
mZmZmZmZmZ  e            rd dlZddlmZmZ  G d d	e          Z e ed
                     G d de                      ZdS )    N   )GenerationConfig)add_end_docstringsis_torch_availablerequires_backends   )ArgumentHandlerDatasetPipelinePipelineExceptionbuild_pipeline_init_args),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESc                       e Zd ZdZddZdS )%TableQuestionAnsweringArgumentHandlerzB
    Handles arguments for the TableQuestionAnsweringPipeline
    Nc                    t          | d           dd l}|t          d          |]t          |t                    r/|                    d          |                    d          |g}nt          |t                    rt          |          dk    rt          d |D                       st          dd |D                        |d                             d          |d                             d          |}nt          d	|d         	                                 d
          t          t          |t                    st          |t          j                  r|S t          dt          |           d          ||dg}|D ]R}t          |d         |j                  s5|d         t          d          |                    |d                   |d<   S|S )Npandasr   z(Keyword argument `table` cannot be None.querytablec              3   @   K   | ]}t          |t                    V  d S N)
isinstancedict.0ds     o/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/pipelines/table_question_answering.py	<genexpr>zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>-   s,      >>1:a..>>>>>>    z:Keyword argument `table` should be a list of dict, but is c              3   4   K   | ]}t          |          V  d S r   )typer   s     r   r   zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>/   s-      UmUmbcVZ[\V]V]UmUmUmUmUmUmr   zIf keyword argument `table` is a list of dictionaries, each dictionary should have a `table` and `query` key, but only dictionary has keys z `table` and `query` keys.zZInvalid input. Keyword argument `table` should be either of type `dict` or `list`, but is ))r   r   zTable cannot be None.)r   r   
ValueErrorr   r   getlistlenallkeysr
   typesGeneratorTyper!   	DataFrame)selfr   r   kwargspdtqa_pipeline_inputstqa_pipeline_inputs          r   __call__z.TableQuestionAnsweringArgumentHandler.__call__   s2    	$)))=GHHH]%&& 599W+=+=+IeiiX_N`N`Nl',g##E4(( SZZ!^^>>>>>>> $oUmUmglUmUmUmoo   8<<((4qg9N9N9Z*/''$vJOPQ(--//v v v   $E7)C)C$zRWY^YlGmGm$ )u++) ) )  
 .3U#C#C"D"5 	X 	X092<HH X%g.6$%<===.0ll;Mg;V.W.W"7+""r   )NN)__name__
__module____qualname____doc__r1    r   r   r   r      s2         -# -# -# -# -# -#r   r   T)has_tokenizerc                        e Zd ZdZdZdZdZdZdZdZ	 e
d          Z e            f fd	Zd Zd	 Z fd
ZddZddZddZd Z xZS )TableQuestionAnsweringPipelinea  
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
    PyTorch.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    ztable,queryTF   )max_new_tokensc                     t                      j        di | || _        t          j                    }|                    t                     |                     |           t          | j	        j
        dd           ot          | j	        j
        dd           | _        t          | j	        j
        d          rdnd | _        d S )Naggregation_labelsnum_aggregation_labelstapasr6   )super__init___args_parserr   copyupdater   check_model_typegetattrmodelconfig	aggregatehasattrr!   )r,   args_parserr-   mapping	__class__s       r   rA   z'TableQuestionAnsweringPipeline.__init__}   s    ""6"""'BGIICDDDg&&& !24H$OO 
T[J7U
 U
  'tz'8:NOOYGGUY			r   c                      | j         di |S )Nr6   )rG   )r,   inputss     r   batch_inferencez.TableQuestionAnsweringPipeline.batch_inference   s    tz##F###r   c                    g }g }d}|d         j         d         }|d                             | j                  }|d                             | j                  }|d                             | j                  }d}	t          |          D ]h}
|W|	dddf         }t	          j        |                                                                          }||
         }	t          |j         d                   D ]}|	dddf                                         |         }|	dddf                                         |         dz
  }|	dddf                                         |         dz
  }|dk    r&|dk    r |dk    rt          |||f                   ||<   t          j        |                              t          j                                      | j                  |	dddf<   ||
         }||
         }||
         }	|                     |                    d          |                    d          |	                    d          	          }|j        }| j        r|                    |j                   |                    |           t          j                            |
          }|j        |                    t          j                                      |j        j                  z  }t1          j        t4                    t7          |                                                                          D ]\  }}|	dddf                                         |         }|	dddf                                         |         dz
  }|	dddf                                         |         dz
  }|dk    r)|dk    r#|dk    r||f                             |           fdD             }jt          j        t=          |          d          }| j        s|fn#|t          j        t=          |          d          fS )z
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        N	input_idsr   attention_masktoken_type_ids   r   r   )rR   rS   rT   )logitsc                 r    i | ]3}|t          j        |                                                   d k    4S )g      ?)nparraymean)r   keycoords_to_probss     r   
<dictcomp>zGTableQuestionAnsweringPipeline.sequential_inference.<locals>.<dictcomp>   s<    hhhQTC/#*>!?!?!D!D!F!F!Lhhhr   )shapetodevicerangerX   
zeros_likecpunumpytolistinttorch
from_numpyr!   longrG   	unsqueezerV   rI   appendlogits_aggregationdistributions	Bernoulliprobsfloat32collectionsdefaultdictr%   	enumeratesqueezecattuple)r,   rO   
all_logitsall_aggregationsprev_answers
batch_sizerR   rS   rT   token_type_ids_exampleindexprev_labels_examplemodel_labelsi
segment_idcol_idrow_idinput_ids_exampleattention_mask_exampleoutputsrV   dist_per_tokenprobabilitiespcolrowlogits_batchr\   s                              @r   sequential_inferencez3TableQuestionAnsweringPipeline.sequential_inference   s?   
 
K(.q1
;'**4;77	 0144T[AA 0144T[AA!%:&& .	i .	iE '&<QQQT&B#!}-@-D-D-F-F-L-L-N-NOO)7)>&|1!455 N NA!71!=!D!D!F!Fq!IJ3AAAqD9@@BB1EIF3AAAqD9@@BB1EIF{{v{{zQ*-lFF;K.L*M*MQ/4/?/M/M/R/RSXS]/^/^/a/abfbm/n/n&qqq!t, )% 0%3E%:"%3E%:"jj+55a885??BB5??BB !  G
 ^F~ D ''(BCCCf%%%"0::&:IIN*03I3N3Nu}3]3]3`3`$+4 4 M *5d;;O!-"7"7"9"9"@"@"B"BCC : :13AAAqD9@@BB1E
,QQQT299;;A>B,QQQT299;;A>B!88qZ1__#S#J/66q999hhhhXghhhLLyz!2!2A66&*no<SXYiSjSjlmInIn:oor   c                      | j         |i |} t                      j        |fi |}t          |          dk    r|d         S |S )a  
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `list[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`list[tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`list[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        r   r   )rB   r@   r1   r&   )r,   argsr-   pipeline_inputsresultsrM   s        r   r1   z'TableQuestionAnsweringPipeline.__call__   s[    V ,$+T<V<<"%''"?==f==w<<11:r   Nc                     i }|||d<   |||d<   i }|||d<   t          | dd           
| j        |d<   t          | dd           | j        |d<   | j        |d<   ||i fS )Npadding
truncation
sequentialassistant_modelassistant_tokenizer	tokenizer)rF   r   r   r   )r,   r   r   r   r-   preprocess_paramsforward_paramss          r   _sanitize_parametersz3TableQuestionAnsweringPipeline._sanitize_parameters"  s    +2i(!.8l+!+5N<(4*D11=040DN,-4.55A*..N;'484LN01 ."44r   c                     || j         dk    rd}nd}|d         |d         }}|j        rt          d          ||dk    rt          d          |                     ||d	||
          }||d<   |S )Nr?   drop_rows_to_fitdo_not_truncater   r   ztable is empty zquery is emptypt)return_tensorsr   r   )r!   emptyr#   r   )r,   pipeline_inputr   r   r   r   rO   s          r   
preprocessz)TableQuestionAnsweringPipeline.preprocess5  s    yG##/

.
%g.w0Gu; 	/-...=ERKK-...uTjbijjwr   c                     |                     d          }| j        dk    r|r | j        di |}n0 | j        di |}n"d|vr
| j        |d<    | j        j        di ||}|||d}|S )Nr   r?   generation_config)model_inputsr   r   r6   )popr!   r   rP   r   rG   generate)r,   r   r   generate_kwargsr   r   model_outputss          r   _forwardz'TableQuestionAnsweringPipeline._forwardE  s      ))9 ?3$3CClCC.$.>>>> #/997;7M 34)dj)LLLLOLLG)5RYZZr   c                     |d         }|d         |d         } j         dk    rq j        rw|d d         \  }} j                            |||          }|\  }} fdt	          |          D              j        j        j        fdt	          |          D             }	n/|d         } j                            ||          }|d         }i i }	g }
t	          |          D ]\  }}fd	|D             }                    |d
          }|	                    |d
          }|d	                    |          z   |fd|D             d}|r||d<   |

                    |           t          |          dk    rt          d j        j        d          n&d  j                            |d          D             }
t          |
          dk    r|
n|
d         S )Nr   r   r   r?   r   c                 F    i | ]\  }}|j         j        j        |         S r6   )rG   rH   r=   )r   r   predr,   s      r   r]   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<dictcomp>_  s/    wwwQXQRTXq$*"3"Ft"Lwwwr   c                 :    i | ]\  }}|k    ||         d z   S )z > r6   )r   r   r   aggregatorsno_agg_label_indexs      r   r]   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<dictcomp>b  s;     & & &29!T[_cu[u[uA{1~-[u[u[ur   r   c                 *    g | ]}j         |         S r6   iatr   
coordinater   s     r   
<listcomp>z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>m  s     MMM::.MMMr   r   z, c                 *    g | ]}j         |         S r6   r   r   s     r   r   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>s  s     RRR
ei
3RRRr   )answercoordinatescells
aggregatorzTable question answeringzEmpty answerc                     g | ]}d |iS )r   r6   )r   r   s     r   r   z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>|  s    wwwf&)wwwr   T)skip_special_tokensr   )r!   rI   r   convert_logits_to_predictionsrs   rG   rH   no_aggregation_label_indexr$   joinrk   r&   r   name_or_pathbatch_decode)r,   r   rO   r   rV   
logits_aggpredictionsanswer_coordinates_batchagg_predictionsaggregators_prefixanswersr|   r   r   r   aggregator_prefixr   r   r   r   s   `                @@@r   postprocessz*TableQuestionAnsweringPipeline.postprocessV  sV   ~.g&	*9~ (%,RaR["
"nJJ6SY[eff<G9(/wwww\efu\v\vwww%)Z%6%Q"& & & & &=F=W=W& & &"" !"nJJ6SYZZ+6q>( %'"G&/0H&I&I ' '"{MMMMMMM(__UB77
$6$:$:5"$E$E!/$))E2B2BB#.RRRRkRRR 
  6+5F<(v&&&&6{{a'(BDJD[]klll   xw8S8ST[qu8S8v8vwwwGg,,**ww
:r   )NNN)TN)F)r2   r3   r4   r5   default_input_names_pipeline_calls_generate_load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   _default_generation_configr   rA   rP   r   r1   r   r   r   r   __classcell__)rM   s   @r   r9   r9   M   s+          D (#O!#O!1!1" " " $I#H#J#J Z Z Z Z Z Z$ $ $Ap Ap ApFP P P P Pd5 5 5 5&       "(; (; (; (; (; (; (;r   r9   )rq   r)   rd   rX   
generationr   utilsr   r   r   baser	   r
   r   r   r   rg   models.auto.modeling_autor   r   r   r9   r6   r   r   <module>r      sl            ) ) ) ) ) )         
 b a a a a a a a a a a a a a  LLL       2# 2# 2# 2# 2#O 2# 2# 2#j ,,4@@@AAp; p; p; p; p;X p; p; BAp; p; p;r   