
    .i                     \   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(  ej)        e*          Z+ G d	 d
          Z, G d d          Z- G d d          Z. G d d          Z/ G d de.          Z0 G d de0          Z1 G d de1          Z2 G d de1          Z3 G d de0          Z4 G d de.e/          Z5 G d de0e/          Z6 ede.           Z7 G d! d"e0ee7                   Z8 G d# d$e8e7                   Z9 G d% d&e9e7         e/          Z:ee6e5f         Z; G d' d(e:e;                   Z< G d) d*e<          Z= G d+ d,e<          Z> G d- d.e:e<                   Z? G d/ d0e?          Z@ G d1 d2e?          ZAee?d3f         ZB G d4 d3e:eB                   ZC G d5 d6eC          ZD G d7 d8eC          ZE G d9 d:e8e0                   ZF G d; d<eF          ZG G d= d>eF          ZHdS )?    N)DictGenericIterableIteratorListOptionalSequenceSetTupleTypeVarUnioncast)PDFColorSpace)PDFTypeErrorPDFValueError)PDFFont)ColorPDFGraphicState)	PDFStream)INFLTComponentTMatrixPathSegmentPlanePointRectapply_matrix_rectbbox2strfsplit	get_bound
matrix2struniqc                   (    e Zd Zd	deddfdZd
dZdS )IndexAssignerr   indexreturnNc                     || _         d S Nr%   )selfr%   s     K/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfminer/layout.py__init__zIndexAssigner.__init__+   s    


    objLTItemc                     t          |t                    r| j        |_        | xj        dz  c_        d S t          |t                    r|D ]}|                     |           d S d S N   )
isinstance	LTTextBoxr%   LTTextGrouprun)r*   r.   xs      r+   r6   zIndexAssigner.run.   sv    c9%% 	
CIJJ!OJJJJ[)) 	  	 	 r-   r   )r.   r/   r&   N)__name__
__module____qualname__intr,   r6    r-   r+   r$   r$   *   sO         c $         r-   r$   c                   j    e Zd ZdZ	 	 	 	 	 	 	 ddededed	ed
ee         dededdfdZddZde	fdZ
dS )LAParamsa  Parameters for layout analysis

    :param line_overlap: If two characters have more overlap than this they
        are considered to be on the same line. The overlap is specified
        relative to the minimum height of both characters.
    :param char_margin: If two characters are closer together than this
        margin they are considered part of the same line. The margin is
        specified relative to the width of the character.
    :param word_margin: If two characters on the same line are further apart
        than this margin then they are considered to be two separate words, and
        an intermediate space will be added for readability. The margin is
        specified relative to the width of the character.
    :param line_margin: If two lines are are close together they are
        considered to be part of the same paragraph. The margin is
        specified relative to the height of a line.
    :param boxes_flow: Specifies how much a horizontal and vertical position
        of a text matters when determining the order of text boxes. The value
        should be within the range of -1.0 (only horizontal position
        matters) to +1.0 (only vertical position matters). You can also pass
        `None` to disable advanced layout analysis, and instead return text
        based on the position of the bottom left corner of the text box.
    :param detect_vertical: If vertical text should be considered during
        layout analysis
    :param all_texts: If layout analysis should be performed on text in
        figures.
          ?       @皙?Fline_overlapchar_marginline_marginword_margin
boxes_flowdetect_vertical	all_textsr&   Nc                     || _         || _        || _        || _        || _        || _        || _        |                                  d S r(   )rC   rD   rE   rF   rG   rH   rI   	_validate)r*   rC   rD   rE   rF   rG   rH   rI   s           r+   r,   zLAParams.__init__S   sP     )&&&$."r-   c                     | j         hd}t          | j         t                    s)t          | j         t                    st	          |          d| j         cxk    rdk    sn t          |          d S d S )Nz@LAParam boxes_flow should be None, or a number between -1 and +1r2   )rG   r3   r<   floatr   r   )r*   boxes_flow_err_msgs     r+   rK   zLAParams._validateg   s    ?&R  4?C0074>tPU4V4V7 ##5666----A----#$6777 '& .-r-   c                 <    d| j         | j        | j        | j        fz  S )NzM<LAParams: char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>)rD   rE   rF   rI   r*   s    r+   __repr__zLAParams.__repr__s   s)    -!143CT^TU	
r-   )r@   rA   r@   rB   r@   FFr&   N)r9   r:   r;   __doc__rN   r   boolr,   rK   strrR   r=   r-   r+   r?   r?   7   s         : "   &) %   	
  UO   
   (
8 
8 
8 
8
# 
 
 
 
 
 
r-   r?   c                   "    e Zd ZdZdeddfdZdS )r/   z)Interface for things that can be analyzedlaparamsr&   Nc                     dS )zPerform the layout analysis.Nr=   r*   rX   s     r+   analyzezLTItem.analyze~   s      r-   )r9   r:   r;   rT   r?   r[   r=   r-   r+   r/   r/   {   s<        33+ +T + + + + + +r-   r/   c                   *    e Zd ZdZdefdZdefdZdS )LTTextz#Interface for things that have textr&   c                 L    d| j         j         d|                                 dS N< >)	__class__r9   get_textrQ   s    r+   rR   zLTText.__repr__   s(    A4>*AAT]]__AAAAr-   c                     t           )zText contained in this objectNotImplementedErrorrQ   s    r+   rd   zLTText.get_text   s    !!r-   N)r9   r:   r;   rT   rV   rR   rd   r=   r-   r+   r]   r]      sU        --B# B B B B"# " " " " " "r-   r]   c                       e Zd ZdZdeddfdZdefdZdede	fdZ
dede	fd	Zdede	fd
Zdede	fdZdeddfdZde	fdZdd de	fdZdd defdZdd defdZdd de	fdZdd defdZdd defdZdS )LTComponentzObject with a bounding boxbboxr&   Nc                 d    t                               |            |                     |           d S r(   )r/   r,   set_bboxr*   rj   s     r+   r,   zLTComponent.__init__   s,    dr-   c                 L    d| j         j         dt          | j                   dS r_   )rc   r9   r   rj   rQ   s    r+   rR   zLTComponent.__repr__   s*    C4>*CCXdi-@-@CCCCr-   _c                     t           r(   r   r*   ro   s     r+   __lt__zLTComponent.__lt__       r-   c                     t           r(   rq   rr   s     r+   __le__zLTComponent.__le__   rt   r-   c                     t           r(   rq   rr   s     r+   __gt__zLTComponent.__gt__   rt   r-   c                     t           r(   rq   rr   s     r+   __ge__zLTComponent.__ge__   rt   r-   c                     |\  }}}}|| _         || _        || _        || _        ||z
  | _        ||z
  | _        || _        d S r(   )x0y0x1y1widthheightrj   )r*   rj   r|   r}   r~   r   s         r+   rl   zLTComponent.set_bbox   sM    RR"W
2g			r-   c                 .    | j         dk    p
| j        dk    S Nr   )r   r   rQ   s    r+   is_emptyzLTComponent.is_empty   s    zQ2$+"22r-   r.   c                     t          |t                    s$J t          t          |                                |j        | j        k    o| j        |j        k    S r(   )r3   ri   rV   typer|   r~   r*   r.   s     r+   is_hoverlapzLTComponent.is_hoverlap   I    #{++;;Sc^^;;;v 6TW%66r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rdS t          t          | j        |j        z
            t          | j        |j        z
                      S r   	r3   ri   rV   r   r   minabsr|   r~   r   s     r+   	hdistancezLTComponent.hdistance   z    #{++;;Sc^^;;;C   	E1s47SV+,,c$'CF2B.C.CDDDr-   c                 *   t          |t                    s$J t          t          |                                |                     |          rDt          t          | j        |j        z
            t          | j        |j        z
                      S dS r   r   r   s     r+   hoverlapzLTComponent.hoverlap   y    #{++;;Sc^^;;;C   	s47SV+,,c$'CF2B.C.CDDD1r-   c                     t          |t                    s$J t          t          |                                |j        | j        k    o| j        |j        k    S r(   )r3   ri   rV   r   r}   r   r   s     r+   is_voverlapzLTComponent.is_voverlap   r   r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rdS t          t          | j        |j        z
            t          | j        |j        z
                      S r   	r3   ri   rV   r   r   r   r   r}   r   r   s     r+   	vdistancezLTComponent.vdistance   r   r-   c                 *   t          |t                    s$J t          t          |                                |                     |          rDt          t          | j        |j        z
            t          | j        |j        z
                      S dS r   r   r   s     r+   voverlapzLTComponent.voverlap   r   r-   )r9   r:   r;   rT   r   r,   rV   rR   objectrU   rs   rv   rx   rz   rl   r   r   rN   r   r   r   r   r   r=   r-   r+   ri   ri      s       $$T d    D# D D D D 4     4     4     4    T d    3$ 3 3 3 37} 7 7 7 7 7E] Eu E E E EM e    7} 7 7 7 7 7E] Eu E E E EM e      r-   ri   c                       e Zd ZdZ	 	 	 	 	 	 	 ddedee         dededed	ee	         d
ee	         deee
                  deeeef                  ddfdZdefdZdS )LTCurvezA generic Bezier curve

    The parameter `original_path` contains the original
    pathing information from the pdf (e.g. for reconstructing Bezier Curves).

    `dashing_style` contains the Dashing information if any.
    FN	linewidthptsstrokefillevenoddstroking_colornon_stroking_colororiginal_pathdashing_styler&   c
                     t                               | t          |                     || _        || _        || _        || _        || _        || _        || _	        || _
        |	| _        d S r(   )ri   r,   r    r   r   r   r   r   r   r   r   r   )
r*   r   r   r   r   r   r   r   r   r   s
             r+   r,   zLTCurve.__init__   si     	T9S>>222"	,"4**r-   c                 J    d                     d | j        D                       S )N,c              3       K   | ]	}d |z  V  
dS )z	%.3f,%.3fNr=   ).0ps     r+   	<genexpr>z"LTCurve.get_pts.<locals>.<genexpr>   s&      ::Aa::::::r-   )joinr   rQ   s    r+   get_ptszLTCurve.get_pts   s%    xx::::::::r-   FFFNNNN)r9   r:   r;   rT   rN   r   r   rU   r   r   r   r   r   r,   rV   r   r=   r-   r+   r   r      s          *..2599=+ ++ %[+ 	+
 + + !+ %UO+  [ 12+  ffn 56+ 
+ + + +.; ; ; ; ; ; ;r-   r   c                       e Zd ZdZ	 	 	 	 	 	 	 ddededededed	ed
ee         dee         dee	e
                  deeeef                  ddfdZdS )LTLinezOA single straight line.

    Could be used for separating text or figures.
    FNr   p0p1r   r   r   r   r   r   r   r&   c                 P    t                               | |||g||||||	|

  
         d S r(   r   r,   )r*   r   r   r   r   r   r   r   r   r   r   s              r+   r,   zLTLine.__init__   sH     	H	
 	
 	
 	
 	
r-   r   )r9   r:   r;   rT   rN   r   rU   r   r   r   r   r   r   r,   r=   r-   r+   r   r      s          *..2599=
 

 
 	

 
 
 
 !
 %UO
  [ 12
  ffn 56
 

 
 
 
 
 
r-   r   c                       e Zd ZdZ	 	 	 	 	 	 	 ddededededed	ee         d
ee         dee	e
                  deeeef                  ddfdZdS )LTRectzMA rectangle.

    Could be used for framing another pictures or figures.
    FNr   rj   r   r   r   r   r   r   r   r&   c
                 r    |\  }
}}}t                               | ||
|f||f||f|
|fg|||||||	
  
         d S r(   r   )r*   r   rj   r   r   r   r   r   r   r   r|   r}   r~   r   s                 r+   r,   zLTRect.__init__!  sk      RR"XBx"bB84	
 	
 	
 	
 	
r-   r   )r9   r:   r;   rT   rN   r   rU   r   r   r   r   r   r   r,   r=   r-   r+   r   r     s          *..2599=
 

 
 	

 
 
 !
 %UO
  [ 12
  ffn 56
 

 
 
 
 
 
r-   r   c                   6    e Zd ZdZdedededdfdZdefdZdS )	LTImagezKAn image object.

    Embedded images can be in JPEG, Bitmap or JBIG2.
    namestreamrj   r&   Nc                    t                               | |           || _        || _        |                    d          |                    d          f| _        |                    d          | _        |                    dd          | _        |                    d          | _        t          | j        t                    s| j        g| _        d S d S )N)WWidth)HHeight)IM	ImageMask)BPCBitsPerComponentr2   )CS
ColorSpace)ri   r,   r   r   get_anysrcsize	imagemaskbits
colorspacer3   list)r*   r   r   rj   s       r+   r,   zLTImage.__init__B  s    T4(((	~668W8WX(;<<NN#>BB	 ..)=>>$/400 	0#/DOOO	0 	0r-   c           	      l    d| j         j         d| j         dt          | j                   d| j        d	S Nr`   () ra   rb   )rc   r9   r   r   rj   r   rQ   s    r+   rR   zLTImage.__repr__M  s@    a4>*aaTYaa(49:M:MaaPTP\aaaar-   )	r9   r:   r;   rT   rV   r   r   r,   rR   r=   r-   r+   r   r   <  sp         
	0S 	0) 	04 	0D 	0 	0 	0 	0b# b b b b b br-   r   c                   .    e Zd ZdZdeddfdZdefdZdS )LTAnnoa  Actual letter in the text as a Unicode string.

    Note that, while a LTChar object has actual boundaries, LTAnno objects does
    not, as these are "virtual" characters, inserted by a layout analyzer
    according to the relationship between two characters (e.g. a space).
    textr&   Nc                     || _         d S r(   _text)r*   r   s     r+   r,   zLTAnno.__init__Y  s    


r-   c                     | j         S r(   r   rQ   s    r+   rd   zLTAnno.get_text\  
    zr-   )r9   r:   r;   rT   rV   r,   rd   r=   r-   r+   r   r   Q  s[         S T    #      r-   r   c                       e Zd ZdZdededededededed	eee	e
e         ef         f         d
ededdfdZdefdZdefdZdS )LTCharz.Actual letter in the text as a Unicode string.matrixfontfontsizescalingriser   	textwidthtextdispncsgraphicstater&   Nc                 
   t                               |            || _        || _        |j        | _        |	| _        |
| _        ||z  |z  | _        |                                rQt          |t                    sJ |\  }}||dz  }n||z  dz  }d|z
  |z  dz  }| ||z   | j        z   | |z   ||z   f}n+|                                |z  }d||z   | j        ||z   |z   f}| j        \  }}}}}}||z  |z  dk    o||z  dk    | _        t          | j        |          \  }}}}||k     r||}}||k     r||}}t                              | ||||f           |                                r| j        | _        d S | j        | _        d S )Nr@   gMbP?i  r   )r]   r,   r   r   fontnamer   r   advis_verticalr3   tupleget_descentuprightr   ri   r   sizer   )r*   r   r   r   r   r   r   r   r   r   r   vxvyrj   descentabcdefr|   r}   r~   r   s                            r+   r,   zLTChar.__init__c  s    	
(x''1 	Lh.....HRz^(]U*)x'%/BCdTX-sX~rDyIDD &&((83Gw~tx4(1JKD![Aq!Q1uw*9q1uz,T[$??RR77BR77BRTBB#3444 	$
DIIIDIIIr-   c                     d| j         j         dt          | j                   dt	          | j                   d| j        d| j         d|                                 dS )Nr`   ra    matrix=z font=z adv=z text=rb   )	rc   r9   r   rj   r!   r   r   r   rd   rQ   s    r+   rR   zLTChar.__repr__  s     d4>*  d  dXdi-@-@  d  d*UYU`JaJa  d  dimiv  d  d  @D  @H  d  d  PT  P]  P]  P_  P_  d  d  d  	dr-   c                     | j         S r(   r   rQ   s    r+   rd   zLTChar.get_text  r   r-   )r9   r:   r;   rT   r   r   rN   rV   r   r   r   r   r   r,   rR   rd   r=   r-   r+   r   r   `  s        88.$.$ .$ 	.$
 .$ .$ .$ .$ uXe_e%;<<=.$ .$ &.$ 
.$ .$ .$ .$`d# d d d d#      r-   r   LTItemT)boundc                       e Zd ZdZdeddfdZdee         fdZde	fdZ
deddfd	Zd
ee         ddfdZdeddfdZdS )LTContainerz(Object that can be extended and analyzedrj   r&   Nc                 J    t                               | |           g | _        d S r(   )ri   r,   _objsrm   s     r+   r,   zLTContainer.__init__  s#    T4((($&


r-   c                 *    t          | j                  S r(   )iterr   rQ   s    r+   __iter__zLTContainer.__iter__  s    DJr-   c                 *    t          | j                  S r(   )lenr   rQ   s    r+   __len__zLTContainer.__len__  s    4:r-   r.   c                 :    | j                             |           d S r(   )r   appendr   s     r+   addzLTContainer.add  s    
#r-   objsc                 :    |D ]}|                      |           d S r(   )r  )r*   r  r.   s      r+   extendzLTContainer.extend  s,     	 	CHHSMMMM	 	r-   rX   c                 D    | j         D ]}|                    |           d S r(   )r   r[   r*   rX   r.   s      r+   r[   zLTContainer.analyze  s2    : 	" 	"CKK!!!!	" 	"r-   )r9   r:   r;   rT   r   r,   r   r   r  r<   r  r  r   r
  r?   r[   r=   r-   r+   r   r     s        22'T 'd ' ' ' ' (7+            w 4    8G,     " "T " " " " " "r-   r   c                   &    e Zd ZddZdeddfdZdS )LTExpandableContainerr&   Nc                 t    t                               | t          
 t          
 t           t           f           d S r(   )r   r,   r   rQ   s    r+   r,   zLTExpandableContainer.__init__  s/    TSD3$sd#;<<<<<r-   r.   c           	      T   t                               | t          t          |                     |                     t          | j        |j                  t          | j        |j                  t          | j	        |j	                  t          | j
        |j
                  f           d S r(   )r   r  r   r   rl   r   r|   r}   maxr~   r   r   s     r+   r  zLTExpandableContainer.add  s    d7C00111DGSV$$DGSV$$DGSV$$DGSV$$		
 	
 	
 	
 	
r-   rS   )r9   r:   r;   r,   ri   r  r=   r-   r+   r  r    sJ        = = = =
	
{ 	
t 	
 	
 	
 	
 	
 	
r-   r  c                   "    e Zd ZddZdefdZdS )LTTextContainerr&   Nc                 n    t                               |            t                              |            d S r(   )r]   r,   r  rQ   s    r+   r,   zLTTextContainer.__init__  s.    &&t,,,,,r-   c                 @    d                     d | D                       S )N c              3      K   | ]@}t          |t                    t          t          |                                          V  Ad S r(   )r3   r]   r   rd   )r   r.   s     r+   r   z+LTTextContainer.get_text.<locals>.<genexpr>  s[       
 
-0JsF<S<S
&&((
 
 
 
 
 
r-   )r   rQ   s    r+   rd   zLTTextContainer.get_text  s4    ww 
 
48
 
 
 
 
 	
r-   rS   )r9   r:   r;   r,   rV   rd   r=   r-   r+   r  r    sC        - - - -
# 
 
 
 
 
 
r-   r  c                        e Zd ZdZdeddf fdZdefdZdeddfdZ	d	e
e         d
eded          fdZdef fdZ xZS )
LTTextLinezContains a list of LTChar objects that represent a single text line.

    The characters are aligned either horizontally or vertically, depending on
    the text's writing mode.
    rF   r&   Nc                 V    t                                                       || _        d S r(   )superr,   rF   )r*   rF   rc   s     r+   r,   zLTTextLine.__init__  s'    &r-   c                 v    d| j         j         dt          | j                   d|                                 dS r_   )rc   r9   r   rj   rd   rQ   s    r+   rR   zLTTextLine.__repr__  s:    W4>*WWXdi-@-@WW4==??WWWWr-   rX   c                     | j         D ]}|                    |           t                              | t	          d                     d S )N
)r   r[   r   r  r   r  s      r+   r[   zLTTextLine.analyze  sH    : 	" 	"CKK!!!!fTll+++++r-   planeratioc                     t           r(   rf   )r*   r  r   s      r+   find_neighborszLTTextLine.find_neighbors  s
    
 "!r-   c                     t                                                      p%|                                                                 S r(   )r  r   rd   isspace)r*   rc   s    r+   r   zLTTextLine.is_empty  s2    ww!!>T]]__%<%<%>%>>r-   )r9   r:   r;   rT   rN   r,   rV   rR   r?   r[   r   r   r   r"  rU   r   __classcell__rc   s   @r+   r  r    s         'E 'd ' ' ' ' ' 'X# X X X X, ,T , , , ,
"\"" " 
l		" " " "?$ ? ? ? ? ? ? ? ? ? ?r-   r  c                        e Zd ZdeddfdZdeddf fdZdee         dede	e
         fd	ZddededefdZddededefdZ	 ddededefdZddededefdZ xZS )LTTextLineHorizontalrF   r&   Nc                 V    t                               | |           t          
 | _        d S r(   )r  r,   r   _x1r*   rF   s     r+   r,   zLTTextLineHorizontal.__init__  %    D+...$r-   r.   c                 T   t          |t                    rd| j        r]| j        t          |j        |j                  z  }| j        |j        |z
  k     r(t          	                    | t          d                     |j        | _        t                      	                    |           d S Nra   )r3   r   rF   r  r   r   r*  r|   r   r  r   r~   r  r*   r.   marginrc   s      r+   r  zLTTextLineHorizontal.add  s    c6"" 	3t'7 	3%CIsz(B(BBFx#&6/))fSkk2226Cr-   r  r   c                      | j         z  |                     j         j        z
   j         j        z   f          } fd|D             S )aB  Finds neighboring LTTextLineHorizontals in the plane.

        Returns a list of other LTTestLineHorizontals in the plane which are
        close to self. "Close" can be controlled by ratio. The returned objects
        will be the same height as self, and also either left-, right-, or
        centrally-aligned.
        c                     g | ]u}t          |t                                        |           .                    |           s.                    |           s                    |           s|vS )	tolerance)r3   r(  _is_same_height_as_is_left_aligned_with_is_right_aligned_with_is_centrally_aligned_withr   r.   r   r*   s     r+   
<listcomp>z7LTTextLineHorizontal.find_neighbors.<locals>.<listcomp>  s     
 
 
3 455	

 ++C1+==
 ..sa.@@
 223!2DD
 66sa6HH

 
 
r-   )r   findr|   r}   r~   r   r*   r  r   r  r   s   `   @r+   r"  z#LTTextLineHorizontal.find_neighbors  sl     DKzz47DGaK$'A+FGG
 
 
 
 

 
 
 	
r-   r   otherr4  c                 B    t          |j        | j        z
            |k    S )z<Whether the left-hand edge of `other` is within `tolerance`.)r   r|   r*   r=  r4  s      r+   r6  z*LTTextLineHorizontal._is_left_aligned_with      58dg%&&)33r-   c                 B    t          |j        | j        z
            |k    S )z=Whether the right-hand edge of `other` is within `tolerance`.)r   r~   r?  s      r+   r7  z+LTTextLineHorizontal._is_right_aligned_with#  r@  r-   c                 n    t          |j        |j        z   dz  | j        | j        z   dz  z
            |k    S )z?Whether the horizontal center of `other` is within `tolerance`.   )r   r|   r~   r?  s      r+   r8  z/LTTextLineHorizontal._is_centrally_aligned_with'  8     EHux'1,$'0AQ/FFGG9TTr-   c                 B    t          |j        | j        z
            |k    S r(   )r   r   r?  s      r+   r5  z'LTTextLineHorizontal._is_same_height_as/  s    5<$+-..);;r-   r8   )r9   r:   r;   rN   r,   ri   r  r   r   r   r  r"  rU   r6  r7  r8  r5  r%  r&  s   @r+   r(  r(    su       E d    { t      
\"
 
 
j		
 
 
 
84 4; 45 4QU 4 4 4 44 4K 4E 4RV 4 4 4 4 U UU U 
	U U U U< < < <d < < < < < < < <r-   r(  c                        e Zd ZdeddfdZdeddf fdZdee         dede	e
         fd	ZddededefdZddededefdZ	 ddededefdZdededefdZ xZS )LTTextLineVerticalrF   r&   Nc                 V    t                               | |           t           | _        d S r(   )r  r,   r   _y0r+  s     r+   r,   zLTTextLineVertical.__init__4  r,  r-   r.   c                 T   t          |t                    rd| j        r]| j        t          |j        |j                  z  }|j        |z   | j        k     r(t          	                    | t          d                     |j        | _        t                      	                    |           d S r.  )r3   r   rF   r  r   r   r   rI  r   r  r   r}   r  r/  s      r+   r  zLTTextLineVertical.add:  s    c6"" 	3t'7 	3%CIsz(B(BBFv))fSkk2226Cr-   r  r   c                      | j         z  |                     j        z
   j         j        z    j        f          } fd|D             S )a>  Finds neighboring LTTextLineVerticals in the plane.

        Returns a list of other LTTextLineVerticals in the plane which are
        close to self. "Close" can be controlled by ratio. The returned objects
        will be the same width as self, and also either upper-, lower-, or
        centrally-aligned.
        c                     g | ]u}t          |t                                        |           .                    |           s.                    |           s                    |           s|vS r3  )r3   rG  _is_same_width_as_is_lower_aligned_with_is_upper_aligned_withr8  r9  s     r+   r:  z5LTTextLineVertical.find_neighbors.<locals>.<listcomp>P  s     
 
 
3 233	

 **3!*<<
 //q/AA
 223!2DD
 66sa6HH

 
 
r-   )r   r;  r|   r}   r~   r   r<  s   `   @r+   r"  z!LTTextLineVertical.find_neighborsB  sl     DJzz47Q;1dgFGG
 
 
 
 

 
 
 	
r-   r   r=  r4  c                 B    t          |j        | j        z
            |k    S )z8Whether the lower edge of `other` is within `tolerance`.)r   r}   r?  s      r+   rN  z)LTTextLineVertical._is_lower_aligned_with^  r@  r-   c                 B    t          |j        | j        z
            |k    S )z8Whether the upper edge of `other` is within `tolerance`.)r   r   r?  s      r+   rO  z)LTTextLineVertical._is_upper_aligned_withb  r@  r-   c                 n    t          |j        |j        z   dz  | j        | j        z   dz  z
            |k    S )z=Whether the vertical center of `other` is within `tolerance`.rC  )r   r}   r   r?  s      r+   r8  z-LTTextLineVertical._is_centrally_aligned_withf  rD  r-   c                 B    t          |j        | j        z
            |k    S r(   )r   r   r?  s      r+   rM  z$LTTextLineVertical._is_same_width_asn  s    5;+,,	99r-   r8   )r9   r:   r;   rN   r,   ri   r  r   r   r   r  r"  rU   rN  rO  r8  rM  r%  r&  s   @r+   rG  rG  3  sp       E d    { t      
\"
 
 
j		
 
 
 
84 4K 4E 4RV 4 4 4 44 4K 4E 4RV 4 4 4 4 U UU U 
	U U U U:{ :u : : : : : : : : :r-   rG  c                   2    e Zd ZdZddZdefdZdefdZdS )r4   zRepresents a group of text chunks in a rectangular area.

    Note that this box is created by geometric analysis and does not
    necessarily represents a logical boundary of the text. It contains a list
    of LTTextLine objects.
    r&   Nc                 H    t                               |            d| _        d S )NrM   )r  r,   r%   rQ   s    r+   r,   zLTTextBox.__init__z  s!      &&&


r-   c           	          d| j         j         d| j         dt          | j                   d|                                 d	S r   )rc   r9   r%   r   rj   rd   rQ   s    r+   rR   zLTTextBox.__repr__~  sI    e4>*eeTZee8DI;N;NeeQUQ^Q^Q`Q`eeeer-   c                     t           r(   rf   rQ   s    r+   get_writing_modezLTTextBox.get_writing_mode  s    !!r-   rS   )r9   r:   r;   rT   r,   rV   rR   rX  r=   r-   r+   r4   r4   r  sm            f# f f f f"# " " " " " "r-   r4   c                   4     e Zd Zdeddf fdZdefdZ xZS )LTTextBoxHorizontalrX   r&   Nc                     t                                          |           | j                            d            d S )Nc                     | j          S r(   )r   r.   s    r+   <lambda>z-LTTextBoxHorizontal.analyze.<locals>.<lambda>  
     r-   keyr  r[   r   sortr*   rX   rc   s     r+   r[   zLTTextBoxHorizontal.analyze  9    !!!
//00000r-   c                     dS )Nzlr-tbr=   rQ   s    r+   rX  z$LTTextBoxHorizontal.get_writing_mode      wr-   r9   r:   r;   r?   r[   rV   rX  r%  r&  s   @r+   rZ  rZ    f        1 1T 1 1 1 1 1 1#        r-   rZ  c                   4     e Zd Zdeddf fdZdefdZ xZS )LTTextBoxVerticalrX   r&   Nc                     t                                          |           | j                            d            d S )Nc                     | j          S r(   )r~   r]  s    r+   r^  z+LTTextBoxVertical.analyze.<locals>.<lambda>  r_  r-   r`  rb  rd  s     r+   r[   zLTTextBoxVertical.analyze  re  r-   c                     dS )Nztb-rlr=   rQ   s    r+   rX  z"LTTextBoxVertical.get_writing_mode  rg  r-   rh  r&  s   @r+   rk  rk    ri  r-   rk  r5   c                   4     e Zd Zdee         ddf fdZ xZS )r5   r  r&   Nc                 r    t                                                       |                     |           d S r(   )r  r,   r
  )r*   r  rc   s     r+   r,   zLTTextGroup.__init__  s1    Dr-   )r9   r:   r;   r   TextGroupElementr,   r%  r&  s   @r+   r5   r5     sQ        X&67 D          r-   c                   (     e Zd Zdeddf fdZ xZS )LTTextGroupLRTBrX   r&   Nc                     t                                          |           |j        J |j        | j                            fd           d S )Nc                 J    dz
  | j         z  dz   | j        | j        z   z  z
  S r1   )r|   r}   r   r.   rG   s    r+   r^  z)LTTextGroupLRTB.analyze.<locals>.<lambda>  s+    Q^sv5:~#&36/23 r-   r`  r  r[   rG   r   rc  r*   rX   rG   rc   s     @r+   r[   zLTTextGroupLRTB.analyze  sl    !!!"...(

3 3 3 3 	 	
 	
 	
 	
 	
r-   r9   r:   r;   r?   r[   r%  r&  s   @r+   rs  rs    K        
 
T 
 
 
 
 
 
 
 
 
 
r-   rs  c                   (     e Zd Zdeddf fdZ xZS )LTTextGroupTBRLrX   r&   Nc                     t                                          |           |j        J |j        | j                            fd           d S )Nc                 L    dz    | j         | j        z   z  dz
  | j        z  z
  S r1   )r|   r~   r   rv  s    r+   r^  z)LTTextGroupTBRL.analyze.<locals>.<lambda>  s.    a*n-#&A:~'( r-   r`  rw  rx  s     @r+   r[   zLTTextGroupTBRL.analyze  sl    !!!"...(

( ( ( ( 	 	
 	
 	
 	
 	
r-   ry  r&  s   @r+   r|  r|    rz  r-   r|  c                       e Zd ZdeddfdZdedee         dee	         fdZ
dedee	         dee         fd	Zded
ee         dee         fdZdeddfdZdS )LTLayoutContainerrj   r&   Nc                 J    t                               | |           d | _        d S r(   )r   r,   groupsrm   s     r+   r,   zLTLayoutContainer.__init__  s#    T4(((37r-   rX   r  c              #     K   d }d }|D ]}||                     |          oqt          |j        |j                  |j        z  |                    |          k     o8|                    |          t          |j        |j                  |j        z  k     }|j	        o|
                    |          oqt          |j        |j                  |j        z  |                    |          k     o8|                    |          t          |j        |j                  |j        z  k     }|rt          |t                    s|r+t          |t                    r|                    |           n||V  d }n|rA|s?t          |j                  }|                    |           |                    |           nr|rA|s?t          |j                  }|                    |           |                    |           n/t          |j                  }|                    |           |V  d }|}!|-t          |j                  }|J |                    |           |V  d S r(   )r   r   r   rC   r   r   r  r   rD   rH   r   r   r   r3   r(  rG  r  rF   )r*   rX   r  obj0lineobj1halignvaligns           r+   group_objectszLTLayoutContainer.group_objects  s     
  ?	 ?	D $$T** IDK558MMmmD))*I t,,$*dj11H4HHI	 . , K((..KDJ
33h6KKmmD))*K t,,$+t{33h6JJK    z$0DEE   )$0BCC  HHTNNNN%JJJDD  F  -h.BCCDHHTNNNHHTNNNN  F  /0DEEDHHTNNNHHTNNNN/0DEEDHHTNNNJJJDDD<'(<==D###HHTNNN




r-   linesc              #     K   t          | j                  }|                    |           i }|D ]}|                    ||j                  }|g}|D ]C}|                    |           ||v r(|                    |                    |                     Dt          |t                    rt                      }	nt                      }	t          |          D ]}
|	                    |
           |	||
<   t                      }|D ]A}||vr||         }	|	|v r|                    |	           |	                                s|	V  BdS )z$Group neighboring lines to textboxesN)r   rj   r
  r"  rE   r  popr3   r(  rZ  rk  r"   r  setr   )r*   rX   r  r  boxesr  	neighborsmembersr  boxr.   dones               r+   group_textlinesz!LTLayoutContainer.group_textlines  sr      $)#3#3U-/ 	! 	!D++E83GHHIfG! 4 4t$$$5==NN599T??333$ 455 *!4!6!6'))G}} ! ! c

! uu 	 	D5  +Cd{{HHSMMM<<>> 				 	r-   r  c           
      \   t           t          t          f         }t          | j                  dt
          dt
          dt          fd}d|d|dt          |         ffd}g }t          t          |                    D ]t}||         }t          |dz   t          |                    D ]I}	||	         }
|
                    d |||
          t          |          t          |
          ||
f           Jut          j        |                               |           t                      }t          |          dk    rXt          j        |          \  }}}}}}||vr$||vr|s( |||          rt          j        |d	|||||f           ct%          |t&          t(          f          st%          |t&          t(          f          rt)          ||g          }nt+          ||g          }                    |                               |           |                    ||g           D ]A}t          j        |d |||          t          |          t          |          ||f           B                    |           t          |          dk    Xt3          d
 D                       S )ax  Group textboxes hierarchically.

        Get pair-wise distances, via dist func defined below, and then merge
        from the closest textbox pair. Once obj1 and obj2 are merged /
        grouped, the resulting group is considered as a new object, and its
        distances to other objects & groups are added to the process queue.

        For performance reason, pair-wise distances and object pair info are
        maintained in a heap of (idx, dist, id(obj1), id(obj2), obj1, obj2)
        tuples. It ensures quick access to the smallest element. Note that
        since comparison operators, e.g., __lt__, are disabled for
        LTComponent, id(obj) has to appear before obj in element tuples.

        :param laparams: LAParams object.
        :param boxes: All textbox objects to be grouped.
        :return: a list that has only one element, the final top level group.
        r  obj2r&   c                 (   t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }||z
  ||z
  z  | j        | j        z  z
  |j        |j        z  z
  S )a  A distance function between two TextBoxes.

            Consider the bounding rectangle for obj1 and obj2.
            Return its area less the areas of obj1 and obj2,
            shown as 'www' below. This value may be negative.
                    +------+..........+ (x1, y1)
                    | obj1 |wwwwwwwwww:
                    +------+www+------+
                    :wwwwwwwwww| obj2 |
            (x0, y0) +..........+------+
            )r   r|   r}   r  r~   r   r   r   )r  r  r|   r}   r~   r   s         r+   distz/LTLayoutContainer.group_textboxes.<locals>.distF  s     TWdg&&BTWdg&&BTWdg&&BTWdg&&BbR"W%*t{*+*t{*+r-   c                 N   t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t          | j        |j                  }t                              ||||f                    }|                    | |f          S )z8Check if there's any other object between obj1 and obj2.)	r   r|   r}   r  r~   r   r  r;  
difference)r  r  r|   r}   r~   r   r  r  s          r+   isanyz0LTLayoutContainer.group_textboxes.<locals>.isany\  s    TWdg&&BTWdg&&BTWdg&&BTWdg&&Buzz2r2r"23344D??D$<000r-   r2   Fr   Tc              3   @   K   | ]}t          t          |          V  d S r(   )r   r5   )r   gs     r+   r   z4LTLayoutContainer.group_textboxes.<locals>.<genexpr>  s,      88QDa((888888r-   )r   r4   r5   r   rj   ri   rN   r
   ranger  r  idheapqheapifyr
  r  heappopheappushr3   rk  r|  rs  removeupdater  r   )r*   rX   r  ElementTr  r  distsibox1jbox2r  
skip_isanyr   id1id2r  r  groupr=  r  s                       @r+   group_textboxesz!LTLayoutContainer.group_textboxes-  s   , K/0!&ty!1!1	{ 	+ 	% 	 	 	 	,	1 	1 	1S] 	1 	1 	1 	1 	1 	1 IKs5zz"" 	X 	XA8D1q5#e**-- X XQxeTT$%5%5r$xxD4QUVWWWWX 	eUuu%jj1nn49M%4H4H1ZCdD4coo! eeD$&7&7 N54CdD*IJJJd%6$HII :Z&8N N : *9$)F)FEE+T4L99ET"""T"""S#J'''"  ENUE 2 2BuIIr%yy%QVW    		%   / %jj1nn2 88%888888r-   c                    t          d |           \  }}|D ]}|                    |           |sd S t          |                     ||                    }t          d |          \  }}|D ]}|                    |           t          |                     ||                    }|j        [|D ]}|                    |           dt          dt          t          t          t          f         fd}	|
                    |	           nt|                     ||          | _        t                      }
| j        D ],}|                    |           |
                    |           -|
                    d            t          t           t"                   |          |z   t          t           t"                   |          z   | _        d S )Nc                 ,    t          | t                    S r(   )r3   r   r]  s    r+   r^  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    :c63J3J r-   c                 *    |                                  S r(   )r   r]  s    r+   r^  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    #,,.. r-   r  r&   c                 n    t          | t                    rd| j         | j         fS d| j         | j        fS )Nr   r2   )r3   rk  r~   r}   r|   r  s    r+   getkeyz)LTLayoutContainer.analyze.<locals>.getkey  s=    c#455 0w00w//r-   r`  c                     | j         S r(   r)   r  s    r+   r^  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    39 r-   )r   r[   r   r  r  rG   r4   r   r<   rN   rc  r  r  r$   r6   r   r   ri   r   )r*   rX   textobjs	otherobjsr.   	textlinesempties	textboxestextboxr  assignerr  s               r+   r[   zLTLayoutContainer.analyze  s    !''J'JD Q Q9 	" 	"CKK!!!! 	F++Hh??@@	%&@&@)LL) 	" 	"CKK!!!!--h	BBCC	&$ * *))))0I 0%UE0A*B 0 0 0 0 NNvN&&&&..xCCDK$H $ $h'''U####NN44N555k"I..4$g../ 	


r-   )r9   r:   r;   r   r,   r?   r   ri   r   r  r  r4   r  r	   r   r5   r  r[   r=   r-   r+   r  r    s       8T 8d 8 8 8 8
KK {#K 
*		K K K KZ   
#  
)		       D[9[9 	"[9 
k		[9 [9 [9 [9z#
 #
T #
 #
 #
 #
 #
 #
r-   r  c                   F    e Zd ZdZdedededdfdZdefdZd	e	ddfd
Z
dS )LTFigurezRepresents an area used by PDF Form objects.

    PDF Forms can be used to present figures or pictures by embedding yet
    another PDF document within a page. Note that LTFigure objects can appear
    recursively.
    r   rj   r   r&   Nc                     || _         || _        |\  }}}}||||z   ||z   f}t          ||          }t                              | |           d S r(   )r   r   r   r  r,   )	r*   r   rj   r   r7   ywhrects	            r+   r,   zLTFigure.__init__  s^    	Aq!1a!eQU# ..""4.....r-   c           
          d| j         j         d| j         dt          | j                   dt          | j                   d	S )Nr`   r   r   r   rb   )rc   r9   r   r   rj   r!   r   rQ   s    r+   rR   zLTFigure.__repr__  sI    q4>*qqTYqq(49:M:MqqWabfbmWnWnqqqqr-   rX   c                 N    |j         sd S t                              | |           d S r(   )rI   r  r[   rZ   s     r+   r[   zLTFigure.analyze  s.    ! 	F!!$11111r-   )r9   r:   r;   rT   rV   r   r   r,   rR   r?   r[   r=   r-   r+   r  r    s         /S / /f / / / / /r# r r r r2 2T 2 2 2 2 2 2r-   r  c            	       8    e Zd ZdZd
dedededdfdZdefd	Z	dS )LTPagezRepresents an entire page.

    Like any other LTLayoutContainer, an LTPage can be iterated to obtain child
    objects like LTTextBox, LTFigure, LTImage, LTRect, LTCurve and LTLine.
    r   pageidrj   rotater&   Nc                 X    t                               | |           || _        || _        d S r(   )r  r,   r  r  )r*   r  rj   r  s       r+   r,   zLTPage.__init__  s*    ""4...r-   c           	      l    d| j         j         d| j        dt          | j                   d| j        d	S )Nr`   r   r   z rotate=rb   )rc   r9   r  r   rj   r  rQ   s    r+   rR   zLTPage.__repr__  s@    k4>*kkT[kkhty>Q>Qkk[_[fkkkkr-   r8   )
r9   r:   r;   rT   r<   r   rN   r,   rV   rR   r=   r-   r+   r  r    su          s $  d    
l# l l l l l lr-   r  )Ir  loggingtypingr   r   r   r   r   r   r	   r
   r   r   r   r   pdfminer.pdfcolorr   pdfminer.pdfexceptionsr   r   pdfminer.pdffontr   pdfminer.pdfinterpr   r   pdfminer.pdftypesr   pdfminer.utilsr   r   r   r   r   r   r   r   r   r   r    r!   r"   	getLoggerr9   loggerr$   r?   r/   r]   ri   r   r   r   r   r   r   r   r   r  r  TextLineElementr  r(  rG  r4   rZ  rk  rq  r5   rs  r|  r  r  r  r=   r-   r+   <module>r     s                                , + + + + + > > > > > > > > $ $ $ $ $ $ 5 5 5 5 5 5 5 5 ' ' ' ' ' '                               
	8	$	$
 
 
 
 
 
 
 
A
 A
 A
 A
 A
 A
 A
 A
H+ + + + + + + +" " " " " " " "F F F F F& F F FR!; !; !; !; !;k !; !; !;H
 
 
 
 
W 
 
 
B
 
 
 
 
W 
 
 
Bb b b b bk b b b*    VV   7 7 7 7 7[& 7 7 7t ')6
*
*
*" " " " "+ww/ " " "2
 
 
 
 
K0 
 
 
$
 
 
 
 
+G4f 
 
 
 '? ? ? ? ?1 ? ? ?<<< << << << <<: << << <<~<: <: <: <: <: <: <: <:~" " " " "
+ " " "&    )       	    M12     /"23   	
 	
 	
 	
 	
k 	
 	
 	
	
 	
 	
 	
 	
k 	
 	
 	
u
 u
 u
 u
 u
K0 u
 u
 u
p2 2 2 2 2  2 2 22l l l l l l l l l lr-   