
    .i/c                        d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZm Z  d d	l!m"Z" d d
l#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5 ddl&m6Z6m7Z7m8Z8 ddl9m:Z:m;Z; ddl<m=Z=  ej>        d          Z? e@g d          ZAerddlBmCZC ddlDmEZE ddddddZFdeGdeHfd ZId!eeHef         deeHef         fd"ZJ G d# d$e          ZKd5d%e(d&e)de(fd'ZLd%e(d(e)de(fd)ZM G d* d+e-          ZN G d, d-eN          ZOd.e(d/e(ddfd0ZP G d1 d2eO          ZQ G d3 d4eO          ZRdS )6    N)	lru_cache)
TYPE_CHECKINGAnyCallableDict	GeneratorListOptionalPatternTupleUnion)	normalize)warn)PDFPageAggregator)LTCharLTComponentLTContainerLTCurveLTItemLTPageLTTextContainer)PDFPageInterpreter	PDFStackT)PDFPage)	PSLiteral   )utils)T_bboxT_numT_obj
T_obj_list)	Container)PDFStructTreeStructTreeMissing)T_table_settingsTableTableFinderTableSettings)decode_textresolve_allresolve_and_decode)MalformedPDFExceptionPdfminerException)TextMapz^LT)advheight	linewidthptssizesrcsizewidthx0x1y0y1bitsmatrixuprightfontnametext	imagemask
colorspaceevenoddfillnon_stroking_colorstrokestroking_colorstreamnamemcidtag)	PageImage)PDFzSimSun,RegularzSimHei,RegularzSimKai,RegularzSimFang,RegularzSimLi,Regular)s   s   s   _GB2312s   _GB2312s   r=   returnc                     d| v r-|                      d          dz   }| d |         | |d          }}nd| }}t                              |t          |          dd                   }t          |          dd         |z   S )N   +r          )indexCP936_FONTNAMESgetstr)r=   split_atprefixsuffix
suffix_news        K/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfplumber/page.pyfix_fontname_bytesr[   \   s    x>>$''!+!)8),hxyy.Ah $$VS[[2->??Jv;;qtz))rO   kwargsc                 >    d |                                  D             S )Nc                 b    i | ],\  }}|t          |t                    rt          |          n|-S  )
isinstancelisttuple).0keyvalues      rZ   
<dictcomp>z'tuplify_list_kwargs.<locals>.<dictcomp>h   sG       C 	j55@eElll5  rO   )items)r\   s    rZ   tuplify_list_kwargsrh   g   s+      ,,..   rO   c                        e Zd ZU dZdZee         ed<   dZee	         ed<   dde
dee         ddfdZdd	Zdd
Zdef fdZd fdZd fdZ xZS )"PDFPageAggregatorWithMarkedContentzZExtract layout from a specific page, adding marked-content IDs to
    objects where found.Ncur_mcidcur_tagrI   propsrL   c                     t          |j                  | _        t          |t                    rd|v r|d         | _        dS d| _        dS )z5Handle beginning of tag, setting current MCID if any.MCIDN)r)   rG   rl   r`   dictrk   )selfrI   rm   s      rZ   	begin_tagz,PDFPageAggregatorWithMarkedContent.begin_tagu   sG    "38,,eT"" 	!v!&MDMMM DMMMrO   c                 "    d| _         d| _        dS )z/Handle beginning of tag, clearing current MCID.N)rl   rk   rq   s    rZ   end_tagz*PDFPageAggregatorWithMarkedContent.end_tag}   s    rO   c                 v    | j         j        r,| j         j        d         }| j        |_        | j        |_        dS dS )z^Add current MCID to what we hope to be the most recent object created
        by pdfminer.six.rQ   N)cur_item_objsrk   rH   rl   rI   )rq   cur_objs     rZ   tag_cur_itemz/PDFPageAggregatorWithMarkedContent.tag_cur_item   s@     = 	'm)"-G=GL,GKKK	' 	'rO   c                 b     t                      j        |i |}|                                  |S )z;Hook for rendering characters, adding the `mcid` attribute.)superrender_charrz   )rq   argsr\   r/   	__class__s       rZ   r}   z.PDFPageAggregatorWithMarkedContent.render_char   s5    !egg!42622
rO   c                 b     t                      j        |i | |                                  dS )z7Hook for rendering images, adding the `mcid` attribute.N)r|   render_imagerz   rq   r~   r\   r   s      rZ   r   z/PDFPageAggregatorWithMarkedContent.render_image   s7    d-f---rO   c                 b     t                      j        |i | |                                  dS )zAHook for rendering lines and curves, adding the `mcid` attribute.N)r|   
paint_pathrz   r   s      rZ   r   z-PDFPageAggregatorWithMarkedContent.paint_path   s7    D+F+++rO   NrL   N)__name__
__module____qualname____doc__rk   r
   int__annotations__rl   rU   r   r   rr   ru   rz   floatr}   r   r   __classcell__r   s   @rZ   rj   rj   n   s          #Hhsm"""!GXc]!!!! !Y !x	/B !d ! ! ! !   
' ' ' 'e           
         rO   rj   box_rawrotationc                     t          d | D                       st          d|            t          | d         | d         f          \  }}t          | d         | d         f          \  }}|dv r||||fS ||||fS )Nc              3   J   K   | ]}t          |t          j                  V  d S r   )r`   numbersNumberrc   xs     rZ   	<genexpr>z!_normalize_box.<locals>.<genexpr>   s.      >>z!W^,,>>>>>>rO   z0Bounding box contains non-number coordinate(s): r   rP   r      )Z   i  )allr,   sorted)r   r   r6   r7   r8   r9   s         rZ   _normalize_boxr      s    
 >>g>>>>> 
#HwHH
 
 	
 WQZ,--FBWQZ,--FB9BBBBrO   	mb_heightc                 (    | \  }}}}|||z
  |||z
  fS r   r_   )r   r   r6   r8   r7   r9   s         rZ   _invert_boxr      s&    NBB	BIN33rO   c                      e Zd ZU ej        dgz   Zee         ed<   dZe	ed<   dZ
	 dGddd	ed
edefdZdHdZedefd            Zedefd            Zedeeeef                  fd            Zedefd            Zedefd            Zedefd            Zedeeef         fd            Zdeeef         deeef         fdZdedefdZ dee!         de"eddf         fdZ#deeef         fdZ$	 dIde%e&         de'fdZ(	 dIde%e&         dee)         fdZ*	 dIde%e&         de%e)         fd Z+	 dIde%e&         deeee%e                                    fd!Z,	 dIde%e&         de%eee%e                                    fd"Z-d#ede.fd$Z/	 	 	 	 	 dJd%e0ee1e         f         d&e	d'e	d(ed)e	d*e	d#edeeeef                  fd+Z2d#edefd,Z3d#edefd-Z4d#edefd.Z5	 dKd/e	d)e	d#edefd0Z6	 dLd2e7d3e	d4e	dd5fd6Z8	 dLd2e7d3e	d4e	dd5fd7Z9	 dLd2e7d3e	d4e	dd5fd8Z:d9e;ege	f         dd:fd;Z<d#edd:fd<Z=	 	 	 	 	 dMd=e%e0ee>f                  d>e%e0ee>f                  d?e%e0ee>f                  d@e	dAe	ddBfdCZ?dIdDe%ee                  deeef         fdEZ@defdFZAdS )NPage_layoutcached_propertiesTis_originalNr   pdfrK   page_objpage_numberinitial_doctopc                 x   || _         | | _        | _        || _        || _        ddt
          dt          dt          ffd} |dd          }|dz  | _        t           |d          | j                  }|d	         |d
         z
  }t          ||          | _
        dD ]Z}	|	j        v rOt          t           ||	          | j                  |          }
t          | |	                                |
           [dj        vr| j
        | _        | j
        | _         t!                      | j                  | _        d S )Nrd   defaultrL   c                 ^    t          j                            |                     }||n|S r   )r*   attrsrT   )rd   r   re   r   s      rZ   get_attrzPage.__init__.<locals>.get_attr   s.     2 23 7 788E#m776rO   Rotater   ih  MediaBoxr   r   )CropBoxTrimBoxBleedBoxArtBoxr   r   )r   	root_pager   r   r   rU   r   r   r   r   mediaboxr   setattrlowercropboxbboxr   _get_textmapget_textmap)rq   r   r   r   r   r   	_rotationmb_rawr   box_namebox_normalizeds     `        rZ   __init__zPage.__init__   sg     &,	7 	7# 	7 	7s 	7 	7 	7 	7 	7 	7 HXq))	!C 4 4dmDD1Iq	)	#FI66D 	@ 	@H8>))!,"88H#5#5t}EEy" " hnn..???HN**=DL M	 '9;;t'899rO   rL   c                 `    |                                   | j                                         d S r   )flush_cacher   cache_clearrt   s    rZ   closez
Page.close   s/    $$&&&&&rO   c                 8    | j         d         | j         d         z
  S )NrP   r   r   rt   s    rZ   r5   z
Page.width       y|dil**rO   c                 8    | j         d         | j         d         z
  S )Nr   r   r   rt   s    rZ   r0   zPage.height   r   rO   c                 f    	 d t          | j        |           D             S # t          $ r g cY S w xY w)z-Return the structure tree for a page, if any.c                 6    g | ]}|                                 S r_   )to_dict)rc   elems     rZ   
<listcomp>z'Page.structure_tree.<locals>.<listcomp>   s     MMMtDLLNNMMMrO   )r#   r   r$   rt   s    rZ   structure_treezPage.structure_tree   sK    	MM}TXt/L/LMMMM  	 	 	III	s   ! 00c                 t   t          | d          r| j        S t          | j        j        | j        | j        j                  }t          | j        j        |          }	 |                    | j	                   n!# t          $ r}t          |          d }~ww xY w|                                | _        | j        S )Nr   )pagenolaparams)hasattrr   rj   r   rsrcmgrr   r   r   process_pager   	Exceptionr-   
get_result)rq   deviceinterpreteres       rZ   layoutzPage.layout   s    4## 	 <3H#X&
 
 

 ))96BB	'$$T]3333 	' 	' 	'#A&&&	'%0022|s   A9 9
BBBc                 v    dt           t          t          f         dt          dt           t          t          f         f fddt          dt          f fd}t	           j        j                  pg }t          t          ||                    }t           t                    r                     |          S |S )NptrrL   c                 |    |dz  }t          |          D ]%}| \  }}||dz  k    rj        nj        }|||z
  f} &| S )Nr   rP   )ranger5   r0   )r   r   turnsir   ycomprq   s          rZ   rotate_pointz!Page.annots.<locals>.rotate_point  sW    GE5\\ % %1%&%!)^^tzz$(_IrO   annotc                 8   | d         \  }}}} ||fj                   } ||fj                   }j        j        }t          t	          g ||R           |          \  }}	}
}|                     di           }|                    d          |                     d          |                     d          d}|                                D ]~\  }}|w	 |                    d          ||<   !# t          $ rP 	 |                    d          ||<   n3# t          $ r& j	        j
        r t          d	| d
| d           Y nw xY wY zw xY wj        d|||z
  |
||	z
  j        |	z   |	||
|z
  ||	z
  d}|                    |           d| v r| d<   | |d<   |S )NRectAURITContents)urititlecontentszutf-8zutf-16zCould not decode z of annotation. z will be missing.r   )r   object_typer6   r8   r7   r9   doctoptopbottomr5   r0   Pdata)r   r   r0   r   r   rT   rg   decodeUnicodeDecodeErrorr   raise_unicode_errorsr   r   r   update)r   _a_b_c_dpt0pt1rhr6   r   r7   r   aextraskvparsedr   rq   s                    rZ   parsezPage.annots.<locals>.parse  s$   "6]NBB,Bx77C,Bx77C&B"-n\s\S\\.J.JB"O"OBR		#r""AuuU||3!IIj11 F
   1=$%HHW$5$5q		- 	 	 	()(:(:F1II1   #x< & % !9A !9 !9$%!9 !9 !9    	 !  $/&6k3h-3 b 3, F MM&!!! e||!c
"F6NMs6   C22
E=DE-EEEEE)r   r   r   r    r*   r   annotsra   mapr`   CroppedPage_crop_fn)rq   r  rawr  r   s   `   @rZ   r  zPage.annots  s    	U5%<0 	S 	U5%<=P 	 	 	 	 	 	/	 /	5 /	 /	 /	 /	 /	 /	 /	b $-.//52c%oo&&dK(( 	==(((MrO   c                 $    d | j         D             S )Nc                 "    g | ]}|d          
|S )r   r_   )rc   r  s     rZ   r   z#Page.hyperlinks.<locals>.<listcomp>T  s!    ???a!E(*>*>*>*>rO   )r  rt   s    rZ   
hyperlinkszPage.hyperlinksR  s    ??4;????rO   c                 p    t          | d          r| j        S |                                 | _        | j        S )N_objects)r   r  parse_objectsrt   s    rZ   objectszPage.objectsV  s7    4$$ 	!= /3/A/A/C/C}rO   r   c                 j    | j         d         |d         z   | j         d         | j        z   |d         z
  fS )Nr   r   )r   r0   )rq   r   s     rZ   point2coordzPage.point2coord]  s4    a 2a5($-*:T[*H2a5*PQQrO   objc           
          t          j        t          d|j        j                                                  }dt          t          t          f         dt          t          t          t          f                  fd}t          t          d t          ||j                                                                      }||d<    j        |d<   dD ]7}t!          ||          r%t#          t%          ||          j                  ||<   8t)          |t*          t,          f          r?|                                } j        j        t5           j        j        |          n||d<   t)          |t*                    r|j        }t)          |j        t:                    r|j        n|j        f|d	<   t)          |j        t:                    r|j        n|j        f|d
<   t)          |d         t>                    rtA          |d                   |d<   nat)          |tB          f          rKtE          t           j#        |d                             |d<    fd|j$        D             |d<   |j%        |d<    j&        d d         \  }}	d|v r? j'        |d         z
  |	z   |d<    j'        |d         z
  |	z   |d<    j(        |d         z   |d<   d|v r"|dk    r|d         |z   |d<   |d         |z   |d<   |S )N itemrL   c                 H    | \  }}|t           v rt          |          }||fS d S r   )	ALL_ATTRSr*   )r  r  r  ress       rZ   process_attrz)Page.process_object.<locals>.process_attrd  s-    DAqI~~!!nn3xtrO   r   r   )ncsscsr>   rE   rC   r=   r2   c                 F    g | ]^}}|gt          j        |          R S r_   )r  r  )rc   cmdr2   rq   s      rZ   r   z'Page.process_object.<locals>.<listcomp>  s5    bbb93S>3t'7#=#=>>bbbrO   pathdashrP   r8   r9   r   r   r   r6   r   r7   ))resublt_patr   r   r   r   rU   r   r
   rp   filterr  __dict__rg   r   r   r+   getattrrG   r`   r   r   get_textr   unicode_normnormalize_unicodegraphicstatescolorrb   ncolorbytesr[   r   ra   r  original_pathdashing_styler   r0   r   )
rq   r  kindr  attrcsr>   gsmb_x0mb_tops
   `         rZ   process_objectzPage.process_objecta  s   vfb#-"899??AA	uS#X 	8E#s(O3L 	 	 	 	 F4\3<3E3E3G3G!H!HIIJJ"]".]  	E 	EB sB E-gc2.>.>.CDDRcFO455 	<<>>D 8(4 "$("7>>> L c6"" 	- !B'	599K			| !" (	599K			| %&
 $z*E22 H#5d:6F#G#GZ gZ(( 	-s4#3T%[AABBDK cbbbPSPabbbDL,DL
 bqb)v4<<;d3v=DK"kDJ6&@DN!04;>DN4<<EQJJde+DJde+DJrO   layout_objectsc              #      K   |D ]r}t          |t                    rD| j        j        |                     |          V  |                     |j                  E d {V  [|                     |          V  sd S r   )r`   r   r   r   r8  iter_layout_objectsrx   )rq   r9  r  s      rZ   r;  zPage.iter_layout_objects  s       " 		/ 		/C#{++ /8$0--c2222233CI>>>>>>>>>>))#......		/ 		/rO   c                     i }|                      | j        j                  D ]D}|d         }|dv r|                    |          g ||<   ||                             |           E|S )Nr   )anno)r;  r   rx   rT   append)rq   r  r  r2  s       rZ   r  zPage.parse_objects  s|    )+++DK,=>> 	& 	&C}%Dx{{4  ( "DM  %%%%rO   table_settingsc                 J    t          j        |          }t          | |          S r   )r(   resolver'   rq   r?  tsets      rZ   debug_tablefinderzPage.debug_tablefinder  s$     $^444&&&rO   c                 T    t          j        |          }t          | |          j        S r   )r(   rA  r'   tablesrB  s      rZ   find_tableszPage.find_tables  s'     $^444&&--rO   c                    t          j        |          }|                     |          }t          |          dk    rd S dt          dt
          t          t          t          f         fd}t          t          ||                    d         }|S )Nr   r   rL   c                 ^    t          | j                   | j        d         | j        d         fS )Nr   r   )lencellsr   r   s    rZ   sorterzPage.find_table.<locals>.sorter  s%    \\M16!9afQi88rO   )rd   )
r(   rA  rG  rJ  r&   r   r   r   ra   r   )rq   r?  rC  rF  rM  largests         rZ   
find_tablezPage.find_table  s     $^44!!$''v;;!4	9e 	9c5%&7 8 	9 	9 	9 	9 vf&1112215rO   c                 r    t          j        |          |                               }fd|D             S )Nc                 8    g | ]} |j         d i j        pi S )r_   )extracttext_settings)rc   tablerC  s     rZ   r   z'Page.extract_tables.<locals>.<listcomp>  s3    PPP;;!3!9r;;PPPrO   )r(   rA  rG  )rq   r?  rF  rC  s      @rZ   extract_tableszPage.extract_tables  sB     $^44!!$''PPPPPPPPrO   c                     t          j        |          }|                     |          }|d S  |j        di |j        pi S Nr_   )r(   rA  rO  rR  rS  )rq   r?  rC  rT  s       rZ   extract_tablezPage.extract_table  sO     $^44%%=4 5=>>D$6$<">>>rO   r\   c                     t          | j                  }d|vr|                    d| j        i           d|vr|                    d| j        i           i ||}t          j        | j        fi |S )N)layout_bboxlayout_width_charslayout_widthlayout_height_charslayout_height)rp   r   r   r5   r0   r   chars_to_textmapchars)rq   r\   defaultsfull_kwargss       rZ   r   zPage._get_textmap  s    #'	$
 $
 $
  v--OO^TZ8999 ..OO_dk:;;;&<&<V&<%dj@@K@@@rO   patternregexcase
main_groupreturn_charsreturn_groupsc                 l     | j         di t          |          }|                    ||||||          S )N)rd  re  rf  rg  rh  r_   )r   rh   search)	rq   rc  rd  re  rf  rg  rh  r\   textmaps	            rZ   rj  zPage.search  sR     #$"AA%8%@%@AA~~!%'  
 
 	
rO   c                 @     | j         di t          |          j        S rW  )r   rh   	as_stringrq   r\   s     rZ   extract_textzPage.extract_text  s'    t>>"5f"="=>>HHrO   c                 0    t          j        | j        fi |S r   )r   extract_text_simpler`  rn  s     rZ   rq  zPage.extract_text_simple  s    (>>v>>>rO   c                 0    t          j        | j        fi |S r   )r   extract_wordsr`  rn  s     rZ   rs  zPage.extract_words  s    "4:88888rO   stripc                 `     | j         di t          |                              ||          S )N)rt  rg  r_   )r   rh   extract_text_lines)rq   rt  rg  r\   s       rZ   rv  zPage.extract_text_lines  sC      t>>"5f"="=>>QQl R 
 
 	
rO   Fr   relativestrictr	  c                 (    t          | |||          S )N)rw  rx  )r	  rq   r   rw  rx  s       rZ   cropz	Page.crop!  s     4HHHHrO   c                 >    t          | |||t          j                  S zS
        Same as .crop, except only includes objects fully within the bbox
        )rw  rx  crop_fn)r	  r   within_bboxrz  s       rZ   r  zPage.within_bbox&  s)     $&%BS
 
 
 	
rO   c                 >    t          | |||t          j                  S r}  )r	  r   outside_bboxrz  s       rZ   r  zPage.outside_bbox0  s)     $&%BT
 
 
 	
rO   test_functionFilteredPagec                 "    t          | |          S r   )r  )rq   r  s     rZ   r&  zPage.filter:  s    D-000rO   c                     t          | d           }d | j                                        D             |_        t	          j        | j        fi ||j        d<   |S )u   
        Removes duplicate chars — those sharing the same text and positioning
        (within `tolerance`) as other characters in the set. Adjust extra_args
        to be more/less restrictive with the properties checked.
        c                     dS )NTr_   rL  s    rZ   <lambda>z#Page.dedupe_chars.<locals>.<lambda>C  s     rO   c                     i | ]\  }}||	S r_   r_   )rc   r2  objss      rZ   rf   z%Page.dedupe_chars.<locals>.<dictcomp>D  s    HHHZT4dDHHHrO   char)r  r  rg   r  r   dedupe_charsr`  )rq   r\   ps      rZ   r  zPage.dedupe_chars=  s^     ~~..HH4<3E3E3G3GHHH
"/
EEfEE
6rO   
resolutionr5   r0   	antialiasforce_mediaboxrJ   c                     ddl m}m} t          d |||fD                       }|dk    rt	          d|           |d|z  | j        z  }n|d|z  | j        z  } || |p|||          S )z
        You can pass a maximum of 1 of the following:
        - resolution: The desired number pixels per inch. Defaults to 72.
        - width: The desired image width in pixels.
        - height: The desired image width in pixels.
        r   )DEFAULT_RESOLUTIONrJ   c              3      K   | ]}|d uV  	d S r   r_   r   s     rZ   r   z Page.to_image.<locals>.<genexpr>X  s&      KK!KKKKKKrO   zUOnly one of these arguments can be provided: resolution, width, height. You provided NH   )r  r  r  )displayr  rJ   sum
ValueErrorr5   r0   )	rq   r  r5   r0   r  r  r  rJ   	num_specss	            rZ   to_imagezPage.to_imageH  s     	;:::::::KK
E6/JKKKKK	q==shqss   edj0JJft{2Jy!7%7)	
 
 
 	
rO   object_typesc           	         |+t          | j                                                  dgz   }n|}| j        | j        | j        | j        | j        | j        | j	        | j
        d}|D ]}t          | |dz             ||dz   <   |S )Nr   )r   r   r   r   r   r   r5   r0   s)ra   r  keysr   r   r   r   r   r   r5   r0   r(  )rq   r  _object_typesdts        rZ   r   zPage.to_dicti  s     !2!2!4!455	AMM(M+"1|IZk	
 	
  	0 	0A q3w//Aa#gJJrO   c                     d| j          dS )Nz<Page:>)r   rt   s    rZ   __repr__zPage.__repr__|  s    +(++++rO   r   r   r   )TTr   TT)TT)FT)NNNFF)Br   r   r   r"   r   r	   rU   r   r   boolpagesr   r   r   r   r   propertyr5   r0   r   r   r   r   r   r!   r  r  r  r   r  r   r    r8  r   r   r;  r  r
   r%   r'   rD  r&   rG  rO  rU  rX  r.   r   r   r   rj  ro  rq  rs  rv  r   r{  r  r  r   r&  r  r   r  r   r  r_   rO   rZ   r   r      s)        #,#>)#LtCyLLLKE !"*: *:*: *: 	*:
 *: *: *: *:X' ' ' ' +u + + + X+ + + + + X+ T#s(^ 4    X     X  ?
 ? ? ? X?B @J @ @ @ X@ c:o.    XReE5L1 ReE5L6I R R R RH& HU H H H HT/";//	5$$	%/ / / /	tCO4 	 	 	 	 <@' '&'78'	' ' ' ' <@. .&'78.	e. . . . <@ &'78	%   $ <@Q Q&'78Q	d4&'	(Q Q Q Q <@? ?&'78?	$tHSM*+	,? ? ? ?	AS 	AW 	A 	A 	A 	A !"
 
sGCL()
 
 	

 
 
 
 
 
d38n	
 
 
 
(IS IS I I I I?C ?C ? ? ? ?9c 9j 9 9 9 9 8<
 

04
GJ
	
 
 
 
 DHI II&*I<@I	I I I I DH
 

&*
<@
	
 
 
 
 DH
 

&*
<@
	
 
 
 
1HeWd]$; 1 1 1 1 1	S 	^ 	 	 	 	 37-1.2$
 
U3:./
 c5j)*
 sEz*+	

 
 
 

 
 
 
B HT#Y$7 4S>    &,# , , , , , ,rO   r   c                   *    e Zd ZU dZeed<   defdZdS )DerivedPageFr   parent_pagec                 T   || _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        | 	                    t          j                    t                      | j                  | _        d S r   )r  r   r   r   r   r   r   r   r   r   r"   r   r   r   r   )rq   r  s     rZ   r   zDerivedPage.__init__  s    &$.?#,&2)8#,#,"*4555&9;;t'899rO   N)r   r   r   r   r  r   r   r   r_   rO   rZ   r  r    sA         K:D : : : : : :rO   r  r   parent_bboxc                    t          j        |           }|dk    rt          d|  d          t          j        | |          }|t          d|  d|           t          j        |          }||k     rt          d|  d|           d S )Nr   zBounding box z has an area of zero.z. is entirely outside parent page bounding box z. is not fully within parent page bounding box )r   calculate_arear  get_bbox_overlap)r   r  	bbox_areaoverlapoverlap_areas        rZ   test_proposed_bboxr    s    $T**IA~~DDDDEEE$T;77G6D 6 6(36 6
 
 	

 '00Li6D 6 6(36 6
 
 	
  rO   c                        e Zd Zej        ddfdededeeegef         de	de	f
 fdZ
ed	eeef         fd
            Z xZS )r	  FTr  	crop_bboxr~  rw  rx  c                 H   |r%|j         \  }}}}\  }	}
}}|	|z   |
|z   ||z   ||z   f|rt          |j                    dt          dt          ffd}t                                          |           || _        t          j        u r|j         | _         d S | _         d S )Nr  rL   c                      |           S r   r_   )r  r  r~  s    rZ   r
  z&CroppedPage.__init__.<locals>._crop_fn  s    74+++rO   )r   r  r!   r|   r   r
  r   r  )rq   r  r  r~  rw  rx  o_x0o_top_r6   r   r7   r   r
  r   s     ``          rZ   r   zCroppedPage.__init__  s      	L + 0D%A"+BRdC%KdFUNKI 	<y+*:;;;	,: 	,* 	, 	, 	, 	, 	, 	, 	, 	%%%  e(((#(DIII!DIIIrO   rL   c                      t           d          r j        S  fd j        j                                        D              _         j        S )Nr  c                 B    i | ]\  }}|                     |          S r_   )r
  rc   r  r  rq   s      rZ   rf   z'CroppedPage.objects.<locals>.<dictcomp>  s8     0
 0
 0
$(AqAt}}Q0
 0
 0
rO   r   r  r  r  rg   rt   s   `rZ   r  zCroppedPage.objects  sc    4$$ 	!= 0
 0
 0
 0
,0,<,D,J,J,L,L0
 0
 0
 }rO   )r   r   r   r   crop_to_bboxr   r   r   r!   r  r   r  r   rU   r  r   r   s   @rZ   r	  r	    s        
 ?D>P" "" " :v.
:;	"
 " " " " " " ": c:o.    X    rO   r	  c                   f     e Zd Zdedeegef         f fdZede	e
ef         fd            Z xZS )r  r  	filter_fnc                 p    |j         | _         || _        t                                          |           d S r   )r   r  r|   r   )rq   r  r  r   s      rZ   r   zFilteredPage.__init__  s2    $	"%%%%%rO   rL   c                      t           d          r j        S  fd j        j                                        D              _         j        S )Nr  c           	      \    i | ](\  }}|t          t          j        |                    )S r_   )ra   r&  r  r  s      rZ   rf   z(FilteredPage.objects.<locals>.<dictcomp>  sC     0
 0
 0
1 tF4>1--..0
 0
 0
rO   r  rt   s   `rZ   r  zFilteredPage.objects  sc    4$$ 	!= 0
 0
 0
 0
(066880
 0
 0
 }rO   )r   r   r   r   r   r    r  r   r  r   rU   r!   r  r   r   s   @rZ   r  r    s        &D &Xugtm5L & & & & & &
 c:o.    X    rO   r  r  )Sr   r#  	functoolsr   typingr   r   r   r   r   r	   r
   r   r   r   unicodedatar   r+  warningsr   pdfminer.converterr   pdfminer.layoutr   r   r   r   r   r   r   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.psparserr   r  r   _typingr   r   r    r!   	containerr"   	structurer#   r$   rT  r%   r&   r'   r(   r)   r*   r+   utils.exceptionsr,   r-   
utils.textr.   compiler%  setr  r  rJ   r   rK   rS   r/  rU   r[   rh   rj   r   r   r   r  r  r	  r  r_   rO   rZ   <module>r     sv    				                              7 6 6 6 6 6       0 0 0 0 0 0                  = < < < < < < < $ $ $ $ $ $ ' ' ' ' ' '       5 5 5 5 5 5 5 5 5 5 5 5             7 7 7 7 7 7 7 7 F F F F F F F F F F F F ? ? ? ? ? ? ? ? ? ? F F F F F F F F      	F		C   	B  """"""
 *) 0 1( * *3 * * * *S#X 4S>    0 0 0 0 0): 0 0 0f   F  e  F        (4 4E 4f 4 4 4 4
C, C, C, C, C,9 C, C, C,L: : : : :$ : : :"
V 
& 
T 
 
 
 
(% % % % %+ % % %P    ;     rO   