
    .iʊ                        d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d d	l-m.Z.m/Z/ d d
l0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZB  ejC        eD          ZE G d de*          ZF G d deF          ZG ede
ee8          ZH G d deFeeH                   ZI G d deIe8                   ZJ G d deIe8                   ZK G d deIe8                   ZL G d deIe8                   ZMdS )    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)utils)ImageWriter)LAParamsLTAnnoLTCharLTComponentLTContainerLTCurveLTFigureLTImageLTItemLTLayoutContainerLTLineLTPageLTRectLTText	LTTextBoxLTTextBoxVerticalLTTextGroup
LTTextLineTextGroupElement)PDFColorSpace)PDFTextDevice)PDFValueError)PDFFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOMatrixPathSegmentPointRectapply_matrix_ptapply_matrix_rectbbox2strencmake_compat_strmult_matrixc                   *   e Zd ZU eed<   eed<   	 	 d&dededee	         ddfd	Z
d
ededdfdZd
eddfdZdedededdfdZdeddfdZdededdfdZdededededee         ddfdZdedededededed ed!edefd"Zdededefd#Zd$eddfd%ZdS )'PDFLayoutAnalyzercur_itemctm   Nrsrcmgrpagenolaparamsreturnc                 Z    t          j        | |           || _        || _        g | _        d S N)r$   __init__r=   r>   _stackselfr<   r=   r>   s       N/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfminer/converter.pyrB   zPDFLayoutAnalyzer.__init__E   s0     	tW--- /1    pagec                     t          ||j                  \  }}}}ddt          ||z
            t          ||z
            f}t          | j        |          | _        d S )Nr   )r2   mediaboxabsr   r=   r9   )rE   rH   r:   x0y0x1y1rJ   s           rF   
begin_pagezPDFLayoutAnalyzer.begin_pageP   sU    ,S$-@@RRq#b2g,,BG5t{H55rG   c                    | j         r)J t          t          | j                                         t          | j        t
                    s)J t          t          | j                                        | j        | j                            | j                   | xj	        dz  c_	        | 
                    | j                   d S )Nr;   )rC   strlen
isinstancer9   r   typer>   analyzer=   receive_layout)rE   rH   s     rF   end_pagezPDFLayoutAnalyzer.end_pageU   s    ;55C$4$4 5 5555$-00JJ#d4=6I6I2J2JJJJ=$M!!$-000qDM*****rG   namebboxmatrixc                     | j                             | j                   t          ||t	          || j                            | _        d S rA   )rC   appendr9   r   r6   r:   )rE   rY   rZ   r[   s       rF   begin_figurezPDFLayoutAnalyzer.begin_figure]   s=    4=))) t[-J-JKKrG   _c                 
   | j         }t          | j         t                    s)J t          t	          | j                                         | j                                        | _         | j                             |           d S rA   )r9   rT   r   rR   rU   rC   popadd)rE   r_   figs      rF   
end_figurezPDFLayoutAnalyzer.end_figurea   sj    m$-22LLCT]8K8K4L4LLLL))#rG   streamc                 :   t          | j        t                    s)J t          t	          | j                                        t          ||| j        j        | j        j        | j        j        | j        j	        f          }| j        
                    |           d S rA   )rT   r9   r   rR   rU   r   rL   rM   rN   rO   rb   )rE   rY   re   items       rF   render_imagezPDFLayoutAnalyzer.render_imageg   s    $-22LLCT]8K8K4L4LLLL]t}/1A4=CST
 

 	$rG   gstatestrokefillevenoddpathc                     d                     d D                       }|dd         dk    rdS |                    d          dk    rct          j        d|          D ]K}|                    d          |                    d                   }                     |||||           LdS fdD             }	 fd	|	D             }
d
 D             } fdD             }d t          ||          D             }t          |          dk    rA|dd         dk    r3|
d         |
d         k    r!|dd         dz   }|
	                                 |dv rUt          |j        |
d         |
d         ||||j        |j        ||j        
  
        } j                            |           dS |dv r|
\  \  }}\  }}\  }}\  }}}|
d         |
d         k    }||k    o||k    o||k    o||k    p||k    o||k    o||k    o||k    }|rZ|rXt#          |j        g |
d         |
d         R ||||j        |j        ||j        	  	        } j                            |           dS t%          |j        |
||||j        |j        ||j        	  	        } j                            |           dS t%          |j        |
||||j        |j        ||j        	  	        } j                            |           dS )z@Paint paths described in section 4.4 of the PDF reference manual c              3   &   K   | ]}|d          V  dS )r   N ).0xs     rF   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>y   s&      ++!++++++rG   Nr;   mzm[^m]+r   c                     g | ];}t          t          |d          dk    r
|dd         nd          dd                   <S )r   hN)r   r/   )rr   prm   s     rF   
<listcomp>z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   sT       IJUadckkAbccFFtAwrss|DD  rG   c                 :    g | ]}t          j        |          S rq   )r1   r:   )rr   ptrE   s     rF   rz   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s%    CCCR?48R00CCCrG   c                 8    g | ]}t          |d                    S )r   )rR   )rr   	operations     rF   rz   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s$    AAAyYq\**AAArG   c           
      l    g | ]0}fd t          |ddd         |ddd                   D             1S )c           	      x    g | ]6\  }}t          j        t          |          t          |          f          7S rq   )r1   r:   float)rr   operand1operand2rE   s      rF   rz   z;PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>.<listcomp>   sJ       *( $DHuXh.PQQ  rG   r;   N   )zip)rr   r~   rE   s     rF   rz   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   sk     " " "
 	   .1)ADqD/9QTPQT?.S.S  " " "rG   c                 F    g | ]\  }}t          t          |g|R           S rq   )r   r.   )rr   ory   s      rF   rz   z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>   s>          Aq [1'q''**     rG      rx   lhrw   >   mlmlh)original_pathdashing_style>   mlllhmllll   r   )joincountrefinditerstartend
paint_pathr   rS   ra   r   	linewidthscolorncolordashr9   rb   r   r   )rE   ri   rj   rk   rl   rm   shaperu   subpathraw_ptspts	operatorstransformed_pointstransformed_pathlinerL   rM   rN   rO   x2y2x3y3r_   is_closed_loophas_square_coordinatesrectcurves   `    `                      rF   r   zPDFLayoutAnalyzer.paint_pathp   s    ++d+++++!9 D[[!![E22 H HqwwqzzAEE!HH45gwGGGGH H   NR  G DCCC7CCCCAADAAAI" " " "
 "&" " "   	+=>>      5zzA~~%*"4"4RCF9J9Jcrc
S(			%%
 $FFMM"2"(+   !!$''''',,,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' " -&< -!(*#a&*3q6**(
 
D M%%d+++++#((
 
E M%%e,,,,,$MM$K
 
 !!%(((((rG   fontfontsizescalingrisecidncsgraphicstatec	                    	 |                     |          }	t          |	t                    s$J t          t          |	                                n&# t          $ r |                     ||          }	Y nw xY w|                    |          }
|                    |          }t          ||||||	|
|||
  
        }| j	        
                    |           |j        S rA   )	to_unichrrT   rR   rU   r'   handle_undefined_char
char_width	char_dispr   r9   rb   adv)rE   r[   r   r   r   r   r   r   r   text	textwidthtextdisprg   s                rF   render_charzPDFLayoutAnalyzer.render_char   s    	9>>#&&DdC((99#d4jj//9999# 	9 	9 	9--dC88DDD	9OOC((	>>#&&
 
 	$xs   AA  A43A4c                 D    t                               d||           d|z  S )Nzundefined: %r, %rz(cid:%d))logdebug)rE   r   r   s      rF   r   z'PDFLayoutAnalyzer.handle_undefined_char  s$    		%tS111CrG   ltpagec                     d S rA   rq   rE   r   s     rF   rW   z PDFLayoutAnalyzer.receive_layout  s    rG   r;   N) __name__
__module____qualname__r   __annotations__r-   r)   intr   r   rB   r*   rP   rX   rR   r0   r^   rd   r+   rh   r(   boolr   r.   r   r&   r   r#   r   r   r   rW   rq   rG   rF   r8   r8   A   s[        	KKK
 '+		2 	2#	2 	2 8$		2
 
	2 	2 	2 	26w 6V 6 6 6 6 6
+W + + + + +L LD L& LT L L L LC D       i  D        y)y) y) 	y)
 y) {#y) 
y) y) y) y)v  	
     & 
   B '            V       rG   r8   c            	       T    e Zd Z	 	 ddededee         ddfdZdeddfd	Z	defd
Z
dS )PDFPageAggregatorr;   Nr<   r=   r>   r?   c                 P    t                               | |||           d | _        d S N)r=   r>   )r8   rB   resultrD   s       rF   rB   zPDFPageAggregator.__init__  s,     	""4("SSS(,rG   r   c                     || _         d S rA   r   r   s     rF   rW   z PDFPageAggregator.receive_layout  s    rG   c                 "    | j         J | j         S rA   r   rE   s    rF   
get_resultzPDFPageAggregator.get_result!  s    {&&&{rG   r   )r   r   r   r)   r   r   r   rB   r   rW   r   rq   rG   rF   r   r     s         '+	- -#- - 8$	-
 
- - - -V     F      rG   r   IOTypec                   b    e Zd Z	 	 	 ddededededee         d	dfd
Z	e
ded	efd            ZdS )PDFConverterutf-8r;   Nr<   outfpcodecr=   r>   r?   c                     t                               | |||           || _        || _        |                     | j                  | _        d S r   )r8   rB   r   r   _is_binary_streamoutfp_binary)rE   r<   r   r   r=   r>   s         rF   rB   zPDFConverter.__init__+  sM     	""4("SSS"

 224:>>rG   c                     dt          | dd          v rdS t          | d          rdS t          | t          j                  rdS t          | t          j                  st          | t          j                  rdS dS )z"Test if an stream is binary or notbmodero   TF)getattrhasattrrT   ioBytesIOStringIO
TextIOBase)r   s    rF   r   zPDFConverter._is_binary_stream8  s     '%,,,,4UF## 	5rz** 	4r{++ 	z%/O/O 	5trG   )r   r;   N)r   r   r   r)   r   rR   r   r   r   rB   staticmethodr,   r   r   rq   rG   rF   r   r   *  s        
 '+? ?#? ? 	?
 ? 8$? 
? ? ? ?  4    \  rG   r   c                        e Zd Z	 	 	 	 	 ddedededed	ee         d
e	dee
         ddf fdZdeddfdZdeddfdZdededdfdZdede	de	de	dee         ddfdZ xZS )TextConverterr   r;   NFr<   r   r   r=   r>   
showpagenoimagewriterr?   c                 p    t                                          |||||           || _        || _        d S )Nr   r=   r>   )superrB   r   r   )	rE   r<   r   r   r=   r>   r   r   	__class__s	           rF   rB   zTextConverter.__init__I  s=     	%uVhWWW$&rG   r   c                 &   t          j        || j        d          }| j        rAt	          t
          | j                                      |                                           d S t	          t          | j                                      |           d S )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rE   r   s     rF   
write_textzTextConverter.write_textW  sw    -dDJII 	14:&&,,T[[]];;;;;$$**400000rG   r   c                      dt           dd f fd j        r                     d|j        z              |                                d           d S )Nrg   r?   c                    t          | t                    r| D ]} |           n<t          | t                    r'                    |                                            t          | t
                    r                    d           d S t          | t                    r#j        j                            |            d S d S d S )N
)	rT   r   r   r   get_textr   r   r   export_image)rg   childrenderrE   s     rF   r   z,TextConverter.receive_layout.<locals>.render_  s    $,, 1! " "EF5MMMM"D&)) 1000$	** 8%%%%%D'** 8#/$11$777778 8//rG   zPage %s
)r   r   r   pageidrE   r   r   s   ` @rF   rW   zTextConverter.receive_layout^  s    
	8 
	8D 
	8 
	8 
	8 
	8 
	8 
	8 
	8 ? 	9OOK&-7888vrG   rY   re   c                 P    | j         t                              | ||           d S d S rA   )r   r   rh   )rE   rY   re   s      rF   rh   zTextConverter.render_images  s1    '%%dD&99999 ('rG   ri   rj   rk   rl   rm   c                     d S rA   rq   )rE   ri   rj   rk   rl   rm   s         rF   r   zTextConverter.paint_pathw  s	     	rG   )r   r;   NFN)r   r   r   r)   r,   rR   r   r   r   r   r   rB   r   r   rW   r+   rh   r(   r   r.   r   __classcell__)r   s   @rF   r   r   H  sk       
 '+ -1' '#' ' 	'
 ' 8$' ' k*' 
' ' ' ' ' '1s 1t 1 1 1 1V     *: :i :D : : : :  	
  {# 
       rG   r   c                       e Zd ZdddddddZddd	Z	 	 	 	 	 	 	 	 	 	 	 	 d>dedededede	e
         dededededede	e         dede	eeef                  de	eeef                  d dfd!Zd"ed dfd#Zd?d$Zd?d%Zd"ed dfd&Zd'ed(ed)ed*ed+ed,ed dfd-Zd'ed(ed.ed dfd/Zd.ed(ed)ed*ed+ed,ed dfd0Zd'ed"ed)ed*ed1ed dfd2Z	 d@d'ed(ed)ed*ed+ed,ed4ed dfd5Zd'ed dfd6Zd"ed7ed8ed dfd9Zd?d:Zd;ed dfd<Zd?d=Z dS )AHTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rH   blue)r  charr   r;   N      ?normalT2   r   r<   r   r   r=   r>   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr?   c                     t                               | |||||           | j        r| j        st	          d          | j        s| j        rt	          d          |ddi}|ddd}|| _        || _        || _        |	| _        |
| _	        || _
        || _        || _        |r>| j                            | j                   | j                            | j                   | j	        | _        d | _        g | _        |                                  d S )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr  r	  r
  )r   rH   )r   rB   r   r   r%   r  r  r  r   r  r   r  r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rE   r<   r   r   r=   r>   r  r  r  r   r  r   r   r  r  s                  rF   rB   zHTMLConverter.__init__  sH   " 	 	 	
 	
 	
  	MTZ 	M KLLL  	UTZ 	U STTT!7+K$+V<<K
"$$$&&& 	6##D$4555##D$4555#26
=?rG   r   c                     | j         rGt          t          | j                                      |                    | j                              d S t          t          | j                                      |           d S rA   r   r   r   r   r   r   r	   r   s     rF   r   zHTMLConverter.write  f    : 	14:&&,,T[[-D-DEEEEE$$**400000rG   c                     |                      d           | j        rd| j        z  }nd}|                      |           |                      d           d S )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rE   ss     rF   r"  zHTMLConverter.write_header  sf    

#$$$: 	I!#':. A
 IA

1

$%%%%%rG   c                     d t          d| j                  D             }dd                    |          z  }|                     |           |                     d           d S )Nc                      g | ]}d | d| dS )z
<a href="#z">z</a>rq   )rr   is     rF   rz   z.HTMLConverter.write_footer.<locals>.<listcomp>  s,    OOOA/1/////OOOrG   r;   z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger=   r   r   )rE   
page_linksr'  s      rF   write_footerzHTMLConverter.write_footer  sj    OOq$+9N9NOOO
G$))K
 K
 
 	

1

%&&&&&rG   c                 J    |                      t          |                     d S rA   )r   r4   r   s     rF   r   zHTMLConverter.write_text  s     

3t99rG   colorborderwidthrs   ywrw   c                     | j                             |          }|Jd|||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S d S )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r  getr  r  r   )	rE   r/  r0  rs   r1  r2  rw   color2r'  s	            rF   
place_rectzHTMLConverter.place_rect  s     !%%e,,K 
N]Q&$*4
N
N	  JJqMMMMM rG   rg   c                 b    |                      |||j        |j        |j        |j                   d S rA   )r6  rL   rO   widthheight)rE   r/  r0  rg   s       rF   place_borderzHTMLConverter.place_border  s,    {DGTWdj$+VVVVVrG   c                     | j         q| j                             |          }dt          |          ||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S d S )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r4   r  r  r   )	rE   rg   r0  rs   r1  r2  rw   rY   r'  s	            rF   place_imagezHTMLConverter.place_image  s     '#0066DD II
N]Q&$*4
N
N	  JJqMMMMM ('rG   sizec                 "   | j                             |          }|rd||| j        z  | j        |z
  | j        z  || j        z  | j        z  fz  }|                     |           |                     |           |                     d           d S d S )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r4  r  r  r  r   r   )rE   r/  r   rs   r1  r=  r5  r'  s           rF   
place_textzHTMLConverter.place_text  s     !%%e,,. 
N]Q&$*44:%6	  JJqMMMOOD!!!JJ{##### rG   Falsewriting_modec           	          | j                             | j                   d | _        d||||| j        z  | j        |z
  | j        z  || j        z  || j        z  fz  }|                     |           d S )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r!  r]   r   r  r  r   )	rE   r/  r0  rs   r1  r2  rw   rB  r'  s	            rF   	begin_divzHTMLConverter.begin_div0  s     	tz***
 DJ"dj0DJDJ 	
 	

1rG   c                     | j         |                     d           | j                                        | _         |                     d           d S )N</span>z</div>)r   r   r!  ra   )rE   r/  s     rF   end_divzHTMLConverter.end_divL  sI    :!JJy!!!_((**


8rG   fontnamer   c                    ||f}|| j         k    rh| j         |                     d           |                    d          d         }|                     d||| j        z  | j        z  fz             || _         |                     |           d S )NrF  +z.<span style="font-family: %s; font-size:%dpx">)r   r   splitr  r  r   )rE   r   rH  r   r   fontname_without_subset_tags         rF   put_textzHTMLConverter.put_textR  s    (#4:z%

9%%%*2..*=*=b*A'JJ@.4:0E0VWX   DJrG   c                 0    |                      d           d S )Nz<br>r   r   s    rF   put_newlinezHTMLConverter.put_newline`  s    

6rG   r   c                      dt           t          t          f         dd f fddt          dd f fd |            xj         j        z  c_        d S )Nrg   r?   c                     t          | t                    r'                    dd|            | D ]} |           d S d S )Nr  r;   )rT   r    r:  rg   r   rE   
show_groups     rF   rU  z0HTMLConverter.receive_layout.<locals>.show_groupd  sc    $,, &!!+q$777! & &EJu%%%%& && &rG   c           
      d   t          | t                    rxj        | j        z  c_                            dd|            j        rS                    dj        | j        z
  j        z  z                                 d| j         d| j         d           | D ]} |           | j	        | j	        D ]} |           d S d S t          | t                    r                    dd|            d S t          | t                    rU                    dd| j        | j        | j        | j                   | D ]} |                               d           d S t          | t"                    r0                    | d| j        | j        | j        | j                   d S j        d	k    rt          | t(                    r)                    d
d|            | D ]} |           d S t          | t*                    ra                    dd|                                dt/          | j        dz             | j        | j        d           | D ]} |           d S t          | t2                    rS                    dd|                                d|                                 | j        | j        | j                   d S d S t          | t(                    r3| D ]} |           j        dk    r                                 d S d S t          | t*                    rh                    dd| j        | j        | j        | j        |                                            | D ]} |                               d           d S t          | t2                    rDt=          | j                  }                     |                                 || j                   d S t          | tB                    r)"                    |                                            d S d S )NrH   r;   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r  exactr  r     r  loose)#rT   r   r  rO   r:  r   r   r  r   groupsr   r   rD  rL   r8  r9  rG  r   r<  r  r!   r   r@  rR   indexr   r   r=  rQ  get_writing_moder5   rH  rN  r   r   )rg   r   grouprH  r   rE   rU  s       rF   r   z,HTMLConverter.receive_layout.<locals>.renderj  s   $'' H1(!!&!T222? JJD MDG3tzAC   JJQDKQQQQQ   " " "EF5MMMM;*!% * *"
5)))) +** *D'** 81!!'1d33333D(++ 61xDGTWdj$+VVV! " "EF5MMMMX&&&&&D'** 11  q$'47DJTTTTTG++dJ// %%j!T:::!% & &u& &i00 %%iD999OO!DJN++   "& & &u& &f-- %%fa666OO	      D*-- 1! " "EF5MMMM?g--$$&&&&& .-D),, 1GGJK))++   " " "EF5MMMMY'''''D&)) 1*4=99dmmooxCCCCCD&)) 1000001 1rG   )r   r    r"   r   r  r  rE   r   r   rU  s   ` @@rF   rW   zHTMLConverter.receive_layoutc  s    	&U;0@#@A 	&d 	& 	& 	& 	& 	& 	& 	&J	1 J	1D J	1 J	1 J	1 J	1 J	1 J	1 J	1 J	1X 	v(rG   c                 .    |                                   d S rA   r-  r   s    rF   closezHTMLConverter.close      rG   )r   r;   Nr;   r  r  Tr  Nr   NNr?   N)rA  )!r   r   r   r  r  r)   r,   rR   r   r   r   r   r   r   r   rB   r   r"  r-  r   r6  r   r:  r   r<  r@  rD  rG  rN  rQ  r   rW   ra  rq   rG   rF   r  r    s        K  K '+"-104043 3#3 3 	3
 3 8$3 3 3 3 3 3 k*3 3 d38n-3 d38n-3  
!3 3 3 3j1# 1$ 1 1 1 1
& 
& 
& 
&' ' ' 's t      	
    
   2W# WC W{ Wt W W W W  	
    
   2$$ $ 	$
 $ $ 
$ $ $ $@ $   	
     
   8S T    S C 5 T       T)V T) T) T) T) T)l     rG   r  c                       e Zd Z ej        d          Z	 	 	 	 	 ddededed	e	d
e
e         de
e         deddfdZdeddfdZddZddZdeddfdZdeddfdZddZdS )XMLConverterz[ ---]r   r;   NFr<   r   r   r=   r>   r   stripcontrolr?   c                     t                               | |||||           | j        | j         k    rt	          d          || _        || _        |                                  d S )Nr   r  )r   rB   r   r   r%   r   rf  r"  )rE   r<   r   r   r=   r>   r   rf  s           rF   rB   zXMLConverter.__init__  s~     	 	 	
 	
 	
 TZ00 KLLL&(rG   r   c                     | j         rGt          t          | j                                      |                    | j                              d S t          t          | j                                      |           d S rA   r$  r   s     rF   r   zXMLConverter.write  r%  rG   c                     | j         r|                     d| j         z             n|                     d           |                     d           d S )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rF   r"  zXMLConverter.write_header  sT    : 	3JJ?$*LMMMMJJ1222

;rG   c                 0    |                      d           d S )Nz	</pages>
rP  r   s    rF   r-  zXMLConverter.write_footer  s    

<     rG   c                     | j         r| j                            d|          }|                     t	          |                     d S Nro   )rf  CONTROLsubr   r4   r   s     rF   r   zXMLConverter.write_text  sB     	.<##B--D

3t99rG   r   c                 d     dt           dd f fddt           dd f fd |           d S )Nrg   r?   c                 d   t          | t                    r3                    d| j        t	          | j                  fz             d S t          | t                    rQ                    dt	          | j                  z             | D ]} |                               d           d S d S )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rT   r   r   r[  r3   rZ   r    rT  s     rF   rU  z/XMLConverter.receive_layout.<locals>.show_group  s    $	** 	-

5z8DI#6#678     D+.. -

4x	7J7JJKKK! & &EJu%%%%

+,,,,,	- -rG   c                 N	   t          | t                    rd| j        t          | j                  | j        fz  }                    |           | D ]} |           | j        ?                    d           | j        D ]} |                               d                               d           d S t          | t                    r5d| j	        t          | j                  fz  }                    |           d S t          | t                    r5d| j	        t          | j                  fz  }                    |           d S t          | t                    rHd| j	        t          | j                  |                                 fz  }                    |           d S t          | t                    r\d| j         d	t          | j                   d
}                    |           | D ]} |                               d           d S t          | t                    rQ                    dt          | j                  z             | D ]} |                               d           d S t          | t                     rtd}t          | t"                    rd}d| j        t          | j                  |fz  }                    |           | D ]} |                               d           d S t          | t&                    rdt)          | j                  t          | j                  | j        j        | j        j        | j        fz  }                    |                               |                                                                d           d S t          | t8                    r,                    d|                                 z             d S t          | t:                    r{j        Nj                            |           }                    dt)          |          | j         | j!        fz             d S                     d| j         | j!        fz             d S J tE          d| f                      )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
ro   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
F	Unhandled)#rT   r   r   r3   rZ   rotater   rZ  r   r   r   r   get_ptsr   rY   r!   r   r   r[  r   r4   rH  r   r   r   r=  r   r   r   r   r   r   r8  r9  rR   )	rg   r'  r   r]  wmoderY   r   rE   rU  s	         rF   r   z+XMLConverter.receive_layout.<locals>.render  s   $'' X7<KTY''K@ 
 

1! " "EF5MMMM;*JJ|,,,!% * *"
5))))JJ}---

;'''''D&)) I79NTY''=  

1D&)) C79NTY''=  

1D'** =7BNTY''LLNNF 
 

1D(++ 67QTYQQ8K8KQQQ

1! " "EF5MMMM

=)))))D*-- 07

3hty6I6IIJJJ! " "EF5MMMM

?+++++D),, +7d$566 0/E5JTY''9 
 

1! " "EF5MMMM

>*****D&)) 70 DM** ++)0	  

1000

;'''''D&)) 7

.@AAAAAD'** 7#/+88>>DJJEt99dj$+>?    
 JJ<:t{34    
 7c;"566666rG   r   r^  s   ` @@rF   rW   zXMLConverter.receive_layout  s    
	-V 
	- 
	- 
	- 
	- 
	- 
	- 
	- 
	-Z	7 Z	7D Z	7 Z	7 Z	7 Z	7 Z	7 Z	7 Z	7 Z	7x 	vrG   c                 .    |                                   d S rA   r`  r   s    rF   ra  zXMLConverter.close[  rb  rG   )r   r;   NNFrc  )r   r   r   r   compilern  r)   r,   rR   r   r   r   r   r   rB   r   r"  r-  r   r   rW   ra  rq   rG   rF   re  re    sQ       bj899G '+-1" #  	
  8$ k*  
   61# 1$ 1 1 1 1       ! ! ! !s t    
iV i i i i iV     rG   re  c                       e Zd ZdZ ej        d          Z	 	 	 	 ddeded	e	d
e
dee         defdZdede	fdZde	ddfdZddZddZde	ddfdZddZdeddfdZddZdS )HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]utf8r;   NFr<   r   r   r=   r>   rf  c                     t                               | |||||           || _        d| _        |                                  d S )Nr   F)r   rB   rf  within_charsr"  )rE   r<   r   r   r=   r>   rf  s          rF   rB   zHOCRConverter.__init__r  s]     	 	 	
 	
 	
 )!rG   rZ   r?   c                     |\  }}}}t          |          }t          | j        d         |z
            }t          |          }t          | j        d         |z
            }	d| d| d| d|	 S )Nr   zbbox  )r   	page_bbox)
rE   rZ   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rF   	bbox_reprzHOCRConverter.bbox_repr  s~    '+$ueUT^A&.//UT^A&.//:v::::::&:::rG   r   c                     | j         rI|                    | j                   }t          t          | j                                      |           d S t          t          | j                                      |           d S rA   )r   r   r   r   r   r   r	   )rE   r   encoded_texts      rF   r   zHOCRConverter.write  sk    : 	1;;tz22L4:&&,,\:::::$$**400000rG   c                    | j         r|                     d| j         z             n|                     d           |                     d           |                     d           |                     d           |                     d           |                     d           |                     d           |                     d	           d S )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
rj  r   s    rF   r"  zHOCRConverter.write_header  s    : 		JJ:<@JG   
 JJ-   	

:

&'''

T	
 	
 	
 	

P	
 	
 	
 	

C	
 	
 	
 	

;

:rG   c                 Z    |                      d           |                      d           d S )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
rP  r   s    rF   r-  zHOCRConverter.write_footer  s8    

FGGG

S	
 	
 	
 	
 	
rG   c                 t    | j         r| j                            d|          }|                     |           d S rm  )rf  rn  ro  r   r   s     rF   r   zHOCRConverter.write_text  s<     	.<##B--D

4rG   c                 >   t          | j                  dk    r}d}d| j        v rd}d| j        v r|dz  }|                     d| j        | j        ||                     | j                  | j        | j        | j                                        fz             d| _        d S )	Nr   ro   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rS   working_textworking_fontr   working_sizer  working_bboxstripr~  )rE   bold_and_italic_styless     rF   
write_wordzHOCRConverter.write_word  s    t !!A%%%'"4,,,)?&***&*??&JJ(
 )).t'899)))//11	    "rG   r   c                 @     dt           dd f fd |           d S )Nrg   r?   c                 t   j         r)t          | t                    r                                 t          | t                    rl| j        _                            d| j        d	                    | j                  d           | D ]} |                               d           d S t          | t                    rW                    d	                    | j                  z             | D ]} |                               d           d S t          | t                    r^                    d| j        	                    | j                  fz             | D ]} |                               d           d S t          | t                    rj         sFd_         |                                 _        | j        _        | j        _        | j        _        d S t+          |                                                                           d	k    r=                                                     |                                            d S j        d
         | j        d
         k    s j        | j        k    sj        | j        k    r8                                 | j        _        | j        _        | j        _        xj        |                                 z  c_        j        d	         j        d
         | j        d         j        d         f_        d S d S )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>r?  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r;   r   r   )r~  rT   r   r  r   rZ   r  r   r   r  r!   r   r[  r   r   r  r  rH  r  r=  r  rS   r  )rg   r   
child_liner   rE   s      rF   r   z,HOCRConverter.receive_layout.<locals>.render  s1     "Zf%=%= "!!!$'' 2!%


{{{DNN49$=$=$=$=?   " " "EF5MMMM

:&&&&&D*-- )

8DNN49<U<UV   #' ' 'JF:&&&&

;'''''D),, "

Bz4>>$)#<#<=>   " " "EF5MMMM

:&&&&&D&)) ( (,D%(,D%(,	D%(,D%(,	D%%%..0011Q66OO%%%JJt}}///// )!,	!<<,==,	99))),0I),0M),0I)%%8%%)!,)!,	!)!,	)D%%%+ rG   rw  r   s   ` @rF   rW   zHOCRConverter.receive_layout  sI    5	 5	D 5	 5	 5	 5	 5	 5	 5	n 	vrG   c                 .    |                                   d S rA   r`  r   s    rF   ra  zHOCRConverter.close  rb  rG   )r|  r;   NFrc  )r   r   r   __doc__r   ry  rn  r)   r,   rR   r   r   r   r   rB   r0   r  r   r"  r-  r   r  r   rW   ra  rq   rG   rF   r{  r{  _  sm       UU  bj9::G '+" #  	
  8$    *;d ;s ; ; ; ;1# 1$ 1 1 1 1   4
 
 
 
s t    
" " " "28V 8 8 8 8 8t     rG   r{  )Nr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminerr   pdfminer.imager   pdfminer.layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   pdfminer.pdfcolorr#   pdfminer.pdfdevicer$   pdfminer.pdfexceptionsr%   pdfminer.pdffontr&   r'   pdfminer.pdfinterpr(   r)   pdfminer.pdfpager*   pdfminer.pdftypesr+   pdfminer.utilsr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   	getLoggerr   r   r8   r   r   r   r   r  re  r{  rq   rG   rF   <module>r     sE   				  				                                & & & & & &                                         * , + + + + + , , , , , , 0 0 0 0 0 0 : : : : : : : : B B B B B B B B $ $ $ $ $ $ ' ' ' ' ' '                          g!!P P P P P P P Pf    )   & 
68U	3	3    $gfo   <7 7 7 7 7L' 7 7 7tx x x x xL' x x xv	_ _ _ _ _<& _ _ _Dq q q q qL' q q q q qrG   