
    .id=                     j   d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZm Z m!Z!m"Z"m#Z# ddl$m%Z%m&Z&  ej'        e(          Z) G d	 d
e          Z* G d d          Z+ G d de+          Z, G d de+          Z- G d de-          Z. G d de+          Z/ G d de/          Z0 G d de,          Z1 G d de/          Z2 G d de,          Z3 G d de/          Z4 G d d           Z5 G d! d"e"e                    Z6dS )#zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalSetTextIOTupleUnioncast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                       e Zd ZdS )	CMapErrorN__name__
__module____qualname__     K/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfminer/cmapdb.pyr   r   ,   s        Dr#   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eeee
f         ddfdZddZd
edee
         fdZdS )CMapBaser   kwargsreturnNc                 8    |                                 | _        d S N)copyattrsselfr'   s     r$   __init__zCMapBase.__init__3   s    28++--


r#   c                 @    | j                             dd          dk    S )NWModer   r,   getr.   s    r$   is_verticalzCMapBase.is_vertical6   s    z~~gq))Q..r#   kvc                     || j         |<   d S r*   )r,   )r.   r6   r7   s      r$   set_attrzCMapBase.set_attr9   s    
1r#   codecidc                     d S r*   r"   )r.   r:   r;   s      r$   add_code2cidzCMapBase.add_code2cid<       r#   c                     d S r*   r"   )r.   r;   r:   s      r$   add_cid2unichrzCMapBase.add_cid2unichr?   r>   r#   cmapc                     d S r*   r"   )r.   rA   s     r$   use_cmapzCMapBase.use_cmapB   r>   r#   c                     t           r*   )NotImplementedError)r.   r:   s     r$   decodezCMapBase.decodeE   s    !!r#   )rA   r&   r(   N)r   r    r!   debugobjectr/   boolr5   strr9   intr=   r   r   bytesr@   rC   r   rF   r"   r#   r$   r&   r&   0   s       E@ @D @ @ @ @/T / / / /# & T     3 4    # U9eS3H-I d       "5 "Xc] " " " " " "r#   r&   c            	           e Zd Zdeeef         ddfdZdefdZdeddfdZ	de
dee         fd	Zej        dd
fdedeeeef                  deedf         ddfdZdS )CMapr'   r(   Nc                 8    t          j        | fi | i | _        d S r*   )r&   r/   code2cidr-   s     r$   r/   zCMap.__init__J   s&    $))&)))+-r#   c                 <    d| j                             d          z  S )Nz
<CMap: %s>CMapNamer2   r4   s    r$   __repr__zCMap.__repr__N   s    djnnZ8888r#   rA   c                    t          |t                    s$J t          t          |                                dt          t
          t          f         dt          t
          t          f         dd ffd | j        |j                   d S )Ndstsrcr(   c                     |                                 D ]3\  }}t          |t                    ri }|| |<    ||           .|| |<   4d S r*   )items
isinstancedict)rU   rV   r6   r7   dr+   s        r$   r+   zCMap.use_cmap.<locals>.copyT   sd    		  1a&& +-ACFDAJJJJCFF r#   )rY   rN   rJ   typer   rK   rH   rP   )r.   rA   r+   s     @r$   rC   zCMap.use_cmapQ   s    $%%66s4::666	d3;' 	d3;.? 	D 	 	 	 	 	 	 	T]DM*****r#   r:   c              #   ,  K   t                               d| |           | j        }t          |          D ]^}||v rQ||         }t	          |t
                    r|V  | j        }/t          t          t
          t          f         |          }W| j        }_d S )Nzdecode: %r, %r)	logrG   rP   iterrY   rK   r   r   rH   )r.   r:   r[   ixs        r$   rF   zCMap.decode_   s      		"D$///Md 		" 		"AAvvaDa%% 3GGGAAT#v+.22AAM		" 		"r#   r"   outrP   .c           	      P   |	| j         }d}t          |                                          D ]x\  }}||fz   }t          |t                    r|                    d||fz             ;|                     |t          t          t          t          f         |          |           yd S )Nr"   zcode %r = cid %d
)rb   rP   r:   )
rP   sortedrX   rY   rK   writedumpr   r   rH   )r.   rb   rP   r:   r6   r7   cs          r$   rf   z	CMap.dumpm   s     }HD8>>++,, 	P 	PDAqtA!S!! P		.!Q78888		cDc6k1BA,F,FQ	OOOO	P 	Pr#   )r   r    r!   r   rJ   rK   r/   rS   r&   rC   rL   r   rF   sysstdoutr   r
   r   rH   r   rf   r"   r#   r$   rN   rN   I   s       .sCx .T . . . .9# 9 9 9 9+X +$ + + + +"5 "Xc] " " " "  j04 "	P PP 4V,-P CHo	P
 
P P P P P Pr#   rN   c                   .    e Zd Zdedeedf         fdZdS )IdentityCMapr:   r(   .c                 ^    t          |          dz  }|rt          j        d|z  |          S dS )N   z>%dHr"   lenstructunpackr.   r:   ns      r$   rF   zIdentityCMap.decode   s4    IIN 	=!T2222r#   Nr   r    r!   rL   r   rK   rF   r"   r#   r$   rk   rk   ~   >        5 U38_      r#   rk   c                   .    e Zd Zdedeedf         fdZdS )IdentityCMapByter:   r(   .c                 X    t          |          }|rt          j        d|z  |          S dS )Nz>%dBr"   rn   rr   s      r$   rF   zIdentityCMapByte.decode   s0    II 	=!T2222r#   Nrt   r"   r#   r$   rw   rw      ru   r#   rw   c                   h    e Zd Zdeeef         ddfdZdefdZdedefdZe	j
        fdeddfd	ZdS )

UnicodeMapr'   r(   Nc                 8    t          j        | fi | i | _        d S r*   )r&   r/   
cid2unichrr-   s     r$   r/   zUnicodeMap.__init__   s&    $))&)))*,r#   c                 <    d| j                             d          z  S )Nz<UnicodeMap: %s>rR   r2   r4   s    r$   rS   zUnicodeMap.__repr__   s    !DJNN:$>$>>>r#   r;   c                 T    t                               d| |           | j        |         S )Nget_unichr: %r, %r)r^   rG   r|   r.   r;   s     r$   
get_unichrzUnicodeMap.get_unichr   s&    		&c222s##r#   rb   c                     t          | j                                                  D ]\  }}|                    d||fz              d S )Nzcid %d = unicode %r
)rd   r|   rX   re   )r.   rb   r6   r7   s       r$   rf   zUnicodeMap.dump   sT    4?002233 	8 	8DAqII-A67777	8 	8r#   )r   r    r!   r   rJ   rK   r/   rS   r   rh   ri   r   rf   r"   r#   r$   rz   rz      s        -sCx -T - - - -?# ? ? ? ?$c $c $ $ $ $ "% 8 8 8 8 8 8 8 8 8r#   rz   c                       e Zd ZdedefdZdS )IdentityUnicodeMapr;   r(   c                 X    t                               d| |           t          |          S )z+Interpret character id as unicode codepointr   )r^   rG   chrr   s     r$   r   zIdentityUnicodeMap.get_unichr   s$    		&c2223xxr#   N)r   r    r!   rK   rJ   r   r"   r#   r$   r   r      s6        c c      r#   r   c                   "    e Zd ZdededdfdZdS )FileCMapr:   r;   r(   Nc                    t          |t                    rt          |t                    s3J t          t          |          t          |          f                      | j        }|d d         D ]L}t          |          }||v r.t          t          t          t          f         ||                   }Ci }|||<   |}Mt          |d                   }|||<   d S )N)	rY   rJ   rK   r\   rP   ordr   r   rH   )r.   r:   r;   r[   rg   cits          r$   r=   zFileCMap.add_code2cid   s    $$$ 	
C)=)= 	
 	
s$ZZc#@
 @
 	
 	
 	
 Mcrc 	 	AQBQwwc6k*AbE22')"b]]"r#   )r   r    r!   rJ   rK   r=   r"   r#   r$   r   r      s=         3 4      r#   r   c                   4    e Zd Zdedeeeef         ddfdZdS )FileUnicodeMapr;   r:   r(   Nc                 $   t          |t                    s$J t          t          |                                t          |t                    r1t          |j        t                    sJ t          |j                  }n`t          |t                    r|                    dd          }n4t          |t                    rt          |          }nt          |          |dk    r | j                            |          dk    rd S || j        |<   d S )NzUTF-16BEignore     )rY   rK   rJ   r\   r   namer   rL   rF   r   r   r|   r3   )r.   r;   r:   unichrs       r$   r@   zFileUnicodeMap.add_cid2unichr   s    #s##33Sc^^333dI&& 
	%di-----!$),,FFe$$ 	%[[X66FFc"" 	%YYFFt$$$ X$/"5"5c":":c"A"AF%r#   )r   r    r!   rK   r   r   rL   r@   r"   r#   r$   r   r      sI        &# &U9eS3H-I &d & & & & & &r#   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler(   Nc                     t                                          |           |j        | _        |j        rd| j        d<   d S d S N)rR      r1   )superr/   CODE2CIDrP   IS_VERTICALr,   )r.   r   r   	__class__s      r$   r/   zPyCMap.__init__   sN    $''' 	$"#DJw	$ 	$r#   )r   r    r!   rJ   r   r/   __classcell__r   s   @r$   r   r      sR        $S $# $$ $ $ $ $ $ $ $ $ $ $r#   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr(   Nc                     t                                          |           |r|j        | _        d| j        d<   d S |j        | _        d S r   )r   r/   CID2UNICHR_Vr|   r,   CID2UNICHR_H)r.   r   r   r   r   s       r$   r/   zPyUnicodeMap.__init__   sP    $''' 	2$1DO"#DJw$1DOOOr#   )r   r    r!   rJ   r   rI   r/   r   r   s   @r$   r   r      sY        2S 2# 2 2$ 2 2 2 2 2 2 2 2 2 2r#   r   c                       e Zd ZU i Zeeef         ed<   i Zeee	e
         f         ed<    G d de          Zededefd            Zededefd            Zedded
edefd            ZdS )CMapDB_cmap_cache_umap_cachec                       e Zd ZdS )CMapDB.CMapNotFoundNr   r"   r#   r$   CMapNotFoundr      s        r#   r   r   r(   c           	         |                     dd          }d|z  }t                              d|           t          j                            dd          t          j                            t          j                            t                    d          f}|D ])}t          j                            ||          }t          j        
                    |          }t          j        
                    |          }|                    |t          j        z             st          j                            |          rt          j        |          }	 t!          t#          |          dt%          j        |                                                    |                                 c S # |                                 w xY w+t,                              |          )	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/rA   r"   )replacer^   rG   osenvironr3   pathjoindirname__file__realpath
startswithsepexistsgzipopenr\   rJ   pickleloadsreadcloser   r   )	clsr   filename
cmap_paths	directoryr   resolved_pathresolved_directorygzfiles	            r$   
_load_datazCMapDB._load_data   sr   ||D"%%!D(		-&&&JNN;(>??GLL22F;;

 $ 	# 	#I7<<	844DG,,T22M!#!1!1)!<!< ++,>,GHH w~~m,, #=11#D		2v|FKKMM/J/JKKLLNNNNNNFLLNNNN# !!$'''s   AF++Gc                 P   |dk    rt          d          S |dk    rt          d          S |dk    rt          d          S |dk    rt          d          S 	 | j        |         S # t          $ r Y nw xY w|                     |          }t          ||          x| j        |<   }|S )Nz
Identity-Hr   )r1   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rk   rw   r   KeyErrorr   r   )r   r   datarA   s       r$   get_cmapzCMapDB.get_cmap  s    <a((((\!!a(((('''#!,,,,'''#!,,,,	?4(( 	 	 	D	~~d##'-dD'9'99s   A' '
A43A4Fr   c                     	 | j                  |         S # t          $ r Y nw xY w|                     dz            fddD             | j         <   | j                  |         S )Nzto-unicode-%sc                 2    g | ]}t          |          S r"   )r   ).0r7   r   r   s     r$   
<listcomp>z*CMapDB.get_unicode_map.<locals>.<listcomp>  s%     T T TdD!!<!< T T Tr#   )FT)r   r   r   )r   r   r   r   s    ` @r$   get_unicode_mapzCMapDB.get_unicode_map  s    	?4(22 	 	 	D	~~o455 T T T T Tm T T Tt$X..s    
$$N)F)r   r    r!   r   r   rJ   r   __annotations__r   r   r   r   r   classmethodr   r   r&   r   rI   rz   r   r"   r#   r$   r   r      s        %'Kc6k"'''13Kc4--.333    y    (c (c ( ( ( [(0 C H    [" / /3 /$ /: / / / [/ / /r#   r   c                      e Zd ZdededdfdZddZ ed          Z ed          Z	 ed	          Z
 ed
          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          ZdededdfdZdeddfdZdS )
CMapParserrA   fpr(   Nc                 r    t          j        | |           || _        d| _        t	                      | _        d S )NT)r   r/   rA   _in_cmapset	_warnings)r.   rA   r   s      r$   r/   zCMapParser.__init__  s2    tR(((	#&55r#   c                 R    	 |                                   d S # t          $ r Y d S w xY wr*   )
nextobjectr   r4   s    r$   runzCMapParser.run&  s?    	OO 	 	 	DD	s    
&&s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 Z   || j         u rd| _        |                                  dS || j        u r	d| _        dS | j        sdS || j        u rZ	 |                     d          \  \  }}\  }}| j                            t          |          |           n# t          $ r Y nw xY wdS || j
        u r}	 |                     d          \  \  }}| j                            t                              t          |                               n # t          $ r Y nt          j        $ r Y nw xY wdS || j        u r|                                  dS || j        u r|                                  dS || j        u r|                                  dS || j        u rd |                                 D             }t'          d|          D ]\  }}	}
t)          |t*                    s|                     d           2t)          |	t*                    s|                     d	           ]t)          |
t.                    s|                     d
           t1          |          t1          |	          k    r|                     d           |dd         }|	dd         }||k    r|                     d           |dd         }|	dd         }t3          |          }t3          |          }t1          |          }t5          ||z
  dz             D ]D}|t7          j        d||z             | d         z   }| j                            |
|z   |           EdS || j        u r|                                  dS || j        u r{d |                                 D             }t'          d|          D ]J\  }
}t)          |t*                    r0t)          |
t.                    r| j                            |
|           KdS || j         u r|                                  dS || j!        u rd |                                 D             }t'          d|          D ]\  }}	}t)          |t*                    s|                     d           2t)          |	t*                    s|                     d           ]t1          |          t1          |	          k    r|                     d           t3          |          }t3          |	          }t)          |tD                    rrt1          |          ||z
  dz   k    r|                     d           tG          t5          ||dz             |          D ] \  }
}| j                            |
|           !8t)          |t*                    sJ |dd         }t3          |          }|dd         }t1          |          }t5          ||z
  dz             D ]D}|t7          j        d||z             | d         z   }| j                            ||z   |           EdS || j$        u r|                                  dS || j%        u rd |                                 D             }t'          d|          D ]W\  }
}t)          |
t*                    r=t)          |t*                    r(| j                            t3          |
          |           XdS || j&        u r|                                  dS || j'        u r|                                  dS | (                    ||f           dS )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrm   r   c                     g | ]\  }}|S r"   r"   r   __objs      r$   r   z)CMapParser.do_keyword.<locals>.<listcomp>l      777IRC777r#      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>Lc                     g | ]\  }}|S r"   r"   r   s      r$   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r#   c                     g | ]\  }}|S r"   r"   r   s      r$   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r#   zThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.c                     g | ]\  }}|S r"   r"   r   s      r$   r   z)CMapParser.do_keyword.<locals>.<listcomp>  r   r#   ))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpoprA   r9   r   r   KEYWORD_USECMAPrC   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rY   rL   
_warn_oncerK   ro   r   rangerp   packr@   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r.   r   r   _r6   r7   cmapnameobjs
start_byteend_byter;   start_prefix
end_prefixsvarevarstartendvlenr`   ra   r:   unicode_valuevarbaseprefixs                            r$   
do_keywordzCMapParser.do_keyword=  s   
 D*** DMKKMMMFd***!DMF} 	FD$$$#'88A;; !Q!Q	""<??A6666    FD(((#'88A;; !X	""6??<3I3I#J#JKKKK    &   FD444KKMMMFD222KKMMMFD...KKMMMFD,,,77777D-5a->-> 9 9)
Hc!*e44 OO$VWWW!(E22 OO$TUUU!#s++ OO$TUUUz??c(mm33OO-   )#2#%crc]
:--OO:   !"##}dmm4yysU{Q// 9 9A$v{4'C'CTEFF'KKAI,,S1Wa88889 FD---KKMMMFD+++77777D%a.. 8 8	TdE** 8z#s/C/C 8I,,S$777FD---KKMMMFD+++77777D.6q$.?.? ? ?*
Hd!*e44 OO$EFFF!(E22 OO$CDDDz??c(mm33OO$TUUU
++h''dD)) ?4yyC%K!O33F   /2%sQw2G2G.N.N E E*]	00mDDDDE &dE22222rss)C"3<<D!#2#YFs88D"3;?33 ? ?"V[tax%@%@$%HH	00A>>>>? FD,,,KKMMMFD***77777D%a.. A A	Tc5)) Aju.E.E AI,,WS\\4@@@FD111KKMMMFD///KKMMMF		3,s,   AB 
B B /AD	 	
D&D&%D&msgc                     || j         vr;| j                             |           d}t                              ||z              dS dS )z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr^   warning)r.   r  base_msgs      r$   r   zCMapParser._warn_once  sU    dn$$Ns###/ 
 KK3''''' %$r#   )r(   N)r   r    r!   r&   r   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  rK   r   r  rJ   r   r"   r#   r$   r   r     s       )X )8 ) ) ) ) )    L))c*ooOc*ooO#f++K"%#&<"="= #$8 9 9C 011#n--3//]++3//]++#n--L))"s#677 S!233U c U ) U  U  U  U  U n	(c 	(d 	( 	( 	( 	( 	( 	(r#   r   )7__doc__r   loggingr   os.pathr   rp   rh   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   r^   r   r&   rN   rk   rw   rz   r   r   r   r   r   r   r   r"   r#   r$   <module>r(     s  	 	   				       



                               - , , , , , = = = = = = = = 6 6 6 6 6 6 6 6 T T T T T T T T T T T T T T , , , , , , , ,g!!	 	 	 	 	 	 	 	" " " " " " " "22P 2P 2P 2P 2P8 2P 2P 2Pj    8       |   8 8 8 8 8 8 8 8"           t   $& & & & &Z & & &*$ $ $ $ $T $ $ $2 2 2 2 2: 2 2 2:/ :/ :/ :/ :/ :/ :/ :/z( ( ( ( (y) ( ( ( ( (r#   