
    .i8                     8   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlZddlmZmZmZ ddlmZmZmZmZ ddlmZ ddlmZ dd	l m!Z!m"Z"m#Z#m$Z$ dd
l%m&Z&m'Z'm(Z( ddl)m*Z*  ej+                      ej,        e-          Z. ej/        d          Z0dee1e2f         de1fdZ3d*dede4dee1         ddfdZ5	 d+dedede6ddfdZ7	 	 d,dededee1         de6ddf
dZ8	 	 	 	 d-dede1de	de
e9         de1de6dee1         d ee1         ddfd!Z: e&d"          Z; e&d#          Z<de1de1d e1ddfd$Z=	 	 	 	 	 d.dede1dee9         de
e9         de1de6dee1         d ee1         de6ddfd%Z>defd&Z?d*d'eee1                  ddfd(Z@e-d)k    r e@             dS dS )/z#Extract pdf structure in XML format    N)ArgumentParser)	Any	ContainerDictIterableListOptionalTextIOUnioncast)PDFDocumentPDFNoOutlinesPDFXRefFallback)
PDFIOErrorPDFObjectNotFoundPDFTypeErrorPDFValueError)PDFPage)	PDFParser)	PDFObjRef	PDFStreamresolve1stream_value)LIT	PSKeyword	PSLiteral)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                     t          | t                    rt          | d          }n| }t                              d |          S )Nzlatin-1c                 L    dt          |                     d                    z  S )Nz&#%d;r   )ordgroup)ms    P/var/www/html/analyses/venv/lib/python3.11/site-packages/../../../bin/dumppdf.py<lambda>zescape.<locals>.<lambda>$   s    3qwwqzz??!:     )
isinstancebytesstrESC_PATsub)r   uss     r%   escaper.      sB    !U I;;::B???r'   outobjcodecc                    ||                      d           d S t          |t                    r|                      dt          |          z             |                                D ]W\  }}|                      d|z             |                      d           t          | |           |                      d           X|                      d           d S t          |t                    rf|                      dt          |          z             |D ]'}t          | |           |                      d           (|                      d	           d S t          |t          t          f          r6|                      d
t          |          t          |          fz             d S t          |t                    r |dk    r(|                      |                                           n|dk    r(|                      |                                           n|                      d           t          | |j                   |                      d           |dk    rH|                                }|                      dt          |          t          |          fz             |                      d           d S t          |t                    r|                      d|j        z             d S t          |t                     r|                      d|j        z             d S t          |t$                    r|                      d|j        z             d S t'          |          r|                      d|z             d S t)          |          )Nz<null />z<dict size="%d">
z<key>%s</key>
z<value>z	</value>
z</dict>z<list size="%d">

z</list>z<string size="%d">%s</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="%d">%s</data>
z	</stream>z<ref id="%d" />z<keyword>%s</keyword>z<literal>%s</literal>z<number>%s</number>)writer(   dictlenitemsdumpxmllistr*   r)   r.   r   get_rawdataget_dataattrsr   objidr   namer   r   r   )r/   r0   r1   kvdatas         r%   r;   r;   '   s?   
{		*#t 		&S1222IIKK 	$ 	$DAqII'!+,,,IIi   COOOIIl####		)#t 		&S1222 	 	ACOOOIIdOOOO		)#U|$$ 		1SXXvc{{4KKLLL#y!! E>>IIcoo''((((hIIcllnn%%%%II+,,,C###IIn%%%||~~		73t99fTll:SSTTTIIk"""#y!! 		#ci/000#y!! 		)CH4555#y!! 		)CH4555}} 		'#-...
s

r'   Fdocshow_fallback_xrefc                 d   |j         D ]e}t          |t                    r|rL|                     d           t	          | |                                           |                     d           ft          d |j         D                       }|r |s d}t                              |           d S d S d S )Nz
<trailer>
z
</trailer>

c              3   @   K   | ]}t          |t                    V  d S N)r(   r   ).0xrefs     r%   	<genexpr>zdumptrailers.<locals>.<genexpr>r   s,      KK:dO44KKKKKKr'   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr(   r   r7   r;   get_trailerallloggerwarning)r/   rE   rF   rK   no_xrefsmsgs         r%   dumptrailersrT   h   s    
 	 * *$00 	*4F 	*IIm$$$C))++,,,II()))KKKKKKKH * $ 	
 	s   r'   c                    t                      }|                     d           |j        D ]}|                                D ]}||v r|                    |           	 |                    |          }|5|                     d|z             t          | ||           |                     d           u# t          $ r}t          d|z             Y d }~d }~ww xY wt          | ||           |                     d           d S )Nz<pdf>z<object id="%d">
r1   z
</object>

znot found: %rz</pdf>)
setr7   rM   
get_objidsaddgetobjr;   r   printrT   )	r/   rE   r1   rF   visitedrK   r@   r0   es	            r%   dumpallobjsr^   |   s=    eeGIIg	 + +__&& 	+ 	+EKK+jj'';		.6777S....		+,,,,$ + + +o)********+	+ c-...IIhs   B66?B66
C CC outfpfnameobjidspagenospassworddumpall
extractdirc                 Z   t          |d          }t          |          }	t          |	|          d t          t	          j                  d          D             }
dt          dt          ffd}	                                 }| 	                    d           |D ]>\  }}}}}d }|r ||          }|
|d         j
                 }n||rz|}t          |t                    rc|                    d	          }|rLt          |          d
k    r9|                    d          r$ ||d                   }|
|d         j
                 }t          |          }| 	                    d|d| d           |:| 	                    d           t!          | |           | 	                    d           || 	                    d|z             | 	                    d           @| 	                    d           n# t"          $ r Y nw xY w|	                                 |                                 d S )Nrbc                 $    i | ]\  }}|j         |S  )pageid)rJ   pagenopages      r%   
<dictcomp>zdumpoutline.<locals>.<dictcomp>   s0       VT 	V  r'      destr   c                    t          | t          t          f          r#t                              |                     } n<t          | t
                    r't                              | j                            } t          | t                    r| d         } t          | t                    r| 	                                } | S )ND)
r(   r*   r)   r   get_destr   rA   r8   r   resolve)rp   rE   s    r%   resolve_destz!dumpoutline.<locals>.resolve_dest   s    dS%L)) 	5CLL..//DDi(( 	5CLL3344DdD!! 	9DdI&& 	"<<>>Dr'   z<outlines>
r   Sz/'GoTo'rr   z<outline level="z	" title="z">
z<dest>z</dest>
z<pageno>%r</pageno>
z</outline>
z</outlines>
)openr   r   	enumerater   create_pagesobjectr   get_outlinesr7   r@   r(   r8   getreprr.   r;   r   close)r`   ra   rb   rc   rd   re   r1   rf   fpparserpagesru   outlinesleveltitlerp   aserl   actionsubtyper   rE   s                         @r%   dumpoutliner      sf    
eT		Br]]F
fh
'
'C '(<S(A(A1EE  E
	6 	c 	 	 	 	 	 	##%%N###)1 	( 	(%E5$2F 	6#|D))tAw}- 6fd++ 6$jjooG 64==I#=#=&**S//#=+|F3K88!&tAw}!5uAKKD5DDQDDDEEEH%%%t$$$K(((!3f<===KK''''O$$$$   
LLNNNHHJJJJJs   2F G3 3
H ?H FilespecEmbeddedFilec                 "  
 dt           dt          t          t          f         dd f
fd}t	          | d          5 }t          |          }t          ||          
t                      }
j        D ]}|	                                D ]m}

                    |          }	||vrRt          |	t                    r=|	                    d          t          u r!|                    |            |||	           n	 d d d            d S # 1 swxY w Y   d S )Nr@   r0   r   c                    t           j                            |                    d          p9t	          t
          |                    d                                                              }|d                             d          p|d                             d          }                    |j                  }t          |t                    sd|z  }t          |          |                    d          t          urt          d|z            t           j                            	d| |fz            }t           j                            |          rt          d|z            t!          d	|z             t          j        t           j                            |          d
           t'          |d          }|                    |                                           |                                 d S )NUFFEFz:unable to process PDF: reference for %r is not a PDFStreamTypez>unable to process PDF: reference for %r is not an EmbeddedFilez%.6d-%szfile exists: %rzextracting: %rT)exist_okwb)ospathbasenamer|   r   r)   decoderZ   r@   r(   r   r   LITERAL_EMBEDDEDFILEjoinexistsr   r[   makedirsdirnamerw   r7   r>   r~   )
r@   r0   filenamefilereffileobj	error_msgr   r/   rE   rf   s
           r%   extract1z!extractembedded.<locals>.extract1   s   7##CGGDMM$WT%5N5N5U5U5W5WXXd)--%%;Ts););**W]++'9-- 	+&'   	***;;v&:::),46   w||J	UH4E(EFF7>>$ 	7.5666%&&&
BGOOD))D99994		'""$$%%%		r'   rh   r   )intr   r*   r   rw   r   r   rW   rM   rX   rZ   r(   r8   r|   LITERAL_FILESPECrY   )ra   rd   rf   r   r   r   extracted_objidsrK   r@   r0   rE   s     `       @r%   extractembeddedr      s{    $sCx. T       0 
eT		 )b2&(++55I 		) 		)D** ) )jj''!111"3-- 2+;;;$((///HUC((()		)	) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )s   B:DDDc	                 T   t          |d          }	t          |	          }
t          |
|          }|r,|D ])}|                    |          }t	          | ||           *|rnt          t          j        |                    D ]L\  }}||v rC|r,|j        D ]#}t          |          }t	          | ||           $7t	          | |j
                   M|rt          | |||           |s|s|st          | ||           |	                                 |dvr|                     d           d S d S )Nrh   rV   )r4   r5   r3   )rw   r   r   rZ   r;   rx   r   ry   contentsr   r?   r^   rT   r~   r7   )r`   ra   rb   rc   rd   re   r1   rf   rF   r   r   rE   r@   r0   rl   rm   s                   r%   dumppdfr      s}    
eT		Br]]F
fh
'
'C - 	- 	-E**U##CE3e,,,,, /%g&:3&?&?@@ 	/ 	/LFD   /#} 9 9*3//s%888889 E4:... ;E3'9::: 5W 5w 5UC!3444HHJJJ%%%D &%r'   c                  >   t          t          d          } |                     dt          d dd           |                     ddd	d
t          j                    |                     ddddd           |                                 }|                    ddddd           |                    ddt          d           |                     dd          }|                    dt          d dd           |                    ddt          d            |                    d!d"t          d#           |                    d$d%ddd&           |                    d'dd()           |                    d*d+t          d,d-.           |                     d/d0          }|                    d1d2t          d3d4.           |                                }|                    d5d6ddd7           |                    d8d9ddd:           |                    d;d<ddd=           | S )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v)r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-Pr_   z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-o-zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)	r   __doc__add_argumentr*   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )r   procedure_parserparse_paramsoutput_paramscodec_parsers        r%   create_parserr   #  s1   $???F
.     7!577	     '     ::<<!!+ "    !!%	 "    ,,- -  L ?     	     @	     B     7     ;     --4 .  M      !==??L4     8     1     Mr'   argvc                 l   t                      }|                    |           }|j        r0t          j                                        t          j                   |j        dk    rt          j	        }nt          |j        d          }|j        r%d |j                            d          D             }ng }|j        rd |j        D             }n:|j        r%d |j                            d          D             }nt                      }|j        }|j        rd}n|j        rd	}n|j        rd
}nd }|j        D ]g}|j        rt-          ||||||j        |d            &|j        rt3          |||j                   Et5          ||||||j        |d |j        	  	         h|                                 d S )N)argsr   wc                 ,    g | ]}t          |          S rj   r   rJ   xs     r%   
<listcomp>zmain.<locals>.<listcomp>  s    :::Q#a&&:::r'   ,c                     h | ]}|d z
  S ro   rj   r   s     r%   	<setcomp>zmain.<locals>.<setcomp>  s    444Q1q5444r'   c                 2    h | ]}t          |          d z
  S r   r   r   s     r%   r   zmain.<locals>.<setcomp>  s"    ???!3q66A:???r'   r4   r5   r6   )rd   re   r1   rf   )rd   rf   )rd   re   r1   rf   rF   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGoutfilesysstdoutrw   objectssplitpage_numbersrc   rW   rd   
raw_streambinary_streamtext_streamr   extract_tocr   rO   extract_embeddedr   r   rF   r~   )	r   r   r   r`   rb   rc   rd   r1   ra   s	            r%   mainr     s
   __F$''Dz 4$$W]333|s
T\3''| ::$,"4"4S"9"9::: 44$"3444	 ??t|'9'9#'>'>???%%}H $		 		    	!	 	 	 	 	 " 	EHAVWWWWW!#'#:
 
 
 
 
 
KKMMMMMr'   __main__rI   )F)NF)r_   FNN)r_   FNNF)Ar   r   os.pathr   rer   argparser   typingr   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rP   compiler+   r*   r)   r.   rz   r;   boolrT   r^   r   r   r   r   r   r   r   r   rj   r'   r%   <module>r      s   ) )   				 



 # # # # # # V V V V V V V V V V V V V V V V V V V V V V  L L L L L L L L L L            % $ $ $ $ $ ( ( ( ( ( ( J J J J J J J J J J J J 7 7 7 7 7 7 7 7 7 7 # # # # # #     		8	$	$
"*>
?
?@eCJ @C @ @ @ @> > >f >Xc] >d > > > >H  % 		  
	   .  $	 		 C= 	
 
   >  $9 999 9 s^	9
 9 9 C=9 9 
9 9 9 9x 3z?? s>** &)3 &)# &)3 &)4 &) &) &) &)\  $$! !!! SM! s^	!
 ! ! C=! ! ! 
! ! ! !Hx~ x x x xv> >xS	" >d > > > >B zDFFFFF r'   