
    .ig              #          d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ 	 	 	 	 	 	 	 	 	 	 	 	 	 d*dede"de%de%de
e         de&de
ee&                  de%de'de&de%de
e%         de(d e(d!e(d"ed#df"d$Z)	 	 	 	 	 	 d+d&e#de%de
ee&                  de&d'e(de%de
e         d#e%fd(Z*	 	 	 	 	 d,d&e#de%de
ee&                  de&d'e(de
e         d#e	e         fd)Z+dS )-zIFunctions that can be used for the most common use-cases for pdfminer.six    N)StringIO)AnyBinaryIO	ContainerIteratorOptionalcast)HOCRConverterHTMLConverterPDFPageAggregatorTextConverterXMLConverter)ImageWriter)LAParamsLTPage)	PDFDeviceTagExtractor)PDFValueError)PDFPageInterpreterPDFResourceManager)PDFPage)AnyIO
FileOrNameopen_filenametextutf-8       ?normalFinfoutfpoutput_typecodeclaparamsmaxpagespage_numberspasswordscalerotation
layoutmode
output_dirstrip_controldebugdisable_cachingkwargsreturnc           	      :   |r0t          j                                        t           j                   d}|rt	          |          }t          |           }d}|dk    r!|t          j        k    rt          j        j        }|dk    rt          |||||          }n|dk    rt          ||||||          }nx|dk    rt          |||||
||          }n[|d	k    rt          |||||
          }n@|dk    r&t          |t          t          |          |          }nd| }t!          |          |J t#          ||          }t%          j        | ||||           D ])}|j        |	z   dz  |_        |                    |           *|                                 dS )ak  Parses text from inf-file and writes to outfp file-like object.

    Takes loads of optional arguments but the defaults are somewhat sane.
    Beware laparams: Including an empty LAParams is not the same as passing
    None!

    :param inf: a file-like object to read PDF structure from, such as a
        file handler (using the builtin `open()` function) or a `BytesIO`.
    :param outfp: a file-like object to write the text to.
    :param output_type: May be 'text', 'xml', 'html', 'hocr', 'tag'.
        Only 'text' works properly.
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. Default is None
        but may not layout correctly.
    :param maxpages: How many pages to stop parsing after
    :param page_numbers: zero-indexed page numbers to operate on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param scale: Scale factor
    :param rotation: Rotation factor
    :param layoutmode: Default is 'normal', see
        pdfminer.converter.HTMLConverter
    :param output_dir: If given, creates an ImageWriter for extracted images.
    :param strip_control: Does what it says on the tin
    :param debug: Output more logging data
    :param disable_caching: Does what it says on the tin
    :param other:
    :return: nothing, acting as it does on two streams. Use StringIO to get
        strings.
    Ncachingr   )r#   r$   imagewriterxml)r#   r$   r4   stripcontrolhtml)r#   r(   r*   r$   r4   hocr)r#   r$   r6   tag)r#   z1Output type can be text, html, xml or tag but is r%   r'   r3   ih  )logging	getLoggersetLevelDEBUGr   r   sysstdoutbufferr   r   r   r
   r   r	   r   r   r   r   	get_pagesrotateprocess_pageclose)r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r4   rsrcmgrdevicemsginterpreterpages                         O/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfminer/high_level.pyextract_text_to_fprL      s$   ^  4$$W]333K .!*-- _)<===G"&Ff#*!4!4
!f#
 
 
 
		#&
 
 
 
		!#
 
 
 
		&
 
 
 
		gtHe'<'<EJJJ P+OOC   $Wf55K!##   ' ' {X-4  &&&&
LLNNNNN    Tpdf_filer3   c           	         |t                      }t          | d          5 }t                      5 }t          t          |          }t          |          }	t          |	|||          }
t          |	|
          }t          j	        |||||          D ]}|
                    |           |                                cddd           cddd           S # 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )aw  Parse and return the text contained in a PDF file.

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: a string containing all of the text extracted.
    Nrbr2   )r#   r$   r:   )r   r   r   r	   r   r   r   r   r   rB   rD   getvalue)rN   r'   r&   r%   r3   r#   r$   fpoutput_stringrF   rG   rI   rJ   s                rK   extract_textrT      s   , ::	x	&	& ("hjj (M(B$W555wUXVVV(&99%
 
 
 	+ 	+D $$T****%%''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (s5   C.BC=C.C	C.C	C..C25C2c              #     K   |t                      }t          | d          5 }t          t          |          }t	          |          }t          ||          }t          ||          }	t          j        |||||          D ]/}
|		                    |
           |
                                }|V  0	 ddd           dS # 1 swxY w Y   dS )a  Extract and yield LTPage objects

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: LTPage objects
    NrP   r2   )r$   r:   )r   r   r	   r   r   r   r   r   rB   rD   
get_result)rN   r'   r&   r%   r3   r$   rR   resource_managerrG   rI   rJ   layouts               rK   extract_pagesrY      s2     ( ::	x	&	& "(B-g>>>"#3hGGG()96BB%
 
 
 		 		D $$T***&&((FLLLL		                 s   BCCC)r   r   Nr   Nr   r   r   r   NFFF)r   Nr   Tr   N)r   Nr   TN),__doc__r;   r?   ior   typingr   r   r   r   r   r	   pdfminer.converterr
   r   r   r   r   pdfminer.imager   pdfminer.layoutr   r   pdfminer.pdfdevicer   r   pdfminer.pdfexceptionsr   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.utilsr   r   r   strintfloatboolrL   rT   rY    rM   rK   <module>rj      se   O O  



       E E E E E E E E E E E E E E E E              ' & & & & & , , , , , , , , 6 6 6 6 6 6 6 6 0 0 0 0 0 0 E E E E E E E E $ $ $ $ $ $ ; ; ; ; ; ; ; ; ; ; #'-1 $!w w	ww w 	w
 x w w 9S>*w w w w w w w w w  !w" 
#w w w wx -1#'(( (((((( 9S>*(( 	((
 (( (( x (( 	(( (( (( ((Z -1#'% %%% 9S>*% 	%
 % x % f% % % % % %rM   