
    .it                         d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZm Z   ej!        e"          Z# ed
          Z$ ed          Z% G d d          Z&dS )    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                      e Zd ZdZdedededee         ddf
dZdefd	Z	h d
Z
ededed          fd            Ze	 	 	 	 	 ddedeee                  dededededed          fd            ZdedefdZdededefdZdedee         fdZdS )PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelreturnNc                 <   || _         || _        t          |          | _        || _        t          | j                            d                    | _        t          | j                            dt                                          | _	        | 
                    | j                            d                    | _        |                     | j                            d          | j                  | _        |                     | j                            d                    | _        t!          | j                            dd                    dz   dz  | _        | j                            d	          | _        | j                            d
          | _        dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r   r   r    r!   r   getlastmoddict	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r   r    r!   s        L/var/www/html/analyses/venv/lib/python3.11/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__0   s3    &&


~ > >??/7JNN;//0
 0
 ,,TZ^^J-G-GHH**4:>>)+D+DdmTT,,TZ^^J-G-GHH !!<!<==CsJjnnX..Z^^C((


    c                 (    d| j         d| j        dS )Nz<PDFPage: Resources=z, MediaBox=>)r/   r1   )r9   s    r:   __repr__zPDFPage.__repr__O   s    UdnUU4=UUUUr<   >   r)   r'   r&   r%   documentc              #   p   	K   	 d
dt           dt          t          t           f         dt          t          t                             dt
          t          t          t          t           t          t           t           f         f         f                  f 	fd		                                 }n$# t          $ r t          j        d           }Y nw xY wd}dj        v rC 	j        d         j                  }|D ]$\  }}  ||t          |                    V  d}%|sj        D ]}|                                D ]v}	                     |          }t#          |t$                    r9|                    d	          t(          u r  ||t          |                    V  g# t*          $ r Y sw xY wd S d S )Nobjparentvisitedr"   c              3      K   t          | t                    r7| }t                              |                                                    }n(| j        }t          |                                           }|t                      }||v rd S |                    |           |                                D ]\  }}|	j	        v r	||vr|||<   |
                    d          }|!t          j        s|
                    d          }|t          u rRd|v rNt                              d|d                    t!          |d                   D ]} 
|||          E d {V  d S |t"          u r#t                              d|           ||fV  d S d S )NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr,   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rB   rC   rD   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr@   s            r:   r^   z0PDFPage.create_pages.<locals>.depth_first_searchV   s     
 #s## ;	$.xy/I/I$J$J$O$O$Q$Q!!  I	$.sOO$8$8$:$:! %%G##KK	""" - -1---!;L2L2L+,%a(+//77K"8?"/33F;;m++:K0K0K		*,=f,EFFF'(9&(ABB U UE11%9JGTTTTTTTTTTU U ,,		*&7888 "3444444 -,r<   Fr   TrF   N)r   r   strr	   r
   r   r   rJ   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrK   rI   r.   r,   rV   r   )
r]   r@   page_labelspagesobjectsrM   treexrefrB   r^   s
   ``       @r:   create_pageszPDFPage.create_pagesT   s     
 +/$	5 $	5$	5cN$	5 c#h'$	5 eCc4S>&9!::;<	$	5 $	5 $	5 $	5 $	5 $	5 $	5 $	5L	13;3K3K3M3MKK 	1 	1 	1#*400KKK	1 h&&&(()9')BHDTUUG&  tc(E4k1B1BCCCCC 		   !__..  E&ooe44%c400 OSWWV__5T5T"%#hsD<M<M"N"NNNN,   		 		 s%   B+ +CC?A#F##
F0/F0r    TFfppagenosmaxpagespasswordcachingcheck_extractablec              #   F  K   t          |          }t          |||          }|j        s5|rd|z  }	t          |	          d|z  }
t                              |
           t          |                     |                    D ]\  }}|r||vr|V  |r||dz   k    r d S d S )N)rr   rs   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler   rT   warning	enumeraterm   )r]   ro   rp   rq   rr   rs   rt   parserr   	error_msgwarning_msgpagenopages                r:   	get_pageszPDFPage.get_pages   s       2&8WEEE ! 	)  )@2E	1)<<<A DF	F  K(((%c&6&6s&;&;<< 	 	LFD F'11JJJ H
22	 	r<   valuec                     d}|t                               d           |S 	 t          d t          |          D                       S # t          $ r t                               d           |cY S w xY w)N)        r   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc              3   4   K   | ]}t          |          V  d S r_   r   .0vals     r:   	<genexpr>z*PDFPage._parse_mediabox.<locals>.<genexpr>   (      GGhsmmGGGGGGr<   z2Invalid MediaBox in /Page, defaulting to US Letter)rT   rx   r   r   r   )r9   r   	us_letters      r:   r0   zPDFPage._parse_mediabox   s    ,	=KK*   	GGxGGGGGG 	 	 	KKLMMM	s   %A &A10A1r1   c                     ||S 	 t          d t          |          D                       S # t          $ r t                              d           |cY S w xY w)Nc              3   4   K   | ]}t          |          V  d S r_   r   r   s     r:   r   z)PDFPage._parse_cropbox.<locals>.<genexpr>   r   r<   z0Invalid CropBox in /Page, defaulting to MediaBox)r   r   r   rT   rx   )r9   r   r1   s      r:   r2   zPDFPage._parse_cropbox   sk    =O	GGxGGGGGG 	 	 	KKJKKKOOO	s   %, &AAc                 \    g }|'t          |          }t          |t                    s|g}|S r_   )r   rI   list)r9   r   r5   s      r:   r4   zPDFPage._parse_contents   s6     Hh-- &$:r<   )Nr   rn   TF)__name__
__module____qualname____doc__r   objectr	   r`   r;   r?   rQ   classmethodr   rm   r   r   rJ   boolr   r   r   r0   r2   r   r4    r<   r:   r   r      s        .)) ) 	)
 }) 
) ) ) )>V# V V V V GFF;K ;HY4G ; ; ; [;z  -1"'" "" )C.)" 	"
 " "  " 
)	" " " ["HS T    "
C 
4 
D 
 
 
 
S T#Y      r<   r   )'rb   loggingtypingr   r   r   r   r   r   r	   r
   r   pdfminerr   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rT   rV   rS   r   r   r<   r:   <module>r      s        W W W W W W W W W W W W W W W W W W W W W W               
 D C C C C C C C ( ( ( ( ( ( I I I I I I I I I I I I ! ! ! ! ! ! + + + + + + + +g!! s6{{GB B B B B B B B B Br<   