o
    jEiG                     @   s  d dl mZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d	d
lmZ d	dlmZmZmZ d	dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ eG dd dZ,eG dd dZ-eG dd dZ.G dd de/eZ0G dd de/eZ1dede/defddZ2G dd dZ3G dd dZ4G d d! d!Z5G d"d# d#Z6dS )$    )	dataclassasdict)Enum)chain)unescape)ListDictIteratorIterablePatternOptional)ElementTreeN)	HTTPErrorSessionResponse   )ProxyConfig)	WATCH_URLINNERTUBE_CONTEXTINNERTUBE_API_URL)VideoUnavailableYouTubeRequestFailedNoTranscriptFoundTranscriptsDisabledNotTranslatableTranslationLanguageNotAvailableFailedToCreateConsentCookieInvalidVideoId	IpBlockedRequestBlockedAgeRestrictedVideoUnplayableYouTubeDataUnparsablePoTokenRequiredc                   @   s(   e Zd ZU eed< eed< 	 eed< dS )FetchedTranscriptSnippettextstartdurationN)__name__
__module____qualname__str__annotations__float r.   r.   `/var/www/agentarbitrage/venv/lib/python3.10/site-packages/youtube_transcript_api/_transcripts.pyr$   "   s   
 r$   c                   @   s~   e Zd ZU dZee ed< eed< eed< eed< eed< de	e fdd	Z
defd
dZdefddZdee fddZdS )FetchedTranscriptz
    Represents a fetched transcript. This object is iterable, which allows you to
    iterate over the transcript snippets.
    snippetsvideo_idlanguagelanguage_codeis_generatedreturnc                 C   
   t | jS N)iterr1   selfr.   r.   r/   __iter__>      
zFetchedTranscript.__iter__c                 C   s
   | j | S r8   )r1   )r;   indexr.   r.   r/   __getitem__A   r=   zFetchedTranscript.__getitem__c                 C   r7   r8   )lenr1   r:   r.   r.   r/   __len__D   r=   zFetchedTranscript.__len__c                 C   s   dd | D S )Nc                 S   s   g | ]}t |qS r.   )r   ).0snippetr.   r.   r/   
<listcomp>H   s    z1FetchedTranscript.to_raw_data.<locals>.<listcomp>r.   r:   r.   r.   r/   to_raw_dataG   s   zFetchedTranscript.to_raw_dataN)r(   r)   r*   __doc__r   r$   r,   r+   boolr	   r<   r?   intrA   r   rE   r.   r.   r.   r/   r0   1   s   
 r0   c                   @   s   e Zd ZU eed< eed< dS )_TranslationLanguager3   r4   N)r(   r)   r*   r+   r,   r.   r.   r.   r/   rI   K   s   
 rI   c                   @      e Zd ZdZdZdZdS )_PlayabilityStatusOKERRORLOGIN_REQUIREDN)r(   r)   r*   rL   rM   rN   r.   r.   r.   r/   rK   Q       rK   c                   @   rJ   )_PlayabilityFailedReasonu%   Sign in to confirm you’re not a botz/This video may be inappropriate for some users.zThis video is unavailableN)r(   r)   r*   BOT_DETECTEDAGE_RESTRICTEDVIDEO_UNAVAILABLEr.   r.   r.   r/   rP   W   rO   rP   responser2   r6   c              
   C   sD   z| j dkr
t||   | W S  ty! } zt||d }~ww )Ni  )status_coder   raise_for_statusr   r   )rT   r2   errorr.   r.   r/   _raise_http_errors]   s   

rX   c                   @   s|   e Zd Zdededededededee fdd	Zddede	fddZ
defddZedefddZdedd fddZdS )
Transcripthttp_clientr2   urlr3   r4   r5   translation_languagesc                 C   s>   || _ || _|| _|| _|| _|| _|| _dd |D | _dS )z
        You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
        TranscriptList.
        c                 S   s   i | ]}|j |jqS r.   )r4   r3   rB   translation_languager.   r.   r/   
<dictcomp>}   s    z'Transcript.__init__.<locals>.<dictcomp>N)_http_clientr2   _urlr3   r4   r5   r\   _translation_languages_dict)r;   rZ   r2   r[   r3   r4   r5   r\   r.   r.   r/   __init__h   s   zTranscript.__init__Fpreserve_formattingr6   c                 C   sV   d| j v r
t| j| j| j }t|dt|| jj}t	|| j| j
| j| jdS )z
        Loads the actual transcript data.
        :param preserve_formatting: whether to keep select HTML text formatting
        z&exp=xpe)rd   )r1   r2   r3   r4   r5   )ra   r#   r2   r`   get_TranscriptParserparserX   r%   r0   r3   r4   r5   )r;   rd   rT   r1   r.   r.   r/   fetch   s   


zTranscript.fetchc                 C   s"   dj | j| j| jrddS ddS )Nz7{language_code} ("{language}"){translation_description}z[TRANSLATABLE] )r3   r4   translation_description)formatr3   r4   is_translatabler:   r.   r.   r/   __str__   s   zTranscript.__str__c                 C   s   t | jdkS )Nr   )r@   r\   r:   r.   r.   r/   rl      s   zTranscript.is_translatablec                 C   sN   | j st| j|| jvrt| jt| j| jdj| j|d| j| |dg S )Nz{url}&tlang={language_code})r[   r4   T)	rl   r   r2   rb   r   rY   r`   rk   ra   )r;   r4   r.   r.   r/   	translate   s   


zTranscript.translateNF)r(   r)   r*   r   r+   rG   r   rI   rc   r0   rh   rm   propertyrl   rn   r.   r.   r.   r/   rY   g   s*    
rY   c                	   @   s   e Zd ZdZdedeeef deeef dee fddZ	e
deded	ed
d fddZd
ee fddZdee d
efddZdee d
efddZdee d
efddZdee deeeef  d
efddZd
efddZdee d
efddZdS )TranscriptListz
    This object represents a list of transcripts. It can be iterated over to list all transcripts which are available
    for a given YouTube video. Also, it provides functionality to search for a transcript in a given language.
    r2   manually_created_transcriptsgenerated_transcriptsr\   c                 C   s   || _ || _|| _|| _dS )a  
        The constructor is only for internal use. Use the static build method instead.

        :param video_id: the id of the video this TranscriptList is for
        :param manually_created_transcripts: dict mapping language codes to the manually created transcripts
        :param generated_transcripts: dict mapping language codes to the generated transcripts
        :param translation_languages: list of languages which can be used for translatable languages
        N)r2   _manually_created_transcripts_generated_transcripts_translation_languages)r;   r2   rr   rs   r\   r.   r.   r/   rc      s   
zTranscriptList.__init__rZ   captions_jsonr6   c                 C   s   dd | dg D }i }i }|d D ];}| dddkr |}n|}t| ||d d	d|d
 d d d |d | dddk| ddrG|ng ||d < qt||||S )a]  
        Factory method for TranscriptList.

        :param http_client: http client which is used to make the transcript retrieving http calls
        :param video_id: the id of the video this TranscriptList is for
        :param captions_json: the JSON parsed from the YouTube pages static HTML
        :return: the created TranscriptList
        c                 S   s,   g | ]}t |d  d d d |d dqS )languageNamerunsr   r%   languageCoder3   r4   )rI   r]   r.   r.   r/   rD      s    z(TranscriptList.build.<locals>.<listcomp>translationLanguagescaptionTrackskindri   asrbaseUrlz	&fmt=srv3namery   r   r%   rz   isTranslatableF)re   rY   replacerq   )rZ   r2   rw   r\   rr   rs   captiontranscript_dictr.   r.   r/   build   s0   

zTranscriptList.buildc                 C   s   t | j | j S r8   )r   rt   valuesru   r:   r.   r.   r/   r<      s   zTranscriptList.__iter__language_codesc                 C   s   |  || j| jgS )a>  
        Finds a transcript for a given language code. Manually created transcripts are returned first and only if none
        are found, generated transcripts are used. If you only want generated transcripts use
        `find_manually_created_transcript` instead.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )_find_transcriptrt   ru   r;   r   r.   r.   r/   find_transcript  s   
zTranscriptList.find_transcriptc                 C      |  || jgS )a  
        Finds an automatically generated transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   ru   r   r.   r.   r/   find_generated_transcript  s   	z(TranscriptList.find_generated_transcriptc                 C   r   )a|  
        Finds a manually created transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   rt   r   r.   r.   r/    find_manually_created_transcript  s   z/TranscriptList.find_manually_created_transcripttranscript_dictsc                 C   s:   |D ]}|D ]}||v r||     S qqt | j|| r8   )r   r2   )r;   r   r   r4   r   r.   r.   r/   r   ,  s   zTranscriptList._find_transcriptc                 C   sR   dj | j| dd | j D | dd | j D | dd | jD dS )Na  For this video ({video_id}) transcripts are available in the following languages:

(MANUALLY CREATED)
{available_manually_created_transcript_languages}

(GENERATED)
{available_generated_transcripts}

(TRANSLATION LANGUAGES)
{available_translation_languages}c                 s       | ]}t |V  qd S r8   r+   rB   
transcriptr.   r.   r/   	<genexpr>C  s
    
z)TranscriptList.__str__.<locals>.<genexpr>c                 s   r   r8   r   r   r.   r.   r/   r   G  s    
c                 s   s"    | ]}d j |j|jdV  qdS )z{language_code} ("{language}")r{   N)rk   r3   r4   r]   r.   r.   r/   r   J  s    
)r2   /available_manually_created_transcript_languagesavailable_generated_transcriptsavailable_translation_languages)rk   r2   _get_language_descriptionrt   r   ru   rv   r:   r.   r.   r/   rm   8  s   


zTranscriptList.__str__transcript_stringsc                 C   s    d dd |D }|r|S dS )N
c                 s   s    | ]	}d j |dV  qdS )z - {transcript})r   N)rk   r   r.   r.   r/   r   T  s
    

z;TranscriptList._get_language_description.<locals>.<genexpr>None)join)r;   r   descriptionr.   r.   r/   r   S  s   
z(TranscriptList._get_language_descriptionN)r(   r)   r*   rF   r+   r   rY   r   rI   rc   staticmethodr   r   r	   r<   r
   r   r   r   r   rm   r   r.   r.   r.   r/   rq      sL    


-

rq   c                   @   s   e Zd Zdedee fddZdedefddZ	d ded
e
defddZdededefddZdededefddZdededdfddZdededdfddZdedefddZdedefddZdededefddZdS )!TranscriptListFetcherrZ   proxy_configc                 C   s   || _ || _d S r8   )r`   _proxy_config)r;   rZ   r   r.   r.   r/   rc   \  s   
zTranscriptListFetcher.__init__r2   r6   c                 C   s   t | j|| |S r8   )rq   r   r`   _fetch_captions_json)r;   r2   r.   r.   r/   rh   `  s
   zTranscriptListFetcher.fetchr   
try_numberc              
   C   s   z|  |}| ||}| ||}| ||W S  tyJ } z&| jd u r'dn| jj}|d |k r@| j||d dW  Y d }~S || jd }~ww )Nr   r   )r   )	_fetch_video_html_extract_innertube_api_key_fetch_innertube_data_extract_captions_jsonr   r   retries_when_blockedr   with_proxy_config)r;   r2   r   htmlapi_keyinnertube_data	exceptionretriesr.   r.   r/   r   g  s   

z*TranscriptListFetcher._fetch_captions_jsonr   c                 C   sF   d}t ||}|rt| dkr|dS d|v rt|t|)Nz)"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"r   zclass="g-recaptcha")researchr@   groupsgroupr   r"   )r;   r   r2   patternmatchr.   r.   r/   r   w  s   
z0TranscriptListFetcher._extract_innertube_api_keyr   c                 C   s@   |  |d| |di d}|d u sd|vrt||S )NplayabilityStatuscaptionsplayerCaptionsTracklistRendererr}   )_assert_playabilityre   r   )r;   r   r2   rw   r.   r.   r/   r     s   z,TranscriptListFetcher._extract_captions_jsonplayability_status_dataNc                 C   s   | d}|tjjkri|d urk| d}|tjjkr.|tjjkr$t||tjjkr.t	||tj
jkrL|tjjkrL|dsD|drHt|t|| di  di  di  dg }t||d	d
 |D d S d S )Nstatusreasonzhttp://zhttps://errorScreenplayerErrorMessageRenderer	subreasonry   c                 S   s   g | ]}| d dqS )r%   ri   )re   )rB   runr.   r.   r/   rD     s    z=TranscriptListFetcher._assert_playability.<locals>.<listcomp>)re   rK   rL   valuerN   rP   rQ   r   rR   r    rM   rS   
startswithr   r   r!   )r;   r   r2   playability_statusr   
subreasonsr.   r.   r/   r     s0   


z)TranscriptListFetcher._assert_playabilityc                 C   s>   t d|}|d u rt|| jjjdd|d dd d S )Nzname="v" value="(.*?)"CONSENTzYES+r   z.youtube.com)domain)r   r   r   r`   cookiessetr   )r;   r   r2   r   r.   r.   r/   _create_consent_cookie  s   
z,TranscriptListFetcher._create_consent_cookiec                 C   s<   |  |}d|v r| || |  |}d|v rt||S )Nz&action="https://consent.youtube.com/s")_fetch_htmlr   r   )r;   r2   r   r.   r.   r/   r     s   

z'TranscriptListFetcher._fetch_video_htmlc                 C   s$   | j tj|d}tt||jS )N)r2   )r`   re   r   rk   r   rX   r%   )r;   r2   rT   r.   r.   r/   r     s   z!TranscriptListFetcher._fetch_htmlr   c                 C   s0   | j jtj|dt|dd}t|| }|S )N)r   )contextvideoId)json)r`   postr   rk   r   rX   r   )r;   r2   r   rT   datar.   r.   r/   r     s   
z+TranscriptListFetcher._fetch_innertube_data)r   )r(   r)   r*   r   r   r   rc   r+   rq   rh   rH   r   r   r   r   r   r   r   r   r   r.   r.   r.   r/   r   [  s    		r   c                   @   sP   e Zd Zg dZddefddZdedee fddZd	ede	e
 fd
dZdS )rf   )
strongembimarksmalldelinssubsupFrd   c                 C   s   |  || _d S r8   )_get_html_regex_html_regex)r;   rd   r.   r.   r/   rc     s   z_TranscriptParser.__init__r6   c                 C   s@   |rd | j}d| d }t|tj}|S tdtj}|S )N|z<\/?(?!\/?(z
)\b).*?\b>z<[^>]*>)r   _FORMATTING_TAGSr   compile
IGNORECASE)r;   rd   formats_regex
html_regexr.   r.   r/   r     s   z!_TranscriptParser._get_html_regexraw_datac                    s    fddt |D S )Nc              
      sN   g | ]#}|j d urtt jdt|j t|jd t|jdddqS )Nri   r&   durz0.0)r%   r&   r'   )	r%   r$   r   r   r   r   r-   attribre   )rB   xml_elementr:   r.   r/   rD     s    
z+_TranscriptParser.parse.<locals>.<listcomp>)r   
fromstring)r;   r   r.   r:   r/   rg     s   
z_TranscriptParser.parseNro   )r(   r)   r*   r   rG   rc   r   r+   r   r   r$   rg   r.   r.   r.   r/   rf     s
    	rf   )7dataclassesr   r   enumr   	itertoolsr   r   r   typingr   r   r	   r
   r   r   
defusedxmlr   r   requestsr   r   r   proxiesr   	_settingsr   r   r   _errorsr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r0   rI   r+   rK   rP   rX   rY   rq   r   rf   r.   r.   r.   r/   <module>   s2     @
M (n