ó
^›&Zc           @   sÈ   d  d l  Z  d  d l Z d  d l Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  YZ e d
 k rÄ e e  j d ƒ Z	 e	 j
 d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s
(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s
(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   
i   (   t   rangeR
   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t
   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j
 |  j j ƒ |  j j
 |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   
(   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d	 d	 d	 d	 d „ Z RS(
   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _
 g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR
   t	   readlinest
   startswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    													:
	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t
 ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R
   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	
c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t
 St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t
   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j
 d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R
   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s
    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s
    				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j
 ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] }
 |	 |
 c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   
g      Y@i
   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R
   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
"
)"/)/*)		N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t
   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _
 |  j
 j d ƒ |  j r d  |  _ nD t g  |  j
 j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR
   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j
 d 7_
 t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j
 d 7_
 t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R
   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t
   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   <module>   s   Å>