>temp_0|source:ó|start:3115|stop:3615
 e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d
 „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  
_ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  
_ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] 
} | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni 
   t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mix
ed_caseR"   R	   R 
>temp_1|source:ó|start:7562|stop:8062
n | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i  
 s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s
   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthg
š™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\
Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess  
 sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   
xmaxt   ymint   ym
>temp_2|source:ó|start:9638|stop:10138
 ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t  
 qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   
R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR     s    									:c      
   C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  
} | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |
  j d 7_ t St S
>temp_3|source:ó|start:9399|stop:9899
„  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ 
d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  
j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j
 ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   
qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R
   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fas
>temp_4|source:ó|start:6045|stop:6545
  s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g
  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d
 d  l j } g  } |
>temp_5|source:ó|start:8782|stop:9282
ckstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt
   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx   
 ")"/)/*)
		N(   R   R   R   RE   R   R7   R!   R
D   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingse
>temp_6|source:ó|start:5799|stop:6299
        C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N
(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_rea
d_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R  
 (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |
  _ g  |  _ |  j 
>temp_7|source:ó|start:10598|stop:11098
   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    
c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d 
ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (    (
    sJ   /mnt/data/a
>temp_8|source:ó|start:67|stop:567
 d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  YZ e d k r
Ä e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S( 
  iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „
  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t |
 d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   output_fil
e_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/util
>temp_9|source:ó|start:6167|stop:6667
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c        
 C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    ( 
  R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c
         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  r
Ÿ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j 
t |  j ƒ ƒ q1 W
>temp_10|source:ó|start:5338|stop:5838
 } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j 
|  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |
  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   str
ipR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR;   ˆ   s$    		#c         C   s7   
|  j  ƒ  x& |  j ƒ
>temp_11|source:ó|start:10022|stop:10522
 xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | 
ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t |
 ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ
 ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	
   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qual
scoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fasta
>temp_12|source:ó|start:1144|stop:1644
/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | 
r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (
   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d 
t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | |
 | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   
t   sequencet   pi
>temp_13|source:ó|start:291|stop:791
e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s
   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   ope
nt   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c   
      C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j 
| j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniq
uet   write_idt   
>temp_14|source:ó|start:9653|stop:10153
Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   qual
s_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   
R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR     s    									:c         C   sF  | 
 j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | 
ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t 
St Sn  | j d ƒ 
>temp_15|source:ó|start:7072|stop:7572
 n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ
| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) 
d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 
d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j 
t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ
 t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ |
 d> ƒ Wn | j
>temp_16|source:ó|start:6546|stop:7046
 j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ 
t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ
 t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<q
W| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j 
d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ 
| j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d
# ƒ | j d$ ƒ |
>temp_17|source:ó|start:3208|stop:3708
d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d
 |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ
 |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ
 r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(  
 t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R
	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_ha
shR   t   file_poi
>temp_18|source:ó|start:7294|stop:7794
d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i
 d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j'
 | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j*
 } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd
  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   
figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   li
nestylet   -t	   li
>temp_19|source:ó|start:5231|stop:5731
   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ 
 |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 
Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j
 rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R  
 R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R> 
  (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/ut
>temp_20|source:ó|start:9334|stop:9834
					t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z 
d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  
_ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ 
nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   N
i    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals
_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R
8   (    (    sJ   /
>temp_21|source:ó|start:5477|stop:5977
 n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   
t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (  
 R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#c
         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(  
 N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/a
zomer/reads-for-asse
>temp_22|source:ó|start:7735|stop:8235
idtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   
t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   b
lackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence len
gthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymin
t   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   c
enteri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / 
min: %s / max: %st 
>temp_23|source:ó|start:7151|stop:7651
| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd)
 d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0
 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j
 t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ
 t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ |
 d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'
  i    i   s   [f
>temp_24|source:ó|start:7035|stop:7535
j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  |
 j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t
 |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d.
 t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7
 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d
0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ
| d  k r|  j* 
>temp_25|source:ó|start:6481|stop:6981
		c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j
 ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  
| j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d
 ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ
 } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | 
j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d
 d d  ƒ| j t 
>temp_26|source:ó|start:9063|stop:9563
"/)/*)		N(
   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   B   s   								t   QualSourcec           B   s/   e  Z
 e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d 
 |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ | 
 j j d ƒ |  j r
>temp_27|source:ó|start:6947|stop:7447
| j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d#
 ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd 
} n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g 
 t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" 
d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| 
j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ |
 j% d9 d0 d: t | ƒ
>temp_28|source:ó|start:1515|stop:2015
 ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t  
 rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t 
  close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t
   TrueR   R   R
>temp_29|source:ó|start:3987|stop:4487
  C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | 
d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6
d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD]
 } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti  
 R	   R   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppe
rt	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   ( 
  R   t   hasht  
>temp_30|source:ó|start:8872|stop:9372
ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
")"/)/*)	
	N(   R   R   R   RE   R   R7   R!   RD   R;   R
M   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSo
urcec           B  
>temp_31|source:ó|start:1657|stop:2157
  tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(
   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t 
  __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   
		t	   Read
>temp_32|source:ó|start:7260|stop:7760
| j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 
ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g 
 ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6
 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? 
ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] 
Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht
   gridt   colors
>temp_33|source:ó|start:10151|stop:10651
 râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(
   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5
   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(
   R1   R   (   R
>temp_34|source:ó|start:4590|stop:5090
   	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd 
 S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c 
        C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j 
} |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  
_	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/ 
  R-   R>   R   R	 
>temp_35|source:ó|start:6247|stop:6747
 C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    ( 
  R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c
         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  r
Ÿ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j 
t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ 
pâ d } n  d g |
>temp_36|source:ó|start:6574|stop:7074
 d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j
 ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d 
} n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j
 i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !
ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ|
 j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k 
r;| d% p5d } n
>temp_37|source:ó|start:5906|stop:6406
   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c   
      C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   © 
  s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ
 d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ
   /mnt/data/azomer/
>temp_38|source:ó|start:2346|stop:2846
j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d
 ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss 
  (   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   sys
t   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  /   s    		(c         C   s   |  j  j ƒ  d  S(   N(  
 R    R   (   R  
>temp_39|source:ó|start:9734|stop:10234
_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R
8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR     s    									:c         C   sF  |  
j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ
 r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t S
t Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7}
 q7 | j ƒ  |  _ g
>temp_40|source:ó|start:8245|stop:8745
™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   
pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&
   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridS
pect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t  
 ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   
t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   
t   titlet   destt
>temp_41|source:ó|start:3227|stop:3727
  c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d 
 |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d
 ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ | 
 _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_pa
tht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   
t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_po
intert   seekt	   t
>temp_42|source:ó|start:9303|stop:9803
alib.pyR   B   s   								t   QualSourcec           B   s/   e  Z
 e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d 
 |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ | 
 j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q
’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,  
 R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   
R5   R6   (   R   
>temp_43|source:ó|start:5709|stop:6209
rainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq W
t S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_
seq_by_read_id    s    c         C   s   |  j  j ƒ  d  S(   N(  
 R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/f
>temp_44|source:ó|start:3618|stop:4118
   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t 
  file_pointert   seekt	   total_seqR   t	   readlinest   startswitht   resett
   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    
													:	c         C   s  x§ |  j  ƒ  r© t j
 |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d 
c d 7<q i |  j
>temp_45|source:ó|start:3297|stop:3797
  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |
  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^
 q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fas
ta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   
R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t
   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht   reset
t   init_unique_has
>temp_46|source:ó|start:1840|stop:2340
se(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t   T
rueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFas
tac           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  
|  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d
 k r… t j j d 
>temp_47|source:ó|start:1185|stop:1685
      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ 
d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s   
 iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t
  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i  
 (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst
   x(    (    sJ   
>temp_48|source:ó|start:1851|stop:2351
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R 
  R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac        
   B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  | 
 _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t 
j j d |  j j ƒ
>temp_49|source:ó|start:7616|stop:8116
S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   fi
gsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   line
stylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t 
  topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s
   number of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes
   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   
%st   weights   xx
>temp_50|source:ó|start:1378|stop:1878
ingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   
t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | |
 | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(  
 R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c
         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (
    sJ   /mnt/data/a
>temp_51|source:ó|start:7010|stop:7510
 d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t
 |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |
	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d
 d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d)
 d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ
 | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ
 f d; d6 d d< d) 
>temp_52|source:ó|start:1956|stop:2456
(	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (    (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR       s   		t	   ReadFastac           B   s   e  Z d  „  
Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  
j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t
 j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S
(   Niè  i    i  
>temp_53|source:ó|start:2701|stop:3201
bly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c  
       C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
>   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c      
     B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „ 
 Z	 d „  Z d „  Z
>temp_54|source:ó|start:1713|stop:2213
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C  
 s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s 
   (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (    
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR       s   		t	   ReadFastac           B   s   e  Z d  
„  Z d „  Z RS( 
>temp_55|source:ó|start:3775|stop:4275
ett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s( 
   													:	c         C   s  x§ |  j  ƒ  r© t
 j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | 
d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g
  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t | 
 j ƒ |  _ |  j ƒ
>temp_56|source:ó|start:3203|stop:3703
	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | | 
 _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t | 
 j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   
>(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR
"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_ne
xt_hashR   t   fil
>temp_57|source:ó|start:9641|stop:10141
  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qu
alst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R 
  R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR     s    									:c        
 C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } 
| s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  
j d 7_ t St Sn  
>temp_58|source:ó|start:10390|stop:10890
5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(
   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  
d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R* 
  R	   R‘   R’   R 
>temp_59|source:ó|start:10323|stop:10823
  R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ  
 (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R  
 (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |
  _ d  |  _ g  |  
>temp_60|source:ó|start:9617|stop:10117
 ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+ 
  R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR  
 R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    							
		:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ 
d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t
	 | ƒ ^ qw |  _ 
>temp_61|source:ó|start:10272|stop:10772
 _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘
   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK  
 t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR!     s*    		+#+c     
    C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  
s    c         
>temp_62|source:ó|start:76|stop:576
 YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  YZ e d k rÄ e e  j
 d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt
   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d
 „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |
  _ d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj(  
 t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_63|source:ó|start:6135|stop:6635
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ | 
 j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j 
} g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d 
|  j ƒ t j j ƒ
>temp_64|source:ó|start:2551|stop:3051
t   fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R 
  (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR   /   s    		(c         C
   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    
(   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibr
>temp_65|source:ó|start:5925|stop:6425
ead_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyt   get_seq_by_read_id    s    c         C   s   |  
j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c      
   C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (
   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/
>temp_66|source:ó|start:5065|stop:5565
  R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
RD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d 
j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t
 St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q
. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R* 
  R   R1   t   rea
>temp_67|source:ó|start:1219|stop:1719
  | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R
   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t
  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | 
| | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   
R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azom
er/reads-for-assembl
>temp_68|source:ó|start:7901|stop:8401
   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2 
  i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   y
maxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri
	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %
s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t 
  matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R" 
  R#   R$   R   R% 
>temp_69|source:ó|start:7877|stop:8377
ghtg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequence
ss   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmin
t   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-
larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %
.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .pn
g(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect
   gridspecR6   R!  
>temp_70|source:ó|start:3893|stop:4393
sets2/Vibrio/utils/fastalib.pyR   C   s(    													:
	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k
 rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g 
d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ 
qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   
t   counti   R	   R   t   reverse(   t   next_regulart   hashlibt   sha1
R   t   uppert	   
>temp_71|source:ó|start:224|stop:724
(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d
 „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t
 | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   output_
file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s
`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  
j | j | ƒ d  S
>temp_72|source:ó|start:7989|stop:8489
   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg    
  4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s  
 axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max:
 %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplo
tlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R
$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet
   rct   GridSpect
>temp_73|source:ó|start:8095|stop:8595
 %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6
   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-
larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott  
 matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R 
  t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   su
bplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt 
  xlabelR*   t   xt
>temp_74|source:ó|start:2328|stop:2828
t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j 
ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA 
into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat   ne
xtt   post   syst   stderrR   t   flusht   appendR	   R   (   R   t   
f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR   /   s    		(c         C   s   |  j  j 
ƒ  d  S(   N(   R
>temp_75|source:ó|start:1505|stop:2005
  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i  
 (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst
   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N( 
  R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __modu
le__R   t   TrueR
>temp_76|source:ó|start:9731|stop:10231
als_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R, 
  R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR     s    									:c         C   sF  
|  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t |
 ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t
 St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  
7} q7 | j ƒ  |  _
>temp_77|source:ó|start:8860|stop:9360
t   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx 
   ")"/)/*)
		N(   R   R   R   RE   R   R7   R!  
 RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   							
	t   QualSourcec   
>temp_78|source:ó|start:5444|stop:5944
Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni
   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   
R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |
  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   
read_id(    (    sJ 
>temp_79|source:ó|start:9439|stop:9939
   C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |
  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] }
 | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals
_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t
   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s 
   								
>temp_80|source:ó|start:7728|stop:8228
s.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™
¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï
?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   seque
nce lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt
   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   h
at   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: 
%.2f / min: %s / max
>temp_81|source:ó|start:1142|stop:1642
ly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   |
 r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   
(   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d
 t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | 
| | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R 
  t   sequencet   
>temp_82|source:ó|start:10549|stop:11049
+#+c         C   s   |  j  j ƒ  d  S(   N
(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _ 
 d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*
   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(
   R   R   R   R
>temp_83|source:ó|start:7531|stop:8031
 j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n
 Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t
   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	  
 linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸
ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g33333
3Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   t   
sizes   xx-smallt 
>temp_84|source:ó|start:1966|stop:2466
 __name__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
    s   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z
 RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r
´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  
n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè 
 i    i   s)   [fa
>temp_85|source:ó|start:8688|stop:9188
nR)   t   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xti
ckstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR
8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distrib
ution³   sx    ")"/)
/*)		N(   R   R   R   RE   R
   R7   R!   RD   R;
>temp_86|source:ó|start:9490|stop:9990
 |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  | 
 _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(
   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   q
uals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,
   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR     s    									:c         C   sF 
 |  j  j ƒ  d j 
>temp_87|source:ó|start:4480|stop:4980
asht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR7   _   s    	Jc         C   s!   |
  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |
  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ 
 |  _ | d |  _ 
>temp_88|source:ó|start:2319|stop:2819
d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j 
|  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Readi
ng FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fas
tat   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R
   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   /   s    		(c         C   s   
|  j  j ƒ  d  S( 
>temp_89|source:ó|start:8472|stop:8972
 rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_bet
weenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt  
 figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   s
how(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept  
 pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   
ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt&
>temp_90|source:ó|start:2599|stop:3099
   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    
		(c         C   s   |  j  j ƒ  d  S(   N(   R    R   (
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .  
 s   	R   c    
>temp_91|source:ó|start:2035|stop:2535
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR       s   		t	   ReadFastac           B   s   e  Z d  „ 
 Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ | 
 j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t
 j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  
S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R
   t	   sequencest 
>temp_92|source:ó|start:4807|stop:5307
yR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k 
 r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ 
| d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   
R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   h
ash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyRD   x   s    	!&c         C   sì   d  |  
_ |  j j ƒ  d j
>temp_93|source:ó|start:4697|stop:5197
_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r›
 |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc
 | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   N
i    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   
R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingse
>temp_94|source:ó|start:10505|stop:11005
ibrio/utils/fastalib.pyR!     s*    		+#+c
         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _
 |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   ( 
  R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR6  
>temp_95|source:ó|start:6690|stop:7190
 | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |
	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d 
d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d
 d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d
 d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t
 |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |
	 ƒ d | ƒ g  t
>temp_96|source:ó|start:2860|stop:3360
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR  
 >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c     
      B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „
  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  
d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ 
g  |  _ d |  _ t
>temp_97|source:ó|start:10679|stop:11179
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    
c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ
 d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR     s   
>temp_98|source:ó|start:5888|stop:6388
 R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s
    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |
  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (  
 R   (    (    sJ  
>temp_99|source:ó|start:1473|stop:1973
| ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d 
!^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   sequ
encet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C  
 s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s 
   (	   t   __name
>temp_100|source:ó|start:9761|stop:10261
  total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   
 									:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d 
 |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j
 j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j 
 j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j
 j ƒ  D] } t	 | 
>temp_101|source:ó|start:3258|stop:3758
  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  
_ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g 
 |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ 
 n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_i
nitt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   uniqu
e_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR  
 t	   readlinest   s
>temp_102|source:ó|start:3125|stop:3625
e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 
d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _
 d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j
  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d
 ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(
   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR" 
  R	   R   R   R 
>temp_103|source:ó|start:1278|stop:1778
  %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c
         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ
 d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   ra
ngeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR   &   s    %c
>temp_104|source:ó|start:3683|stop:4183
xt_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest   starts
witht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R   C   s(    													:	c         C   s  x§ | 
 j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ
 |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | 
<q Wg  t	 g  |  j 
>temp_105|source:ó|start:8342|stop:8842
tlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   
maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO
   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabe
lR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   num
pyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   de
stt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengths
t   seq_len_distrib
>temp_106|source:ó|start:10772|stop:11272
C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   N
i    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s
    					(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s
   		t   __main__i   R‚   i   i   R*   (    (    (    (    (   R#   R
|   R<   R    R   R
>temp_107|source:ó|start:1107|stop:1607
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR  
    s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d 
 S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    
iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  
d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (
   t   rangeR   t
>temp_108|source:ó|start:2189|stop:2689
 d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _
 x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j 
ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ
 d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (
   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst 
  stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   
/mnt/data/azomer/rea
>temp_109|source:ó|start:3362|stop:3862
  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t  
 >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_case
R"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_n
ext_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest   start
switht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (  
  sJ   /mnt/data/azo
>temp_110|source:ó|start:9311|stop:9811
   B   s   								t   QualSourcec           B   s/   e  Z e d  „
 Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ |
 |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d
 ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  
_ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	
   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6 
  (   R   R   R, 
>temp_111|source:ó|start:3288|stop:3788
 |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ
 |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ
 r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(  
 t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R
	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_ha
shR   t   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht
   resett   init_u
>temp_112|source:ó|start:720|stop:1220
d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   le
nt   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequenc
ies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyt   store   s    *c         C   s   |  j  j d | ƒ 
d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c
         C   s0  
>temp_113|source:ó|start:1772|stop:2272
  %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R  
 R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac        
   B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  
_ t | ƒ |  _ x‘
>temp_114|source:ó|start:8902|stop:9402
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visuali
ze_sequence_length_distribution³   sx    ")
"/)/*)		N(
   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR   B   s   								t   QualSourcec           B   s/   e  Z
 e d  „ Z d „  
>temp_115|source:ó|start:7881|stop:8381
\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess 
  sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint  
 xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larg
et   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f 
/ std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png( 
  i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   g
ridspecR6   R!   R" 
>temp_116|source:ó|start:4351|stop:4851
rt   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR 
  R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s  
  	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  
S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c  
       C   s£   | 
>temp_117|source:ó|start:6544|stop:7044
|  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ
 t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t |
 ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<
qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j
 d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ
 | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j
 d# ƒ | j d$ ƒ 
>temp_118|source:ó|start:6892|stop:7392
 !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ
| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d 
| ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ
 D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ
 d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j 
i d& d8 6ƒ | j 
>temp_119|source:ó|start:3830|stop:4330
   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   C   s(    													:	c         C
   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d
 j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d 
d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] }
 | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	
   R   t   reverse
>temp_120|source:ó|start:6404|stop:6904
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    			
	c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ
  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | 
j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d 
ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ }
 | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j 
| d d !ƒ } | j
>temp_121|source:ó|start:3373|stop:3873
_ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ 
| ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t  
 fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R
   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR 
  t   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht   r
esett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt
/data/azomer/reads-f
>temp_122|source:ó|start:7155|stop:7655
 j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* 
ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 
d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ 
ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t)
 | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d>
 ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i 
   i   s   [fasta
>temp_123|source:ó|start:4164|stop:4664
q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä 
|  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   r
everse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.
   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht
   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR7   _   s    	Jc         C   s!   |  j 
 r |  j ƒ  S|  j 
>temp_124|source:ó|start:3019|stop:3519
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c    
       B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d 
„  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _ 
 d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _
 g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  
|  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  
n  d  S(   Ni    t
>temp_125|source:ó|start:6515|stop:7015
  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d
 k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ 
 t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }
	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j 
d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ |
 j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |
	 d! d d d d d" 
>temp_126|source:ó|start:2906|stop:3406
sets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  
Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(
   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d
  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d
 ƒ |  j r£ d  | 
>temp_127|source:ó|start:9746|stop:10246
  R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR     s    									:c         C   sF  |  j  j ƒ  d 
j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ
  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j
 d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ 
 |  _ g  |  j j ƒ
>temp_128|source:ó|start:9707|stop:10207
  R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5  
 R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |
  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ 
ƒ Pn  | d | j 
>temp_129|source:ó|start:4469|stop:4969
R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR7   _   s    	Jc        
 C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   
(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k
 r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n |
 d j ƒ  |  _ |
>temp_130|source:ó|start:10395|stop:10895
2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(   R
1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  | 
 _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	 
  R‘   R’   R   R1 
>temp_131|source:ó|start:1776|stop:2276
c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R 
  R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           B
   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t
 | ƒ |  _ x‘ |  
>temp_132|source:ó|start:2574|stop:3074
 post   syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   /   s    		(c         C   s   |  j  j ƒ  d 
 S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR 
>temp_133|source:ó|start:2954|stop:3454
   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn 
  e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d
 „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | | 
 _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d | 
 _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D]
 } | j d ƒ r¶ |
>temp_134|source:ó|start:4591|stop:5091
  	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  
S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c  
       C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j 
} |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _
	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/  
 R-   R>   R   R	  
>temp_135|source:ó|start:9068|stop:9568
/)/*)		N(   R
   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR   B   s   								t   QualSourcec           B   s/   e  Z e d
  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _
 | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j
 d ƒ |  j r d  
>temp_136|source:ó|start:9143|stop:9643
   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   B   s   								t   QualSourcec           B   s/   e  Z
 e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d 
 |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ | 
 j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q
’ ƒ |  _ |  j ƒ  
>temp_137|source:ó|start:5757|stop:6257
    		#c         C   s7   |  j  ƒ  x& |  j 
ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R 
  t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         C   
s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c
         C   s8   
>temp_138|source:ó|start:9486|stop:9986
 d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d
  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d 
 S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	
   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R 
  R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR     s    									:c         C   
sF  |  j  j ƒ  d 
>temp_139|source:ó|start:9992|stop:10492
 |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ
 râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R( 
  t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5 
  R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/tr
>temp_140|source:ó|start:1451|stop:1951
  sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] }
 | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   j
oin(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s 
   %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   *   s
>temp_141|source:ó|start:7926|stop:8426
333333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   
t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…
ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{
®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™
é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   p
yplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R& 
  R   R   t   maxt
>temp_142|source:ó|start:588|stop:1088
nit__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j
 ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   fre
quency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt   
seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s
    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R  
 t   write(   R  
>temp_143|source:ó|start:589|stop:1089
it__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j
 ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   freq
uency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt   s
eq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s 
   *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   
t   write(   R   
>temp_144|source:ó|start:9067|stop:9567
/)/*)		N(   R
   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR   B   s   								t   QualSourcec           B   s/   e  Z e 
d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  
_ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j
 d ƒ |  j r d 
>temp_145|source:ó|start:2606|stop:3106
 flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		
(c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   
	R   c          
>temp_146|source:ó|start:822|stop:1322
qt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store
   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s( 
  R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   |
 r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   
(   R   R   R   
>temp_147|source:ó|start:5807|stop:6307
  C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6 
  R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    
s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  
|  _ |  j j d ƒ
>temp_148|source:ó|start:1312|stop:1812
R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ
 | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d 
!^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   seq
uencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C 
  s   |  j  j ƒ  
>temp_149|source:ó|start:4402|stop:4902
R.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   has
ht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR7   _   s    	Jc         C   s!   |  
j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  
j k  r” |  j |  j
>temp_150|source:ó|start:2618|stop:3118
ppendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c
         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c   
        B   sn   e 
>temp_151|source:ó|start:1952|stop:2452
    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (   
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR       s   		t	   ReadFastac           B   s   e  Z d 
 „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘
 |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ
 t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ 
d  S(   Niè  i    
>temp_152|source:ó|start:1900|stop:2400
ly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   _
_module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   	
	t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c   
      C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d 
d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j | 
 j j ƒ |  j j |
>temp_153|source:ó|start:8189|stop:8689
 mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdf
s   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gr
idspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt  
 intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R  
 t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t
   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   
meant   stdt   min
>temp_154|source:ó|start:9405|stop:9905
d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  
_ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r 
d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d
  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst
	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R
   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_155|source:ó|start:4099|stop:4599
 d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  | 
 j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j 
ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t  
 next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t
   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
7   _   s    
>temp_156|source:ó|start:724|stop:1224
(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent 
  idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies(
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyt   store   s    *c         C   s   |  j  j d | ƒ d  
S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c   
      C   s0   | 
>temp_157|source:ó|start:492|stop:992
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d 
| j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns
   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	 
  write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
t   store   s    
>temp_158|source:ó|start:2349|stop:2849
ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ
 d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (
   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst 
  stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /
   s    		(c         C   s   |  j  j ƒ  d  S(   N(   R 
   R   (   R   ( 
>temp_159|source:ó|start:5840|stop:6340
r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t
   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyt   get_seq_by_read_id    s    c         C   s 
  |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c   
      C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni
    (   R"   R*   R
>temp_160|source:ó|start:4641|stop:5141
 j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j | 
 j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  
|  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R  
 (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R  
 t   hash_entry(    
>temp_161|source:ó|start:9239|stop:9739
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   
s   								t   QualSourcec           B   s/   e  Z e d  „ Z d 
„  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ 
d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  
j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j
 ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   
qualst	   quals_intR
>temp_162|source:ó|start:1109|stop:1609
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
   s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S
(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    
iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d
 t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   ( 
  t   rangeR   t  
>temp_163|source:ó|start:1036|stop:1536
ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s  
  c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns
   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   
c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | 
ƒ d ƒ D] } | 
>temp_164|source:ó|start:8159|stop:8659
colorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   va
gš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplo
tt   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R
%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct  
 GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR 
  t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtex
tRE   t   textt   
>temp_165|source:ó|start:5097|stop:5597
R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ
  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j
 |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ 
|  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   st
ripR	   R   R"   R 
>temp_166|source:ó|start:2053|stop:2553
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s
   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS( 
  c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j
 j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  
j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i
   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest
   SequenceSourcet
>temp_167|source:ó|start:7348|stop:7848
 | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | 
j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d
) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n 
Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading:
 %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   grid
t   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™
™™™©?t   bottomg¸…
>temp_168|source:ó|start:3254|stop:3754
|  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i 
 |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t
 g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j
 ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   la
zy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   u
nique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_se
qR   t	   readlinest
>temp_169|source:ó|start:4367|stop:4867
  sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   
total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R  
 t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£ 
  |  j  r› |  j d 
>temp_170|source:ó|start:3830|stop:4330
   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   C   s(    													:	c         C
   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d
 j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d 
d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] }
 | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	
   R   t   reverse
>temp_171|source:ó|start:5151|stop:5651
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x
   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ 
 |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 
Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j
 rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R  
 R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R> 
  (   R   R   t 
>temp_172|source:ó|start:9852|stop:10352
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    					
				:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |
  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D]
 } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j
 ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] 
} t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH
   RI   R	   R*   R‘
>temp_173|source:ó|start:1969|stop:2469
name__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     
  s   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z R
S(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |
  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n 
 |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i 
   i   s)   [fasta
>temp_174|source:ó|start:10617|stop:11117
 d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   s
A   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    
(   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    
					(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/
azomer/reads-for-ass
>temp_175|source:ó|start:699|stop:1199
 |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   w
rite_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   spl
itt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s 
  |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR      s    
>temp_176|source:ó|start:7016|stop:7516
| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ
 d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d
 | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, |
 ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5
 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j
 g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d
; d6 d d< d) d= ƒ
>temp_177|source:ó|start:9360|stop:9860
        B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c      
   C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  | 
 _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] }
 | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_
file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t
   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/da
ta/azomer/reads-for-
>temp_178|source:ó|start:7655|stop:8155
lib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linew
idtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   
t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   b
lackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence len
gthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymin
t   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   c
enteri	   s   axes.
>temp_179|source:ó|start:5716|stop:6216
sets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S( 
  N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_
read_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib
>temp_180|source:ó|start:2281|stop:2781
 ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j
	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   
Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	 
  sequencest   SequenceSourcet   fastat   nextt   post   syst   stderrR  
 t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    	
	(c  
>temp_181|source:ó|start:5928|stop:6428
_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyt   get_seq_by_read_id    s    c         C   s   |  j  
j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c         
C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (  
 R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/tra
>temp_182|source:ó|start:10641|stop:11141
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d 
 |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘  
 R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R  
 R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibr
>temp_183|source:ó|start:4927|stop:5427
 |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t S
d  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   
R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x 
|  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r
¨ |  j j |  j j 
>temp_184|source:ó|start:8653|stop:9153
tt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   tit
let   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequenc
e_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt& 
  visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R
>temp_185|source:ó|start:10123|stop:10623
d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d |
 j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d
 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R  
 R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR!     s*    		+#+c         C   s
   |  j  j ƒ  d  S
>temp_186|source:ó|start:7321|stop:7821
d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ |
 j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ
 t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ 
Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i 
  s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s
   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidth
gš™™™™™¹?i   t   l
>temp_187|source:ó|start:3865|stop:4365
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    			
										:	c         C   s  x§ |  j  ƒ  r© t j |  j 
j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 
7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D]
 } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  
_ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   next_
regulart   hashlibt
>temp_188|source:ó|start:8453|stop:8953
ramst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt 
  plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst 
  xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)  
 t   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickste
pt	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t
   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/
>temp_189|source:ó|start:1950|stop:2450
s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   ( 
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR       s   		t	   ReadFastac           B   s   e  Z 
d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ 
x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j 
ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ
 d  S(   Niè  i  
>temp_190|source:ó|start:6835|stop:7335
d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ |
 j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |
	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n 
 | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ|
 j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* 
ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 
d2 | d3 d d) d4 d
>temp_191|source:ó|start:9616|stop:10116
| ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+
   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR 
  R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    							
		:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _
 d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } 
t	 | ƒ ^ qw |  _
>temp_192|source:ó|start:6667|stop:7167
|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g |
 d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ
 | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t
 ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | 
d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5
d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) 
d* ƒ| j  t d t
>temp_193|source:ó|start:3363|stop:3863
 j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   
>(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR
"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_ne
xt_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest   starts
witht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (   
 sJ   /mnt/data/azom
>temp_194|source:ó|start:10448|stop:10948
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R!     s*    		+#+c         C   s   | 
 j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c      
   C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(
   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /
mnt/data/azomer/read
>temp_195|source:ó|start:7193|stop:7693
 t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d 
d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | 
d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9
 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= 
ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy |
 j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss
   g      Y@i   t  
>temp_196|source:ó|start:4173|stop:4673
 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t 
|  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse( 
  t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R
	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR7   _   s    	Jc         C   s!   |  j  r |  j
 ƒ  S|  j ƒ  Sd  S(
>temp_197|source:ó|start:979|stop:1479
ore   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s
(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0 
  | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R
   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t
  d t | ƒ | ƒ 
>temp_198|source:ó|start:3158|stop:3658
d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c      
   C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |
  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j 
r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ
  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   None
t   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique
_hash_dictt   uniqu
>temp_199|source:ó|start:8680|stop:9180
dt   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_len
t	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distr
ibutionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length
_distribution³   sx    ")"/
)/*)		N(   R   R   R  
 RE   R   R7   R!  
>temp_200|source:ó|start:5523|stop:6023
   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE  
 R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 
|  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t  
 read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyt 
>temp_201|source:ó|start:10147|stop:10647
d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  
|  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG 
  R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE 
  R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    	
	+#+c         C   s   |  j  j ƒ  d  S(  
 N(   R1   R   ( 
>temp_202|source:ó|start:8811|stop:9311
nce_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt
&   visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   
RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR
>temp_203|source:ó|start:5200|stop:5700
/Vibrio/utils/fastalib.pyRD   x   s    	!&c         C   sì  
 d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk 
| |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ 
ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(  
 Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   R
E   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/dat
a/azomer/reads-for-a
>temp_204|source:ó|start:783|stop:1283
_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt
   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s   | 
 j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ
 d  S(   Ns   %s(
>temp_205|source:ó|start:8604|stop:9104
ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt 
  minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_lent	  
 xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distribut
ionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_dis
tribution³   sx    ")"/
)/*)
>temp_206|source:ó|start:2242|stop:2742
 _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d 
k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  
j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading 
FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat
   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R  
 t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_207|source:ó|start:7955|stop:8455
cess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xm
int   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   x
x-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean:
 %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .
png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspec
t   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt
   figuret   rcPara
>temp_208|source:ó|start:8449|stop:8949
rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjus
tt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytick
st   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   min
R)   t   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtic
kstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8
   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vib
>temp_209|source:ó|start:7984|stop:8484
   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymax
g      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	  
 s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s /
 max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   m
atplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R
#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   up
datet   rct   Grid
>temp_210|source:ó|start:2744|stop:3244
   /   s    		(c         C   s   |  j  j ƒ  d  S(   N(
   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „
  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z R
S(   c         C 
>temp_211|source:ó|start:3733|stop:4233
eqR   t	   readlinest   startswitht   resett   init_unique_hash(   R   R)   
R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   C   s(    													:
	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k
 rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g 
d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ 
qº d t ƒD] } |
>temp_212|source:ó|start:6111|stop:6611
1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  | 
 _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R
   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   sX  d 
d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  
j d k r† t j j	
>temp_213|source:ó|start:8078|stop:8578
  à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edg
ecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   v
agš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pypl
ott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   
R%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct 
  GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR
   t   ylabelt   x
>temp_214|source:ó|start:3190|stop:3690
 Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ |
 |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d
 |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ 
 D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S( 
  Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allo
w_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt
   unique_next_hash
>temp_215|source:ó|start:4818|stop:5318
    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |
  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _
 | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R
"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j
 ƒ  d j ƒ  |  _ 
>temp_216|source:ó|start:7146|stop:7646
 d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ 
qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | 
j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ 
| j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j(
 | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y |
 j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿ
ÿÿi'  i    i   s 
>temp_217|source:ó|start:7867|stop:8367
fî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number o
f sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smal
lt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weig
hts   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s
 / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .p
dfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.
gridspect   gridspe
>temp_218|source:ó|start:4752|stop:5252
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c   
      C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j }
 |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	
 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   
R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x 
  s    	!&
>temp_219|source:ó|start:6652|stop:7152
 j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d
 } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j 
j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d
 !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ
| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d 
| ƒ d' d( d) d* ƒ
>temp_220|source:ó|start:9266|stop:9766
embly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t   
QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(
   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ 
d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  
j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (
   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R 
  R1   R2   t   tot
>temp_221|source:ó|start:1951|stop:2451
    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (  
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR       s   		t	   ReadFastac           B   s   e  Z d
  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x
‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ
 t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ 
d  S(   Niè  i   
>temp_222|source:ó|start:9807|stop:10307
 R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR     s    									:c         C   s
F  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ 
t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7
_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j 
ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7
_ t S(   Ni   RG 
>temp_223|source:ó|start:4301|stop:4801
nti   R	   R   t   reverse(   t   next_regulart   hashlibt   sha1R   t 
  uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6
   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR7   _   s    	Jc  
       C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uni
queR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fast
>temp_224|source:ó|start:4566|stop:5066
/fastalib.pyR7   _   s    	Jc         C   s!   |  j  r
 |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k
  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _
 | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (  
 R   RA   R"   R.  
>temp_225|source:ó|start:1844|stop:2344
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR
   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac 
          B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _
  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k
 r… t j j d |  j
>temp_226|source:ó|start:10504|stop:11004
Vibrio/utils/fastalib.pyR!     s*    		+#+
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _
 |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR6 
>temp_227|source:ó|start:6004|stop:6504
tils/fastalib.pyt   get_seq_by_read_id    s    c         C   s   | 
 j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c      
   C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    
(   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    			
	c         C   s
>temp_228|source:ó|start:3957|stop:4457
				:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j 
ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  
j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j |
 d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  
d  S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   
hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/  
 R   t   total_uniq
>temp_229|source:ó|start:2609|stop:3109
usht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(
c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	
R   c           B 
>temp_230|source:ó|start:2202|stop:2702
„  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j 
ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	
 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   N
iè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	  
 sequencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   
t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azo
mer/reads-for-assemb
>temp_231|source:ó|start:9967|stop:10467
F  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ 
t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7
_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j 
ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7
_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R 
  Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (  
  sJ   /mnt/data/azo
>temp_232|source:ó|start:9367|stop:9867
  B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   
sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 
|  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j 
d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pa
thR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   tota
l_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azom
er/reads-for-assembl
>temp_233|source:ó|start:2512|stop:3012
 R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst   s
tderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   
s    		(c         C   s   |  j  j ƒ  d  S(   N(   R    
R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    
sJ   /mnt/data/azome
>temp_234|source:ó|start:4182|stop:4682
 D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ 
|  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   ne
xt_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   
sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7  
 _   s    	Jc         C   s!   |  j  r |  j ƒ  S|  j
 ƒ  Sd  S(   N(  
>temp_235|source:ó|start:10105|stop:10605
^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ
 ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   
R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualsco
resRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR!     s*    		+#+c
         C   s  
>temp_236|source:ó|start:10832|stop:11332
j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    (
    (    (   R#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R   (  
  (    (    sJ   /mn
>temp_237|source:ó|start:10747|stop:11247
 9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |
  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   
(    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (
    (    (    (   R
>temp_238|source:ó|start:9127|stop:9627
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R
*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   B   s   								t   QualSourcec         
  B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   
sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 
|  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j 
d ƒ r’ | ^ q’ ƒ
>temp_239|source:ó|start:10430|stop:10930
   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR!     s*    		+#+c   
      C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9
  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j
 j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R 
  (    (    sJ   /mn
>temp_240|source:ó|start:6560|stop:7060
j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n 
 | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ 
d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ 
ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } |
 j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d
 d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$
 ƒ | d  k r;| 
>temp_241|source:ó|start:6383|stop:6883
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R6   ¬   s    				c         C   sX  d d  l  j } d d  l j } g  }
 |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ
 t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t |
 ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7
<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| 
j d d ƒ } | j
>temp_242|source:ó|start:4701|stop:5201
queR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  
j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | 
d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni   
 i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R  
 t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/
>temp_243|source:ó|start:3204|stop:3704
 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  
_ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  
j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j 
d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >
(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"
   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_nex
t_hashR   t   file
>temp_244|source:ó|start:3141|stop:3641
 „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z
 RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  | 
 _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j
 j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶
 ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_f
ile_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R  
 R   t   unique_ha
>temp_245|source:ó|start:5873|stop:6373
(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_
by_read_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1 
  R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _
 d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R  
 R1   R2   (   R  
>temp_246|source:ó|start:9393|stop:9893
Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | 
|  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ
 |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _
 |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	  
 t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   
(   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/uti
>temp_247|source:ó|start:4727|stop:5227
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k 
 r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ 
| d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   
R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   h
ash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyRD
>temp_248|source:ó|start:4671|stop:5171
S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c  
       C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j 
} |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _
	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/  
 R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   
/mnt/data/azomer/rea
>temp_249|source:ó|start:10018|stop:10518
 } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	
 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t
 | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 
| ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI 
  R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   
qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/f
>temp_250|source:ó|start:5217|stop:5717
talib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j
 ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d
 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j 
ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   
(   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t 
  tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainings
>temp_251|source:ó|start:6104|stop:6604
N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _
  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   
R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   s
X  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k
 s_ |  j d k r† t
>temp_252|source:ó|start:3179|stop:3679
„  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _ 
 d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _
 g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  
|  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  
n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_in
itt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique
_hash_listt   uniqu
>temp_253|source:ó|start:6493|stop:6993
     C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j
 d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  
j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d 
} n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j
 i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d 
!ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ|
 j t d | d ƒ
>temp_254|source:ó|start:2287|stop:2787
 |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  
n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  
i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequ
encest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t   
flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		
(c      
>temp_255|source:ó|start:1691|stop:2191
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s 
   %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R 
  R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac        
   B   s   e  Z d
>temp_256|source:ó|start:7435|stop:7935
0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ
| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | 
j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss  
 g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   co
lors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t
   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag33333
3Ó?t   y2g333333Ã?s
>temp_257|source:ó|start:128|stop:628
„  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n
 d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z 
e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C  
 s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   
opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c
         C   s`   
>temp_258|source:ó|start:8113|stop:8613
 xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mea
n: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs  
 .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridsp
ect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   int
t   figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t 
  subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   
xtickst   ytickst 
>temp_259|source:ó|start:5676|stop:6176
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s
$    		#c         C   s7   |  j  ƒ  x& |  j
 ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R
   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         C  
 s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/a
zomer/reads-for-asse
>temp_260|source:ó|start:2731|stop:3231
/fastalib.pyR   /   s    		(c         C   s   |  j  j ƒ  
d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R  
 R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e 
d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 
d	 d „ Z RS(   c
>temp_261|source:ó|start:7201|stop:7701
 d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	
 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ }
 | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t
 | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d
  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  
Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g    
  Y@i   t   figsize
>temp_262|source:ó|start:2819|stop:3319
  N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   ( 
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z
 d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „
 Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  
|  _ d  |  _ g  | 
>temp_263|source:ó|start:4781|stop:5281
s2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  
j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | 
d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni   
 i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R  
 t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c
         C   sì   
>temp_264|source:ó|start:9969|stop:10469
  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t
 | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_
 t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ 
 7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ 
t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   
Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    
sJ   /mnt/data/azome
>temp_265|source:ó|start:9911|stop:10411
   s    									:c         C   sF  |  j  j ƒ  d j ƒ
  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  
_ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ
 râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(
   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5
   R2   RJ   (   R
>temp_266|source:ó|start:9137|stop:9637
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (
    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   B   s   								t   QualSourcec           B   s/  
 e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | | 
 _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |
  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ |
 ^ q’ ƒ |  _ |  
>temp_267|source:ó|start:4567|stop:5067
fastalib.pyR7   _   s    	Jc         C   s!   |  j  r
 |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k 
 r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ 
| d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   
R   RA   R"   R.   
>temp_268|source:ó|start:4329|stop:4829
e(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&
   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR7   _   s    	Jc         C   s!   |  j  r |
  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
!   r   s    	c
>temp_269|source:ó|start:4557|stop:5057
rio/utils/fastalib.pyR7   _   s    	Jc         C   s! 
  |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j
 |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j
 ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   
R   (   R   RA   
>temp_270|source:ó|start:3636|stop:4136
ue_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_pointert  
 seekt	   total_seqR   t	   readlinest   startswitht   resett   init_unique_ha
sh(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    									
				:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j
 ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i | 
 j d 6|  j g d 
>temp_271|source:ó|start:7530|stop:8030
  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n
 Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t
   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	 
  linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ
¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g3333
33Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   t  
 sizes   xx-smallt
>temp_272|source:ó|start:1074|stop:1574
write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | r |  j  |
 ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ |
 ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d 
!^ qE ƒ S(   Ni  
>temp_273|source:ó|start:8627|stop:9127
mt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt
   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickste
pt   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gs
t   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
")"/)/*)
		
>temp_274|source:ó|start:8148|stop:8648
  axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max
: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matpl
otlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   
R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   update
t   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill
_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylim
t   figtextRE   t 
>temp_275|source:ó|start:8014|stop:8514
es   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s 
  %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s
6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x
-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott 
  matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R
   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   s
ubplotRO   R   t  
>temp_276|source:ó|start:9937|stop:10437
		:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ 
d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t
	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  
t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	
 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI
   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t  
 qualscoresRK   t  
>temp_277|source:ó|start:10435|stop:10935
   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR!     s*    		+#+c       
  C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s 
   c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j 
d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (  
  (    sJ   /mnt/dat
>temp_278|source:ó|start:8438|stop:8938
figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   sub
plots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtick
st   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   
stdt   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_l
ent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_dis
tributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads
-for-assembly/traini
>temp_279|source:ó|start:6878|stop:7378
| j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d
 d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j 
d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j
 t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |
	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t
 |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ
 } | j j i d& 
>temp_280|source:ó|start:395|stop:895
|  _ d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj( 
  t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r
9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j |
 ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt
   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   store_fre
quencies(    (    sJ
>temp_281|source:ó|start:7809|stop:8309
?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?
t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequen
ce lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt 
  ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   ha
t   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %
.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i
   (-   t   matplo
>temp_282|source:ó|start:9617|stop:10117
 ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+ 
  R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR  
 R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    							
		:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ 
d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t
	 | ƒ ^ qw |  _ 
>temp_283|source:ó|start:4374|stop:4874
   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_u
niqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    	J
c         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   n
ext_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j 
 r› |  j d k r” |
>temp_284|source:ó|start:7623|stop:8123
iÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei
   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet
   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgf
fffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   numb
er of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-
smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   
weights   xx-larget
>temp_285|source:ó|start:10116|stop:10616
 |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  |
 d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ 
|  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R
’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t 
  q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR!     s*    		+#+c        
 C   s   |  j  j ƒ
>temp_286|source:ó|start:4328|stop:4828
se(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R
&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   
i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR7   _   s    	Jc         C   s!   |  j  r 
|  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R!   r   s    	c
>temp_287|source:ó|start:5914|stop:6414
R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         C
   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    
c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(
   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/
data/azomer/reads-fo
>temp_288|source:ó|start:2737|stop:3237
lib.pyR   /   s    		(c         C   s   |  j  j ƒ  d  S(
   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   
(    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ 
Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d 
„ Z RS(   c     
>temp_289|source:ó|start:9102|stop:9602
		N(   R   R   R   RE   R   R7   R!   R
D   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								
t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z
 RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  | 
 _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g 
 |  j j ƒ  D] } |
>temp_290|source:ó|start:8089|stop:8589
î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®G
áz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s
   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pypl
ott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R
   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect
   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   yla
belt   xlabelR*   t
>temp_291|source:ó|start:8577|stop:9077
xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt 
  numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet
   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_le
ngthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   vi
sualize_sequence_length_distribution³   sx    ")
"/
>temp_292|source:ó|start:8093|stop:8593
   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?
s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   
x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott
   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R
   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   
subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt
   xlabelR*   t   
>temp_293|source:ó|start:4079|stop:4579
 ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j
 | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^
 qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t
   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdig
estR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   
hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR
>temp_294|source:ó|start:6021|stop:6521
   get_seq_by_read_id    s    c         C   s   |  j  j ƒ  d  S(
   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |
  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	
   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C 
  sX  d d  l  j }
>temp_295|source:ó|start:5516|stop:6016
(   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R
   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s
$    		#c         C   s7   |  j  ƒ  x& |  j
 ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R
   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_296|source:ó|start:269|stop:769
J   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(
   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   ou
tput_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __in
it__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j
 ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   freq
uency:%d(   t   un
>temp_297|source:ó|start:987|stop:1487
    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R 
  t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | r 
|  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   
R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t 
| ƒ | ƒ t | ƒ
>temp_298|source:ó|start:7624|stop:8124
ÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei 
  i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet 
  -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgff
ffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   numbe
r of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-s
mallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   w
eights   xx-larget
>temp_299|source:ó|start:9294|stop:9794
tils/fastalib.pyR   B   s   								t   QualSourcec           B   s
/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   |
 |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ
 |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r
’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   
R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_quals
R   R4   R5   R6   (
>temp_300|source:ó|start:7240|stop:7740
 j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 |
 d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ |
 j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ
 t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ 
Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i
   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?
s   axes.linewidtht
>temp_301|source:ó|start:2382|stop:2882
 j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   
[fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Seque
nceSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appen
dR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c   
      C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   
/mnt/data/azomer/rea
>temp_302|source:ó|start:9054|stop:9554
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ 
  (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR   B   s   								t   QualSourcec           B   s
/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   |
 |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ
 |  _ |  j j d ƒ
>temp_303|source:ó|start:1700|stop:2200
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c 
        C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R   R
   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR       s   		t	   ReadFastac           B   s
   e  Z d  „  Z d
>temp_304|source:ó|start:4447|stop:4947
total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R  
 t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£ 
  |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_
 |  j rc | d n
>temp_305|source:ó|start:10533|stop:11033
  s*    		+#+c         C   s   |  j  j
 ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C
   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni
    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s 
   					(   R
>temp_306|source:ó|start:662|stop:1162
 | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss
   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_se
qt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store
   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s( 
  R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vib
>temp_307|source:ó|start:6124|stop:6624
 R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ 
g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   
(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d
 d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t
 j j	 d |  j ƒ
>temp_308|source:ó|start:1482|stop:1982
| ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ
 S(   Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet   
piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  
j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   
t   __name__t   __m
>temp_309|source:ó|start:1304|stop:1804
   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d 
t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | |
 | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   
t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c    
     C   s   |  j 
>temp_310|source:ó|start:5707|stop:6207
/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq
 Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   ge
t_seq_by_read_id    s    c         C   s   |  j  j ƒ  d  S(   N(
   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils
>temp_311|source:ó|start:10145|stop:10645
 d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ
  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   R
G   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   R
E   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    
		+#+c         C   s   |  j  j ƒ  d  S(
   N(   R1   R   (
>temp_312|source:ó|start:9478|stop:9978
 d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ | 
 j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j
 ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t  
 qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   
R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR     s    									:c      
   C   sF  |  j  j
>temp_313|source:ó|start:9245|stop:9745
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   
								t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z 
d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  
_ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r 
d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d
  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst
	   quals_intR   R
>temp_314|source:ó|start:10329|stop:10829
 RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   
R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d
  |  _ g  |  _ |  
>temp_315|source:ó|start:8505|stop:9005
R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*
   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt
   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   destt
   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst 
  seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_
sequence_length_dist
>temp_316|source:ó|start:6398|stop:6898
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    
				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |
  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  
n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ
 d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d
@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } 
| j | d d !ƒ }
>temp_317|source:ó|start:2206|stop:2706
 RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r
´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  
n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè 
 i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   seq
uencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t  
 flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/
reads-for-assembly/t
>temp_318|source:ó|start:4615|stop:5115
c         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   ne
xt_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  
r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j 
rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(  
 Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R 
  R   t   False( 
>temp_319|source:ó|start:7550|stop:8050
| d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi
'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i  
 gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	 
  linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?
t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of s
equencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt
   xmint   xmaxt  
>temp_320|source:ó|start:5241|stop:5741
	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d 
} x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d
 ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	
 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t  
 readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R 
  R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastal
>temp_321|source:ó|start:6765|stop:7265
 } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d
 d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d 
d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d
 d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k 
r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d
 t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! 
d+ d d, | ƒ | j
>temp_322|source:ó|start:5089|stop:5589
   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x | 
 j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ 
|  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ
  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlin
et   stripR	   R   
>temp_323|source:ó|start:2304|stop:2804
k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j 
j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [
fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Sequenc
eSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appendR
	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c    
     C   s   |  j 
>temp_324|source:ó|start:8038|stop:8538
   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-l
arget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.
2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png
(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect 
  gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   
figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   sub
plots_adjustt   plo
>temp_325|source:ó|start:6289|stop:6789
 _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  
l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j 
j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d 
ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } 
|	 | c d 7<qW|
>temp_326|source:ó|start:9920|stop:10420
  									:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d
  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  
j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j
  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j
 j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    
(   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   R
J   (   R   t   qu
>temp_327|source:ó|start:2495|stop:2995
mory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat   nextt   
post   syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR   /   s    		(c         C   s   |  j  j ƒ  d  
S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R 
  (    (    (    sJ 
>temp_328|source:ó|start:2220|stop:2720
      C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d 
d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j | 
 j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s) 
  [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Seq
uenceSourcet   fastat   nextt   post   syst   stderrR   t   flusht   app
endR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/V
>temp_329|source:ó|start:5381|stop:5881
_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7
} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (  
 R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   te
llR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  
j Sq Wt S(   N(
>temp_330|source:ó|start:8835|stop:9335
distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_l
ength_distribution³   sx    ")"/
)/*)		N(   R   R  
 R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
B   s   				
>temp_331|source:ó|start:9540|stop:10040
 |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^
 q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R
,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4
   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    								
	:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d 
} xÂ |  j  j ƒ  }
>temp_332|source:ó|start:3290|stop:3790
|  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ 
|  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ 
r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t
   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	 
  R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hash
R   t   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht 
  resett   init_uni
>temp_333|source:ó|start:8816|stop:9316
engthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   v
isualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   
(    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   B
>temp_334|source:ó|start:8633|stop:9133
figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   sh
ow(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   
pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   a
x1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
")"/)/*)	
	
>temp_335|source:ó|start:4891|stop:5391
|  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d
 |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R  
 RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_
entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyRD   x   s    	!&c         C   sì   d  |  _ |
  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ | 
 j d 7_ t St	 
>temp_336|source:ó|start:9609|stop:10109
 ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_path
R*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_
qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    			
						:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d
  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  
D] } t	 | ƒ ^ q
>temp_337|source:ó|start:1886|stop:2386
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t  
 __name__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
    s   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z
 RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r
´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  
n  |  j  j |  j j
>temp_338|source:ó|start:10085|stop:10585
 ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j 
|  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j 
ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   
R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (
   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c
>temp_339|source:ó|start:837|stop:1337
   t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *
c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   wri
te(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ
 } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   
R   (    (    sJ   
>temp_340|source:ó|start:9133|stop:9633
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   B   s   								t   QualSourcec           B   
s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   |
 |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  
ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ 
r’ | ^ q’ ƒ |  _
>temp_341|source:ó|start:7935|stop:8435
   number of sequencess   sequence lengthi2   i   t   rotationiZ   t   size
s   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s  
 %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6
   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-
larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott  
 matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R 
  t   maxt   intt
>temp_342|source:ó|start:1332|stop:1832
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g 
} d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni
    s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_leng
tht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  
d  S(   N(   R   
>temp_343|source:ó|start:10223|stop:10723
 ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni  
 RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R  
 RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    
		+#+c         C   s   |  j  j ƒ  d  S
(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibr
>temp_344|source:ó|start:817|stop:1317
te_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   
store   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >
%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s
0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   
R   (   R   R   
>temp_345|source:ó|start:9152|stop:9652
   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   B   s   								t   QualSourcec           B   s/   e  Z e d  „
 Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ |
 |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d 
ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _
 |  j ƒ  d  S(  
>temp_346|source:ó|start:10131|stop:10631
 St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  
7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t
 S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj
   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR! 
    s*    		+#+c         C   s   |  j 
 j ƒ  d  S(   N(
>temp_347|source:ó|start:9696|stop:10196
   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR 
  R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    							
		:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _
 d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } 
t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ 
 t | ƒ ƒ Pn  |
>temp_348|source:ó|start:9657|stop:10157
  R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_in
tR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR     s    									:c         C   sF  |  j  
j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r
¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St 
Sn  | j d ƒ râ |
>temp_349|source:ó|start:982|stop:1482
   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s( 
  R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   |
 r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   
(   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d
 t | ƒ | ƒ t 
>temp_350|source:ó|start:8119|stop:8619
rget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2
f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(
   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect  
 gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   f
iguret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subp
lots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xticks
t   ytickst   xlim
>temp_351|source:ó|start:5972|stop:6472
-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _
 |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR6   ¬   
>temp_352|source:ó|start:6273|stop:6773
 d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R  
 R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d 
 l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j
 d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j 
ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d 
}	 x | D] } |	 |
>temp_353|source:ó|start:278|stop:778
 d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c   
      C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file
_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s
    	c         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f 
ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(
   t   uniquet   
>temp_354|source:ó|start:9716|stop:10216
   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   ( 
  R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR     s    									:c    
     C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ
  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _
 |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  |
 d | j ƒ  7} q
>temp_355|source:ó|start:5688|stop:6188
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j
 | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read
_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyt   get_seq_by_read_id    s    c         C   s   |  j  
j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainin
>temp_356|source:ó|start:8481|stop:8981
ridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   
t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextR
E   t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R
   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   
t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyt&   visual
>temp_357|source:ó|start:8468|stop:8968
t   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill
_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylim
t   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt
   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstep
t   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst
   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_358|source:ó|start:8735|stop:9235
t   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_
lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   
visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ  
 (    (    (    sJ  
>temp_359|source:ó|start:2156|stop:2656
dFastac           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ  
 g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j 
j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j
 j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Re
ading FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   
fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R   ( 
  R   t   f_name( 
>temp_360|source:ó|start:8183|stop:8683
: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges 
  .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplot
lib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   m
axt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO 
  R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabel
R*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   nump
yt   meant   stdt
>temp_361|source:ó|start:8167|stop:8667
®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é
?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   py
plott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&  
 R   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpe
ct   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   y
labelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t
   textt   numpyt 
>temp_362|source:ó|start:9706|stop:10206
   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5 
  R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ 
|  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ 
ƒ Pn  | d | j
>temp_363|source:ó|start:1512|stop:2012
 | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t
   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   
t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R 
  t   TrueR   R  
>temp_364|source:ó|start:2666|stop:3166
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  /   s    		(c         C   s   |  j  j ƒ  d  S(   N(  
 R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  
Z d „  Z d „  Z
>temp_365|source:ó|start:5260|stop:5760
         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  }
 | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j | 
 j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  
j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   strip
R	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR;   ˆ   s$   
>temp_366|source:ó|start:6954|stop:7454
	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ |
 j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  |
 j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d 
t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d
. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d
7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 
d0 d: t | ƒ t& j'
>temp_367|source:ó|start:9442|stop:9942
 C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _
 t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } |
 j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_fi
le_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t  
 total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    
								
>temp_368|source:ó|start:10026|stop:10526
|  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ 
ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R
*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscor
esRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_369|source:ó|start:6686|stop:7186
d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D]
 } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d
 d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d
 d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d 
d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r
`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d 
t |	 ƒ d | ƒ g
>temp_370|source:ó|start:658|stop:1158
d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|
%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   writ
e_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   s
tore   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%
s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2
>temp_371|source:ó|start:5444|stop:5944
Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni
   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   
R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |
  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   
read_id(    (    sJ 
>temp_372|source:ó|start:10216|stop:10716
7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S( 
  Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"
   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!    
 s*    		+#+c         C   s   |  j  j ƒ
  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_373|source:ó|start:2666|stop:3166
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  /   s    		(c         C   s   |  j  j ƒ  d  S(   N(  
 R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  
Z d „  Z d „  Z
>temp_374|source:ó|start:3772|stop:4272
resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   
s(    													:	c         C   s  x§ |  j  ƒ  r©
 t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j |
 d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t
	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t
 |  j ƒ |  _ |  j
>temp_375|source:ó|start:1413|stop:1913
   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g }
 d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni 
   s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_lengt
ht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d
  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingse
>temp_376|source:ó|start:10715|stop:11215
ts2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  | 
 _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	 
  R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R 
  R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main_
_i   R‚   i   i  
>temp_377|source:ó|start:5086|stop:5586
 R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x
 |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ 
r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j
 ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   read
linet   stripR	   R
>temp_378|source:ó|start:3169|stop:3669
 „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s
  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  
_	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _
 nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý
 |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	
   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt
   unique_hash_list
>temp_379|source:ó|start:4570|stop:5070
talib.pyR7   _   s    	Jc         C   s!   |  j  r | 
 j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!
   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r”
 |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | 
d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R 
  RA   R"   R.   R/ 
>temp_380|source:ó|start:1733|stop:2233
2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d
  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name
__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s
   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS( 
  c         C   s
>temp_381|source:ó|start:4080|stop:4580
 ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j 
| <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^
 qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t
   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdige
stR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   h
asht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR7
>temp_382|source:ó|start:5361|stop:5861
 |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ
 Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   
Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE
   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    
		#c         C   s7   |  j  ƒ  x& |  j ƒ  r
2 |  j | k r |  
>temp_383|source:ó|start:10414|stop:10914
t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyR!     s*    		+#
+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  
_ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1
   R2   (   R   ( 
>temp_384|source:ó|start:6681|stop:7181
 j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x |
 D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d 
d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d
 d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d
 d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d 
 k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t
 d t |	 ƒ d |
>temp_385|source:ó|start:3376|stop:3876
|  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | 
^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fa
sta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R  
 R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t
   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht   rese
tt   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/da
ta/azomer/reads-for-
>temp_386|source:ó|start:2641|stop:3141
  t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   /   s    		(c         C   s   | 
 j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R  
 R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z
 e e e d  „ Z d
>temp_387|source:ó|start:9576|stop:10076
 t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni 
   R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_i
ntR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR     s    									:c         C   sF  |  j 
 j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ 
r¥ | j ƒ  |  _ g 
>temp_388|source:ó|start:1869|stop:2369
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *  
 s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (
    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR       s   		t	   ReadFastac           B   s   e  Z
 d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _
 x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j 
ƒ t j j	 ƒ  n 
>temp_389|source:ó|start:10847|stop:11347
   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <
  s    					(   R   R   R   R   R!   R   R6   (    (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
  s   		t   __main__i   R‚   i   i   R*   (    (    (    (    (   R
#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R   (    (    (    sJ 
  /mnt/data/azomer/r
>temp_390|source:ó|start:8184|stop:8684
 %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges  
 .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotl
ib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   ma
xt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO  
 R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR
*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpy
t   meant   stdt 
>temp_391|source:ó|start:5724|stop:6224
brio/utils/fastalib.pyR;   ˆ   s$    		#c     
    C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   
R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id 
   s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (  
 R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR   
>temp_392|source:ó|start:8515|stop:9015
subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xt
ickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant
   stdt   minR)   t   savefigt   show(   R   t   titlet   destt   max_se
q_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_
distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_l
ength_distribution³ 
>temp_393|source:ó|start:6548|stop:7048
 ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t
 j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ 
t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW
| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d
 d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ |
 j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# 
ƒ | j d$ ƒ | 
>temp_394|source:ó|start:9163|stop:9663
   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s  
 								t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z
 d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |
  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r
 d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  
d  S(   Ni    R(  
>temp_395|source:ó|start:9712|stop:10212
  t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6  
 (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR     s    									:c 
        C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  
j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |
  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ P
n  | d | j ƒ  7
>temp_396|source:ó|start:7722|stop:8222
   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthg
š™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\
Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess  
 sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   
xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-large
t   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f /
 std: %.2f / min: %s
>temp_397|source:ó|start:1450|stop:1950
   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] 
} | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   
join(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s
    %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR   *   
>temp_398|source:ó|start:10821|stop:11321
  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2  
 (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR6   <  s    					(   R   R   R   R   R!   R  
 R6   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR     s   		t   __main__i   R‚   i   i   R*   
(    (    (    (    (   R#   R|   R<   R    R   R   R   R   t   argvR    R
Ž   R   (    (    ( 
>temp_399|source:ó|start:7950|stop:8450
equencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt
   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights
   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / 
mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs
   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gri
dspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   
intt   figuret   r
>temp_400|source:ó|start:401|stop:901
d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj(   t  
 selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r
9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d
  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   len
t   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequenci
es(    (    sJ   /mn
>temp_401|source:ó|start:30|stop:530
 l  Z  d  d l Z d  d l Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „
  ƒ  YZ d d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d
 k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ
   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS( 
  c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   out
put_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt
/data/azomer/reads-f
>temp_402|source:ó|start:2918|stop:3418
/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   . 
  s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z 
d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c      
   C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |
  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j 
r£ d  |  _ nD t g 
>temp_403|source:ó|start:10074|stop:10574
g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ r
â |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g
  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   
t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   
R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#
>temp_404|source:ó|start:7214|stop:7714
] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ 
d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i
 d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j'
 | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j*
 } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd
  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   
figsizei   i   gÍÌ
>temp_405|source:ó|start:5244|stop:5744
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } 
x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ
 r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 |
 j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   re
adlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R
   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_406|source:ó|start:1724|stop:2224
iningsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j 
 j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t
   __name__t   __module__R   t   TrueR   R   R   R   R   (    (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
      s   		t	   ReadFastac           B   s   e  Z d  „  Z d „ 
 Z RS(   c      
>temp_407|source:ó|start:5023|stop:5523
   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t
   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c  
       C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } |
 sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j
 j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j
 d 7_ t S(   Ni
>temp_408|source:ó|start:4227|stop:4727
 } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti  
 R	   R   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppe
rt	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   ( 
  R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR7   _   s    	Jc      
   C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR; 
  (   R   (    (  
>temp_409|source:ó|start:10133|stop:10633
St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7}
 q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S
(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj  
 R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   
  s*    		+#+c         C   s   |  j  j
 ƒ  d  S(   N(  
>temp_410|source:ó|start:2138|stop:2638
		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c 
        C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d
 d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j 
|  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s
)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   S
equenceSourcet   fastat   nextt   post   syst   stderrR   t   flusht   a
ppendR	   R   (   
>temp_411|source:ó|start:2189|stop:2689
 d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _
 x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j 
ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ
 d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (
   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst 
  stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   
/mnt/data/azomer/rea
>temp_412|source:ó|start:4282|stop:4782
(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   hashl
ibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   
t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(  
 R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets
>temp_413|source:ó|start:9271|stop:9771
/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualS
ourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c
         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  | 
 _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j 
ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t
   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1 
  R2   t   total_qu
>temp_414|source:ó|start:3141|stop:3641
 „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z
 RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  | 
 _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j
 j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶
 ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_f
ile_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R  
 R   t   unique_ha
>temp_415|source:ó|start:10457|stop:10957
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     
s*    		+#+c         C   s   |  j  j ƒ 
 d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   s
A   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    
(   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/
azomer/reads-for-ass
>temp_416|source:ó|start:617|stop:1117
  C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j 
ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet  
 write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   s
plitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s
   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (  
  (    sJ   /mnt/dat
>temp_417|source:ó|start:1056|stop:1556
>%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   
s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R  
 R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk 
  t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | 
| | | | d !^
>temp_418|source:ó|start:5720|stop:6220
2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#c  
       C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(
   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read
_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   
(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR
>temp_419|source:ó|start:10542|stop:11042
 		+#+c         C   s   |  j  j ƒ  d  
S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d
 |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   
R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    			
		(   R   R   
>temp_420|source:ó|start:1656|stop:2156
   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S
(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t
   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   
		t	   Rea
>temp_421|source:ó|start:10916|stop:11416
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    (    
(    (   R#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R   (    ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyt   <module>
>temp_422|source:ó|start:1390|stop:1890
rio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | 
ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d
 !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   s
equencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         
C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mn
t/data/azomer/reads-
>temp_423|source:ó|start:7716|stop:8216
ÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   line
widthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   r
ightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequenc
ess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmi
nt   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx
-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: 
%.2f / std: %.2f / m
>temp_424|source:ó|start:6305|stop:6805
ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6
   ¬   s    				c         C   sX  d d  l  j } d d  l j } g  } |
  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ 
t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | 
ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<q
W| j d	 d@ ƒ }
>temp_425|source:ó|start:3833|stop:4333
l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   C   s(    													:	c         C   
s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j
 |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 
6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } |
 d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   
R   t   reverse( 
>temp_426|source:ó|start:2838|stop:3338
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   . 
  s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z 
d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c      
   C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |
  _ | |  _	 i  |  
>temp_427|source:ó|start:9563|stop:10063
 d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  
d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qual
st	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   
R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR     s    									:c         C
   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } |
 s¬ t | ƒ r¥ | j
>temp_428|source:ó|start:7388|stop:7888
 j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( |
 ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j
+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿ
i'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i
   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t
	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffff
î?t   rightg\Âõ(\ï
>temp_429|source:ó|start:3358|stop:3858
t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } 
| j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    
t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_
caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   uniq
ue_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest   s
tartswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(   
 (    sJ   /mnt/data
>temp_430|source:ó|start:7330|stop:7830
 d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ
 | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ
 f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn |
 j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [
fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.
linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?
i   t   leftgš™™™™
>temp_431|source:ó|start:350|stop:850
  c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   out
put_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __ini
t__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j 
ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequ
ency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt   se
q(   R   t   entr
>temp_432|source:ó|start:7978|stop:8478
i2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint 
  ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   cent
eri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min
: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   
t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   
R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst
   updatet   rct 
>temp_433|source:ó|start:10874|stop:11374
   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R
   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main
__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R   R 
  R   R   t   argvR    RŽ   R   (    (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainings
>temp_434|source:ó|start:4103|stop:4603
 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j 
D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |
  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   nex
t_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   s
ortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   
_   s    
>temp_435|source:ó|start:3059|stop:3559
/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z
 d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „
 Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  
|  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  
j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ 
q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta
_file_patht   Nonet
>temp_436|source:ó|start:10680|stop:11180
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c
         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ
 d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR6   <  s    					(   R   R   R   R   R!   R   R6   (    (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR     s   	
>temp_437|source:ó|start:1079|stop:1579
(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ 
} n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ 
t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ q
E ƒ S(   Ni    s 
>temp_438|source:ó|start:7098|stop:7598
& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d 
| ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ
 | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d
6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j 
g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; 
d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d
? ƒ n Xy | j, 
>temp_439|source:ó|start:251|stop:751
c           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „
 Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   N
t   w(   t   output_file_patht   opent   output_file_obj(   t   selfR   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d
 | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns
   %s|%ss   freque
>temp_440|source:ó|start:10090|stop:10590
] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j 
 j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D]
 } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   
RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R
   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c   
>temp_441|source:ó|start:862|stop:1362
   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s   |
  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ
 d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/
data/azomer/reads-fo
>temp_442|source:ó|start:5836|stop:6336
 ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R
   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         C  
 s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c
         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S( 
  Ni    (   R"   R*
>temp_443|source:ó|start:9494|stop:9994
_ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ 
nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   N
i    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals
_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R
8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR     s    									:c         C   sF  |  
j  j ƒ  d j ƒ  |
>temp_444|source:ó|start:10602|stop:11102
   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c 
        C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ 
d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
6   <  s    					(   R   R   R   R   R!   R   R6   (    (    (    
sJ   /mnt/data/azome
>temp_445|source:ó|start:4751|stop:5251
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c  
       C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j 
} |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _
	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/  
 R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x
   s    	!&
>temp_446|source:ó|start:3727|stop:4227
otal_seqR   t	   readlinest   startswitht   resett   init_unique_hash(   R  
 R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyR   C   s(    												
	:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |
  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  
j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | 
f ^ qº d t ƒD]
>temp_447|source:ó|start:9671|stop:10171
   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1 
  R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR  
   s    									:c         C   sF  |  j  j ƒ  d j ƒ
  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  
_ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ
 râ |  j  j |  j  
>temp_448|source:ó|start:2991|stop:3491
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d
 „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c
         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  
_ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ 
|  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ 
|  j ƒ  |  j	 rý |
>temp_449|source:ó|start:8326|stop:8826
yplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R& 
  R   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSp
ect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   
ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t
   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t
   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   
sequence_lengthst  
>temp_450|source:ó|start:8778|stop:9278
 ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   f
igt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   s
x    ")"/)/*)
		N(   R   R   R   RE   R   R7   R!
   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads
-for-assembly/traini
>temp_451|source:ó|start:5904|stop:6404
RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c  
       C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d
 ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    
sJ   /mnt/data/azome
>temp_452|source:ó|start:2101|stop:2601
rio/utils/fastalib.pyR       s   		t	   ReadFastac           B   s
   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | 
ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |
  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j
 j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: 
%ss   (   R   t	   sequencest   SequenceSourcet   fastat   nextt   post 
  syst   stderrR  
>temp_453|source:ó|start:4567|stop:5067
fastalib.pyR7   _   s    	Jc         C   s!   |  j  r
 |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k 
 r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ 
| d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   
R   RA   R"   R.   
>temp_454|source:ó|start:1451|stop:1951
  sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] }
 | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   j
oin(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s 
   %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   *   s
>temp_455|source:ó|start:468|stop:968
_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   
| j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j |
 j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt
   idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   st
ore_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_456|source:ó|start:6847|stop:7347
d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d
 d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d
 d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`
t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t
 |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+
 d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d
 d) d4 d5 d6 ƒ| 
>temp_457|source:ó|start:9833|stop:10333
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
  s    									:c         C   sF  |  j  j ƒ  d j ƒ  
|  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ 
râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ 
g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(  
 t    (   R1   RH 
>temp_458|source:ó|start:450|stop:950
ent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c  
       C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j
 | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uni
quet   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entr
yt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vi
>temp_459|source:ó|start:42|stop:542
 l Z d  d l Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d 
d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j
 d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  
„  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c      
   C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_pat
ht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/
>temp_460|source:ó|start:4819|stop:5319
    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j | 
 j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _
 | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"
   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j 
ƒ  d j ƒ  |  _ d
>temp_461|source:ó|start:6544|stop:7044
|  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ
 t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t |
 ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<
qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j
 d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ
 | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j
 d# ƒ | j d$ ƒ 
>temp_462|source:ó|start:10562|stop:11062
+#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R 
  (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  
|  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   
R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R   
R   R   R!   R   
>temp_463|source:ó|start:2913|stop:3413
ibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „
  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c  
       C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _
 g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |
  j r£ d  |  _ nD 
>temp_464|source:ó|start:821|stop:1321
eqt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   stor
e   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(
   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   
| r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R  
 (   R   R   R  
>temp_465|source:ó|start:1844|stop:2344
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR
   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac 
          B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _
  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k
 r… t j j d |  j
>temp_466|source:ó|start:3415|stop:3915
 g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j
 ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   laz
y_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   un
ique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seq
R   t	   readlinest   startswitht   resett   init_unique_hash(   R   R)   R,
   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fas
>temp_467|source:ó|start:239|stop:739
 FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „
 Z d d „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _
 d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj(   t
   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 |
 r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ 
d  S(   Ns   %s|%
>temp_468|source:ó|start:8010|stop:8510
 sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î
?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gá
z”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s
   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplo
tt   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R 
  R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect
   subplotRO   R   
>temp_469|source:ó|start:10771|stop:11271
 C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   
Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s
    					(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s
   		t   __main__i   R‚   i   i   R*   (    (    (    (    (   R#   
R|   R<   R    R   
>temp_470|source:ó|start:9555|stop:10055
|  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |
  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t
   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (
   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR     s    									:c   
      C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j 
ƒ  } | s¬ t | ƒ
>temp_471|source:ó|start:10497|stop:10997
gsets2/Vibrio/utils/fastalib.pyR!     s*    		+#
+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ 
g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   
R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_472|source:ó|start:8150|stop:8650
axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: 
%st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplot
lib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$
   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet
   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_b
etweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt
   figtextRE   t   
>temp_473|source:ó|start:4778|stop:5278
sets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› 
|  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc 
| d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni
    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R
   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì
>temp_474|source:ó|start:7609|stop:8109
n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   
t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	
   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ë
Q¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333
333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   t 
  sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸
î?s   %st   weight
>temp_475|source:ó|start:10101|stop:10601
 ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t |
 ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | 
ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R
	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qua
lscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR!     s*    		+#+
c         C   
>temp_476|source:ó|start:6378|stop:6878
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j } 
g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  
j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé
 t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c 
d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ
| j d d ƒ } 
>temp_477|source:ó|start:7153|stop:7653
| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d
* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d
1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t
$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ 
t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | 
d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  
i    i   s   [fas
>temp_478|source:ó|start:4980|stop:5480
| d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R" 
  R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ
  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7
_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  
7} q. |  j rÊ | n
>temp_479|source:ó|start:2410|stop:2910
 q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA in
to memory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat   next
t   post   syst   stderrR   t   flusht   appendR	   R   (   R   t   f_
name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   /   s    		(c         C   s   |  j  j ƒ 
 d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets
>temp_480|source:ó|start:8594|stop:9094
tickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant
   stdt   minR)   t   savefigt   show(   R   t   titlet   destt   max_s
eq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len
_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_
length_distribution³   sx    ")"/
)/
>temp_481|source:ó|start:8551|stop:9051
tweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt 
  figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   
show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept 
  pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst  
 ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
")
>temp_482|source:ó|start:5459|stop:5959
 7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (
   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   
tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |
  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (  
  sJ   /mnt/data/azo
>temp_483|source:ó|start:7183|stop:7683
 g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ |
 j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ
| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ
 | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d
 d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ
 n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Rea
ding: %ss   g      
>temp_484|source:ó|start:7251|stop:7751
d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) 
d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ 
| j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ 
f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | 
j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [f
astalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.l
inewidtht   gridt 
>temp_485|source:ó|start:5122|stop:5622
t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyRD   x   s    	!&c         C   sì   d
  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk |
 |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ
 Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   N
i   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE 
  R5   R2   t   tel
>temp_486|source:ó|start:9625|stop:10125
ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R
"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5
   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ
 |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ 
^ qw |  _ |  j d
>temp_487|source:ó|start:9722|stop:10222
lst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R  
 R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR     s    									:c         
C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } |
 s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j
 d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d |
 j ƒ  7} q7 | j
>temp_488|source:ó|start:5934|stop:6434
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyt   get_seq_by_read_id    s    c         C   s   |  j  j ƒ  
d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8
   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   
R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainings
>temp_489|source:ó|start:7756|stop:8256
ors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t 
  bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333
Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   rot
ationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g  
    à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.e
dgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st  
 vagš™™™™™é?s   x-l
>temp_490|source:ó|start:8565|stop:9065
 ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   
t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   
t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t  
 sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyt&   visualize_sequence_length_distribution³   sx    "
)
>temp_491|source:ó|start:6207|stop:6707
/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  
|  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   ( 
  R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d
  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j
 j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d
 ƒ | sé t | ƒ
>temp_492|source:ó|start:809|stop:1309
t	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyt   store   s    *c         C   s   |  j  j d | ƒ d  S(  
 Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c      
   C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R
   R   R   (   R
>temp_493|source:ó|start:3421|stop:3921
 j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n
  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_init
t   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_h
ash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	
   readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R 
  R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_494|source:ó|start:1029|stop:1529
 d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
   s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S
(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    
iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d
 t | ƒ d ƒ D]
>temp_495|source:ó|start:6864|stop:7364
 d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ
 | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j
 d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZ
d } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ
 g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | 
j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ
| j | d7 ƒ } |
>temp_496|source:ó|start:820|stop:1320
seqt   seq(   R   t   entryt   splitt   store_frequencies(    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   sto
re   s    *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(
   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0  
 | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R 
  (   R   R   R 
>temp_497|source:ó|start:9482|stop:9982
|  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j 
r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ 
 d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qua
lst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R  
 R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR     s    									:c         
C   sF  |  j  j ƒ 
>temp_498|source:ó|start:8459|stop:8959
   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plot
t   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlim
t   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   
savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   
ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   fi
gt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/
>temp_499|source:ó|start:971|stop:1471
yt   store   s    *c         C   s   |  j  j d | ƒ d  S(   N
s   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c        
 C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R  
 R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C 
  sk   t  d t | ƒ
>temp_500|source:ó|start:9587|stop:10087
 j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   ( 
  t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R  
 R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R     s    									:c         C   sF  |  j  j ƒ  d 
j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ 
 |  _ g  |  j j ƒ
>temp_501|source:ó|start:3806|stop:4306
  R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   C   s(    												
	:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | 
|  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6| 
 j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d |
 f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(  
 NR   t   counti 
>temp_502|source:ó|start:4243|stop:4743
 |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   
reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR
.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hash
t   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR7   _   s    	Jc         C   s!   |  j
  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (   
 (    sJ   /mnt/data
>temp_503|source:ó|start:6113|stop:6613
  R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _
 d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R  
 R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d 
 l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j
 d k r† t j j	 d
>temp_504|source:ó|start:3422|stop:3922
j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n 
 d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt
   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_ha
sh_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	 
  readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R  
 R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_505|source:ó|start:2531|stop:3031
st   SequenceSourcet   fastat   nextt   post   syst   stderrR   t   flus
ht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(
c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azom
er/reads-for-assembl
>temp_506|source:ó|start:10032|stop:10532
j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |
  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ P
n  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q| 
 _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘
   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK  
 t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR!  
>temp_507|source:ó|start:9464|stop:9964
|  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  
j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’
 ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   
R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R
5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C 
>temp_508|source:ó|start:2884|stop:3384
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R
   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e
 e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	
 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  
_ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  
j  ƒ |  _ |  j j
>temp_509|source:ó|start:1677|stop:2177
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t   
close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t 
  TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   Read
Fastac           B 
>temp_510|source:ó|start:701|stop:1201
|  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   wri
te_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   split
t   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s   
|  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR      s    c
>temp_511|source:ó|start:5555|stop:6055
  t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (
   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(
   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by
_read_id    s    
>temp_512|source:ó|start:7526|stop:8026
r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn 
n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i
   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.5
0t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸
…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g
333333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   
t   sizes   xx-sma
>temp_513|source:ó|start:8123|stop:8623
   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / 
std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   
i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gri
dspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figur
et   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subplots
_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst  
 ytickst   xlimt  
>temp_514|source:ó|start:10372|stop:10872
  R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!  
   s*    		+#+c         C   s   |  j  
j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         
C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   N
i    (   R"   R*   
>temp_515|source:ó|start:10511|stop:11011
utils/fastalib.pyR!     s*    		+#+c    
     C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9 
 s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j
 j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR6   <  s
>temp_516|source:ó|start:9188|stop:9688
   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t   Qu
alSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS( 
  c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d 
 |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j 
j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (  
 t   quals_file_pat
>temp_517|source:ó|start:6003|stop:6503
utils/fastalib.pyt   get_seq_by_read_id    s    c         C   s   |
  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c     
    C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni   
 (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    			
	c         C   
>temp_518|source:ó|start:5300|stop:5800
  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7
_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  
7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   ( 
  R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   t
ellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c  
>temp_519|source:ó|start:549|stop:1049
gsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   |
 j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j |
 j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt 
  idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   sto
re_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyt   store   s    *c         C   s   |  j  j
 d | ƒ d  S(  
>temp_520|source:ó|start:9975|stop:10475
  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ
 r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St
 Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} 
q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(
   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R
"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /
mnt/data/azomer/read
>temp_521|source:ó|start:4079|stop:4579
 ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j
 | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^
 qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t
   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdig
estR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   
hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR
>temp_522|source:ó|start:3672|stop:4172
  unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readline
st   startswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t  
 l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR   C   s(    													:	c         C  
 s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d 
j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d
 6|  j | <q Wg  t
>temp_523|source:ó|start:8466|stop:8966
tet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fi
ll_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   yl
imt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefig
t   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickst
ept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   g
st   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_524|source:ó|start:482|stop:982
selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9
 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d 
 S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent
   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequencie
s(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyt   store
>temp_525|source:ó|start:1140|stop:1640
mbly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0  
 | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R 
  (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t 
 d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | |
 | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R
   t   sequencet 
>temp_526|source:ó|start:3110|stop:3610
 sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „ 
 Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ |
 |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d
 |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ 
 D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S( 
  Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allo
w_mixed_caseR"   R	 
>temp_527|source:ó|start:6055|stop:6555
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ | 
 j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j 
} g  } |  j ƒ  x
>temp_528|source:ó|start:10275|stop:10775
 |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   
R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t
   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR!     s*    		+#+c       
  C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s 
   c         C  
>temp_529|source:ó|start:4177|stop:4677
|  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j
 ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t
   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   
t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR7   _   s    	Jc         C   s!   |  j  r |  j ƒ  
S|  j ƒ  Sd  S(   
>temp_530|source:ó|start:4147|stop:4647
d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD]
 } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti  
 R	   R   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppe
rt	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   ( 
  R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR7   _   s    	Jc      
   C   s!   |  j  r
>temp_531|source:ó|start:8747|stop:9247
  max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   
seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_se
quence_length_distribution³   sx    ")"
/)/*)		N(   R
   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (
    sJ   /mnt/data/a
>temp_532|source:ó|start:10109|stop:10609
w |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ
 Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q
|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*  
 R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresR
K   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR!     s*    		+#+c   
      C   s   |  
>temp_533|source:ó|start:7618|stop:8118
A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figs
izei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linest
ylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   
topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s  
 number of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes 
  xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %s
t   weights   xx-l
>temp_534|source:ó|start:4733|stop:5233
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   
r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” | 
 j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d 
|  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   R
A   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_en
try(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyRD   x  
>temp_535|source:ó|start:4019|stop:4519
|  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c
 d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  | 
 j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j 
ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t  
 next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t
   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    
sJ   /mnt/data/azome
>temp_536|source:ó|start:4903|stop:5403
|  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | 
d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R
.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyRD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d
 j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_
 t St	 Sn  | j d 
>temp_537|source:ó|start:5246|stop:5746
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x
 |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ 
r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j
 ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   read
linet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R 
  t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.py
>temp_538|source:ó|start:7350|stop:7850
| d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j%
 d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) 
d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy
 | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %
ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt
   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™
™©?t   bottomg¸…ëQ
>temp_539|source:ó|start:409|stop:909
 Nt   w(   t   output_file_patht   opent   output_file_obj(   t   selfR 
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j 
d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   
Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   ids
t	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (
    sJ   /mnt/data/a
>temp_540|source:ó|start:8399|stop:8899
%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct  
 GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR 
  t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtex
tRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(  
 R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh 
  t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t  
 y(    (    sJ   /mn
>temp_541|source:ó|start:5634|stop:6134
   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(
   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_b
y_read_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1  
 R   (   R   (   
>temp_542|source:ó|start:10317|stop:10817
   (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2 
  RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(   R1 
  R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _
 d  |  _ d  |  _ 
>temp_543|source:ó|start:7821|stop:8321
eftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt
   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2 
  i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   y
maxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri
	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %
s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t 
  matplotlib.pyplott
>temp_544|source:ó|start:9570|stop:10070
 _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(
   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   q
uals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,
   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR     s    									:c         C   sF 
 |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t 
| ƒ r¥ | j ƒ  | 
>temp_545|source:ó|start:10070|stop:10570
 _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d 
ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  
_ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R
(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R
5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+
>temp_546|source:ó|start:4946|stop:5446
 | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i  
 R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t  
 False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c    
     C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | s
r t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j
 ƒ  t | ƒ ƒ Pn
>temp_547|source:ó|start:3877|stop:4377
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    									
				:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j 
ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  
j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j |
 d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  
d  S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   
hashlibt   sha1R  
>temp_548|source:ó|start:4943|stop:5443
 n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i
   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t
   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c  
       C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } |
 sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j
 j ƒ  t | ƒ ƒ 
>temp_549|source:ó|start:8153|stop:8653
s.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st
   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib
.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   
R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   
rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betw
eenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   
figtextRE   t   tex
>temp_550|source:ó|start:7658|stop:8158
] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidt
ht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t 
  leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blac
kt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence length
i2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint 
  ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   cent
eri	   s   axes.edg
>temp_551|source:ó|start:9928|stop:10428
						:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ 
d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ 
 D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  
j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  
D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1 
  RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (  
 R   t   qualscores
>temp_552|source:ó|start:1002|stop:1502
        C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write( 
  R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ } 
n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t 
| ƒ g } d j g
>temp_553|source:ó|start:4864|stop:5364
d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d 
n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i
   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t
   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c   
      C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } |
 sr t | ƒ rk | |
>temp_554|source:ó|start:5842|stop:6342
 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t 
  read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt   get_seq_by_read_id    s    c         C   s   
|  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c    
     C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni  
  (   R"   R*   R	 
>temp_555|source:ó|start:10561|stop:11061
+#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d 
 |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘  
 R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R  
 R   R   R!   R  
>temp_556|source:ó|start:5067|stop:5567
R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD
   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j
 ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t 
St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. 
|  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   
R   R1   t   readl
>temp_557|source:ó|start:10932|stop:11432
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s
    					(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s
   		t   __main__i   R‚   i   i   R*   (    (    (    (    (   R#   R
|   R<   R    R   R   R   R   t   argvR    RŽ   R   (    (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   <mod
ule>   s   
>temp_558|source:ó|start:1021|stop:1521
|  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | 
ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s
    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j 
g  t  d t | ƒ d
>temp_559|source:ó|start:207|stop:707
n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z
 e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C 
  s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht  
 opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c
         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n | 
 j | j ƒ |  j 
>temp_560|source:ó|start:4744|stop:5244
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    
	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |
  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d
 |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.
   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yRD   x   s    	!
>temp_561|source:ó|start:4035|stop:4535
 ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |
  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j
 | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ
  d  S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart 
  hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/
   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/a
zomer/reads-for-asse
>temp_562|source:ó|start:3697|stop:4197
   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht   reset
t   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(   
 													:	c         C   s  x§ |  j  ƒ  r© t j
 |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d
 c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  
|  j D] } |  j |
>temp_563|source:ó|start:222|stop:722
 S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z
 d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _ 
 t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   outpu
t_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C  
 s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |
  j | j | ƒ d 
>temp_564|source:ó|start:4829|stop:5329
         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j 
} |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d | 
 _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/
   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD  
 x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j 
ƒ  |  _ d } x | 
>temp_565|source:ó|start:10267|stop:10767
q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*
   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscore
sRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR!     s*    		+#+c 
        C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR  
 9  s    c     
>temp_566|source:ó|start:6472|stop:6972
s    				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ 
 xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j 
j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t
 | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j
 d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d 
ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j
 |	 d d d d  ƒ
>temp_567|source:ó|start:3362|stop:3862
  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t  
 >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_case
R"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_n
ext_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest   start
switht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (  
  sJ   /mnt/data/azo
>temp_568|source:ó|start:4049|stop:4549
j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j
 g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f
 ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR
   t   counti   R	   R   t   reverse(   t   next_regulart   hashlibt   
sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   to
tal_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainings
>temp_569|source:ó|start:5748|stop:6248
   ˆ   s$    		#c         C   s7   |  j  ƒ  
x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE  
 (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c     
    C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   
s    c         
>temp_570|source:ó|start:3056|stop:3556
ils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „
 Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d
 „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ 
d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ 
|  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | 
^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fa
sta_file_patht   No
>temp_571|source:ó|start:6490|stop:6990
       C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |
  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t 
|  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ 
d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j
 j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d 
d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ
| j t d | d 
>temp_572|source:ó|start:8992|stop:9492
e_length_distribution³   sx    ")"/
)/*)		N(   R   R
   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   B   s   								t   QualSourcec           B   s/   e  Z e d  „
 Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ |
 |  _ d |  _ d  |
>temp_573|source:ó|start:9871|stop:10371
ainingsets2/Vibrio/utils/fastalib.pyR     s    									:c
         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j 
 j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw 
|  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ 
Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|
  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R
‘   R’   R   R   Rj
>temp_574|source:ó|start:1329|stop:1829
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g
 } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(  
 Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_l
engtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ
  d  S(   N(   R
>temp_575|source:ó|start:1596|stop:2096
rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t  
 close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t
   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets
>temp_576|source:ó|start:9967|stop:10467
F  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ 
t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7
_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j 
ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7
_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R 
  Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (  
  sJ   /mnt/data/azo
>temp_577|source:ó|start:9035|stop:9535
)"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM  
 R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourc
ec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c   
      C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ 
g  |  _ t	 |  j  ƒ
>temp_578|source:ó|start:8294|stop:8794
-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R
!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcPar
amst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt  
 plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst  
 xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   
t   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstep
t	   ytickstept   p
>temp_579|source:ó|start:10380|stop:10880
   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s* 
   		+#+c         C   s   |  j  j ƒ  d
  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA  
 d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    ( 
  R"   R*   R	   R‘ 
>temp_580|source:ó|start:1787|stop:2287
   C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *
   s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R  
 (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR       s   		t	   ReadFastac           B   s   e  
Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  
_ x‘ |  j j ƒ  r´
>temp_581|source:ó|start:6377|stop:6877
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j }
 g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d | 
 j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | s
é t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c 
d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ
| j d d ƒ }
>temp_582|source:ó|start:1325|stop:1825
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | 
ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S
(   Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet   pie
ce_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  
j ƒ  d  S(   N( 
>temp_583|source:ó|start:4902|stop:5402
 |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ |
 d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   
R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  
d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_
 t St	 Sn  | j d
>temp_584|source:ó|start:10468|stop:10968
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    	
	+#+c         C   s   |  j  j ƒ  d  S(   
N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _
  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R
*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/read
s-for-assembly/train
>temp_585|source:ó|start:8860|stop:9360
t   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx 
   ")"/)/*)
		N(   R   R   R   RE   R   R7   R!  
 RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   							
	t   QualSourcec   
>temp_586|source:ó|start:4858|stop:5358
|  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc 
| d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni
    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R
   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ 
 } | sr t | ƒ r
>temp_587|source:ó|start:8160|stop:8660
olorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vag
š™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplot
t   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%
   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct   
GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR  
 t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtext
RE   t   textt   n
>temp_588|source:ó|start:6719|stop:7219
 d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@
 ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } |
 j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d 
d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d
$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j 
t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	
 ƒ d | ƒ D] }
>temp_589|source:ó|start:3916|stop:4416
alib.pyR   C   s(    													:	c         C   s
  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |
  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6| 
 j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d
 ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R 
  t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hex
digestR.   R&   R	  
>temp_590|source:ó|start:7304|stop:7804
 j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ
 | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& 
j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y 
| j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Ni
ÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei 
  i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet 
  -t	   linewidthgš™
>temp_591|source:ó|start:4896|stop:5396
 |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d | 
 _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA  
 R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j 
j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j 
d 7_ t St	 Sn  |
>temp_592|source:ó|start:2845|stop:3345
  (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   
	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z
 d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C  
 s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ |
 |  _	 i  |  _ g  | 
>temp_593|source:ó|start:7254|stop:7754
| ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 
d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ |
 j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f 
d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ 
| d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fast
alib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.line
widtht   gridt   c
>temp_594|source:ó|start:6512|stop:7012
  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j
 d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j
 ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d 
}	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | 
j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ 
| j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ
 |	 d! d d d d 
>temp_595|source:ó|start:375|stop:875
 |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent 
  output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c      
   C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j
 ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet
   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt  
 splitt   store_fre
>temp_596|source:ó|start:7405|stop:7905
g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t 
| ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn 
| j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s
   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   
axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™
™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ
(\ï?t   blackt   a
>temp_597|source:ó|start:4444|stop:4944
   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R
   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   
s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 
7_ |  j rc | d 
>temp_598|source:ó|start:6995|stop:7495
	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n 
 | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ|
 j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* 
ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 
d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ 
ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t)
 | ƒ t | ƒ f 
>temp_599|source:ó|start:4973|stop:5473
|  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   R
A   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_en
try(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyRD   x   s    	!&c         C   sì   d  |  _ |  
j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  
j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | |
 j ƒ  7} q. |  j 
>temp_600|source:ó|start:296|stop:796
 „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   |
 |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent  
 output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c      
   C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j
 ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet 
  write_idt   idt 
>temp_601|source:ó|start:7922|stop:8422
 y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationi
Z   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?
g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecol
org{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™
™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott
   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%  
 R&   R   R   t   
>temp_602|source:ó|start:2423|stop:2923
 d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %s
s   (   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   
syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R   /   s    		(c         C   s   |  j  j ƒ  d  S(   N(
   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/util
>temp_603|source:ó|start:1532|stop:2032
 | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   jo
in(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s  
  %c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R  
 R   R   R   (   
>temp_604|source:ó|start:9017|stop:9517
sx    ")"/)/*)
		N(   R   R   R   RE   R   R7   R
!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   					
			t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d 
„  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ 
d  |  _ d  |  _ g 
>temp_605|source:ó|start:8459|stop:8959
   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plot
t   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlim
t   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t   
savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   
ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   fi
gt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/
>temp_606|source:ó|start:7634|stop:8134
    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌ
ÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   li
newidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t  
 rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of seque
ncess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   x
mint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   
xx-larget   hat   
>temp_607|source:ó|start:6552|stop:7052
 xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j 
j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t
 | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j
 d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d 
ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j
 |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ 
| j d$ ƒ | d  k
>temp_608|source:ó|start:7019|stop:7519
| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d&
 pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d 
| ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ 
| j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6
 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g
  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d
6 d d< d) d= ƒ|
>temp_609|source:ó|start:6778|stop:7278
d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ
| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d
 d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ
| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d
& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d 
| ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ
 | j" d- d d. t
>temp_610|source:ó|start:7638|stop:8138
i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌ
ì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewi
dthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rig
htg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequences
s   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint
   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-l
arget   hat   cent
>temp_611|source:ó|start:1706|stop:2206
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c      
   C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
*   s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R 
  (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR       s   		t	   ReadFastac           B   s   e 
 Z d  „  Z d „  Z
>temp_612|source:ó|start:4865|stop:5365
 k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d 
n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i 
  R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t 
  False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c   
      C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | 
sr t | ƒ rk | | 
>temp_613|source:ó|start:8002|stop:8502
Z   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?
g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecol
org{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™
™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott
   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%  
 R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   rct   Gr
idSpect   subplotRO
>temp_614|source:ó|start:295|stop:795
 „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   |
 |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent 
  output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c      
   C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j
 ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet
   write_idt   idt
>temp_615|source:ó|start:5587|stop:6087
  R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
;   ˆ   s$    		#c         C   s7   |  j  ƒ  
x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE 
  (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c    
     C   s   |  j 
>temp_616|source:ó|start:9543|stop:10043
 j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q
’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,  
 R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   
R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR     s    								
	:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } 
xÂ |  j  j ƒ  } |
>temp_617|source:ó|start:4346|stop:4846
egulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sort
edR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _  
 s    	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ 
 Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	
c         C   s£
>temp_618|source:ó|start:8680|stop:9180
dt   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_len
t	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distr
ibutionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length
_distribution³   sx    ")"/
)/*)		N(   R   R   R  
 RE   R   R7   R!  
>temp_619|source:ó|start:3189|stop:3689
  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ 
| |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ 
d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ
  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(
   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   all
ow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_list
t   unique_next_has
>temp_620|source:ó|start:1697|stop:2197
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %
c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R  
 R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           B 
  s   e  Z d  „  Z
>temp_621|source:ó|start:1099|stop:1599
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d |
 ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !  
 s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j
 g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s 
  i   (   t   ran
>temp_622|source:ó|start:10794|stop:11294
  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	
   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R
   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main
__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R   R 
  R   R   t   arg
>temp_623|source:ó|start:5030|stop:5530
  R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   Fal
se(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c       
  C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t
 | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  
t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_
 t S(   Ni   t  
>temp_624|source:ó|start:3553|stop:4053
 Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   u
nique_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_pointert
   seekt	   total_seqR   t	   readlinest   startswitht   resett   init_unique
_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    							
						:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ
 j ƒ  } | |  j k
>temp_625|source:ó|start:8491|stop:8991
  subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabe
lt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   t
extt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   t
itlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   seque
nce_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt
&   visualize_sequen
>temp_626|source:ó|start:3173|stop:3673
Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  |
 |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i
  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD 
t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  
j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   l
azy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   
unique_hash_listt  
>temp_627|source:ó|start:1161|stop:1661
brio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ }
 n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t
 | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE 
ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet  
 piece_lengtht   ti
>temp_628|source:ó|start:10753|stop:11253
    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j
 d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    ( 
   (    (   R#   R|
>temp_629|source:ó|start:345|stop:845
RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t 
  output_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   
__init__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t |
 j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   
frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt
   seq(   R   t  
>temp_630|source:ó|start:2144|stop:2644
t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c      
   C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d 
k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j 
j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [
fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Sequenc
eSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appendR
	   R   (   R   t
>temp_631|source:ó|start:4942|stop:5442
 n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    
i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   
t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c 
        C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } 
| sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j
 j ƒ  t | ƒ ƒ
>temp_632|source:ó|start:6632|stop:7132
j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t
 | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j
 d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d 
ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j
 |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ 
| j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n 
 | j t | | d 
>temp_633|source:ó|start:5815|stop:6315
   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R
	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ | 
 j j d ƒ d  S(
>temp_634|source:ó|start:6585|stop:7085
s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1
 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g
 | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6
ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j
 t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d |
 d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p
5d } n  | d  k 
>temp_635|source:ó|start:7141|stop:7641
d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } |
 ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ 
| j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 
6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ 
t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  
y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A  
 Niÿÿÿÿi'  i    i 
>temp_636|source:ó|start:4564|stop:5064
ls/fastalib.pyR7   _   s    	Jc         C   s!   |  j 
 r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j
 k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  
_ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (
   R   RA   R"   R.
>temp_637|source:ó|start:779|stop:1279
rite_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   spl
itt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s 
  |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d |
 ƒ d  S(   Ns  
>temp_638|source:ó|start:8482|stop:8982
idSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t
   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE
   t   textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R
   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t
   sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyt&   visuali
>temp_639|source:ó|start:9355|stop:9855
ec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c   
      C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ 
g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D
] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   q
uals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2
   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /m
nt/data/azomer/reads
>temp_640|source:ó|start:2122|stop:2622
R       s   		t	   ReadFastac           B   s   e  Z d  „  Z d 
„  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j 
ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	
 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   N
iè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	  
 sequencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   
t   flusht   appen
>temp_641|source:ó|start:5753|stop:6253
  s$    		#c         C   s7   |  j  ƒ  x& | 
 j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (  
 R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         
C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s   
 c         C   s
>temp_642|source:ó|start:7117|stop:7617
 t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t 
|	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t
 |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 
ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 
d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ
| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j,
 ƒ  Wn n Xd  S
>temp_643|source:ó|start:3576|stop:4076
_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   uni
que_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR
   t	   readlinest   startswitht   resett   init_unique_hash(   R   R)   R, 
  R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   C   s(    													:	
c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k r
q |  j | d j |
>temp_644|source:ó|start:4319|stop:4819
   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdig
estR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   
hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR7   _   s    	Jc         C   s!   
|  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR!   r   s
>temp_645|source:ó|start:10831|stop:11331
 j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (   
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    
(    (    (   R#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R   ( 
   (    (    sJ   /m
>temp_646|source:ó|start:4552|stop:5052
2/Vibrio/utils/fastalib.pyR7   _   s    	Jc         C 
  s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   ( 
  R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r”
 |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d
 j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   
R	   R   (   R   
>temp_647|source:ó|start:6453|stop:6953
astalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j
 } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d
 |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ 
| sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 |
 c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d 
d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d
 d d d ƒ | j 
>temp_648|source:ó|start:9000|stop:9500
_distribution³   sx    ")"/
)/*)		N(   R   R   R  
 RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s
   								t   QualSourcec           B   s/   e  Z e d  „ Z d „
  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d
 |  _ d  |  _ d  
>temp_649|source:ó|start:9753|stop:10253
R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
  s    									:c         C   sF  |  j  j ƒ  d j ƒ  
|  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _
 g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ 
râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ 
g  |  j j ƒ  D] }
>temp_650|source:ó|start:4478|stop:4978
 hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR7   _   s    	Jc         C   s!  
 |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j
 |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j 
ƒ  |  _ | d |  _
>temp_651|source:ó|start:8963|stop:9463
alib.pyt&   visualize_sequence_length_distribution³   sx    "
)"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6
   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourcec      
     B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         
C   sÇ   | |  _  d 
>temp_652|source:ó|start:5147|stop:5647
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD
   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j
 ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t 
St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. 
|  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   
R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-  
 R>   (   R   R  
>temp_653|source:ó|start:10911|stop:11411
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (   
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR     s   		t   __main__i   R‚   i   i   R*   (    (    
(    (    (   R#   R|   R<   R    R   R   R   R   t   argvR    RŽ   R   ( 
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyt   <mo
>temp_654|source:ó|start:5179|stop:5679
ssembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  
} | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |
  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ | 
 j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   stri
pR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line( 
   (    sJ   /mnt/da
>temp_655|source:ó|start:6987|stop:7487
d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5
d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) 
d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ q
Åd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j
# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ |
 j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( 
| ƒ t) | ƒ t |
>temp_656|source:ó|start:9632|stop:10132
 |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	 
  t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6  
 (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/utils/fastalib.pyR     s    									:c 
        C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  
j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |
  _ |  j d 7_ t
>temp_657|source:ó|start:9853|stop:10353
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    					
				:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  | 
 _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] 
} t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j
 ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] }
 t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH 
  RI   R	   R*   R‘ 
>temp_658|source:ó|start:9577|stop:10077
t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni  
  R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_in
tR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR     s    									:c         C   sF  |  j  
j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r
¥ | j ƒ  |  _ g  
>temp_659|source:ó|start:3579|stop:4079
itt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique
_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   
t	   readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R
   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   C   s(    													:	c 
        C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |
  j | d j |  j
>temp_660|source:ó|start:1430|stop:1930
iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d 
t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (  
 t   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R 
  t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/
>temp_661|source:ó|start:8701|stop:9201
vefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	   yt
ickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt
   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx  
  ")"/)/*)
		N(   R   R   R   RE   R   R7   R!   
RD   R;   RM   R   
>temp_662|source:ó|start:6935|stop:7435
 d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" 
ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ
 d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ 
d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, |
 ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d
5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | 
j g  ƒ | j% d9 d
>temp_663|source:ó|start:682|stop:1182
n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   
t   uniquet   write_idt   idt   lent   idst	   write_seqt   seq(   R   t
   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c 
        C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write( 
  R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_664|source:ó|start:9475|stop:9975
 _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ 
|  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |
  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t
   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (
   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR     s    									:c   
      C   sF  |  j
>temp_665|source:ó|start:6893|stop:7393
!ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ|
 j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d |
 ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ 
D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ
 d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i
 d& d8 6ƒ | j t
>temp_666|source:ó|start:3662|stop:4162
sh_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	 
  readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R  
 R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   C   s(    													:	c   
      C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j
 | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j
 d 6d d 6|  j |
>temp_667|source:ó|start:10698|stop:11198
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA
   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (
   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    	
				(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   
		t   __main__i
>temp_668|source:ó|start:4103|stop:4603
 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j 
D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |
  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   nex
t_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   s
ortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   
_   s    
>temp_669|source:ó|start:6572|stop:7072
j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t | 
 j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d
 } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j 
j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d
 !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ
| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d }
>temp_670|source:ó|start:10675|stop:11175
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s 
   c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j 
d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR6   <  s    					(   R   R   R   R   R!   R   R6   (    (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR     s   
>temp_671|source:ó|start:9640|stop:10140
ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   q
ualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R
   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR     s    									:c       
  C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  }
 | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  
j d 7_ t St Sn 
>temp_672|source:ó|start:7446|stop:7946
ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k 
r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn 
n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i
   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.5
0t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸
…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g
333333Ã?s   number 
>temp_673|source:ó|start:1147|stop:1647
ainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0   | r 
|  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   
R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t 
| ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | |
 d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t 
  sequencet   piece
>temp_674|source:ó|start:6391|stop:6891
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬  
 s    				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ
  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j
 j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t 
t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| 
j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d
 ƒ } | j | d d
>temp_675|source:ó|start:4668|stop:5168
d  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c
         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j
 } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |
  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R
/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ
   /mnt/data/azomer/
>temp_676|source:ó|start:7710|stop:8210
 gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	 
  linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?
t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of s
equencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt
   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights
   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / 
mean: %.2f / std: %.
>temp_677|source:ó|start:6152|stop:6652
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s  
  c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  
S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   
s    				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ 
 xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j 
j ƒ  n  | j t | 
>temp_678|source:ó|start:1254|stop:1754
d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g }
 d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni  
  s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_length
t   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastal
>temp_679|source:ó|start:8858|stop:9358
igt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   s
x    ")"/)/*)
		N(   R   R   R   RE   R   R7   R!
   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   						
		t   QualSourcec 
>temp_680|source:ó|start:5705|stop:6205
ly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j S
q Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   
get_seq_by_read_id    s    c         C   s   |  j  j ƒ  d  S(   N
(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/uti
>temp_681|source:ó|start:1416|stop:1916
!   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d
 j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    
s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht
   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S
(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2
>temp_682|source:ó|start:9453|stop:9953
 |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  
ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ 
r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*  
 R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qual
sR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    					
				:c  
>temp_683|source:ó|start:4570|stop:5070
talib.pyR7   _   s    	Jc         C   s!   |  j  r | 
 j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!
   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r”
 |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | 
d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R 
  RA   R"   R.   R/ 
>temp_684|source:ó|start:2810|stop:3310
 d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R 
  R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e
 d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	
 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d 
|  _ d  |  _ d  | 
>temp_685|source:ó|start:10032|stop:10532
j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |
  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ P
n  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q| 
 _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘
   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK  
 t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR!  
>temp_686|source:ó|start:3422|stop:3922
j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n 
 d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt
   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_ha
sh_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	 
  readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R  
 R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_687|source:ó|start:9285|stop:9785
/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourcec        
   B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C 
  sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t
	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_
pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   to
tal_qualsR   R4   R5
>temp_688|source:ó|start:1718|stop:2218
ly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s  
 |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (
	   t   __name__t   __module__R   t   TrueR   R   R   R   R   (    (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR       s   		t	   ReadFastac           B   s   e  Z d  „  Z
 d „  Z RS(   c 
>temp_689|source:ó|start:2319|stop:2819
d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j 
|  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Readi
ng FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fas
tat   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R
   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR   /   s    		(c         C   s   
|  j  j ƒ  d  S( 
>temp_690|source:ó|start:9527|stop:10027
|  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j 
d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pa
thR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   tota
l_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    		
							:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _
 d  |  _ d } xÂ |
>temp_691|source:ó|start:2800|stop:3300
  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R 
  R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z
 e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z
 d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ |
 |  _ d |  _ d  |
>temp_692|source:ó|start:10475|stop:10975
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(   R
1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  | 
 _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	 
  R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets
>temp_693|source:ó|start:9156|stop:9656
R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B
   s   								t   QualSourcec           B   s/   e  Z e d  „ Z 
d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  
_ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ 
|  j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ | 
 j ƒ  d  S(   Ni 
>temp_694|source:ó|start:2725|stop:3225
/utils/fastalib.pyR   /   s    		(c         C   s   |  j  
j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R 
  R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e 
e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 
d	 d	 d	 d „ Z RS
>temp_695|source:ó|start:6312|stop:6812
S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   
s    				c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ 
 xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j 
j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t
 | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j
 d	 d@ ƒ } | j 
>temp_696|source:ó|start:1376|stop:1876
iningsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk 
  t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | 
| | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(
   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %
c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (   
 (    sJ   /mnt/data
>temp_697|source:ó|start:5198|stop:5698
s2/Vibrio/utils/fastalib.pyRD   x   s    	!&c         C   sì
   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ r
k | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ
 ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(
   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R  
 RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/d
ata/azomer/reads-for
>temp_698|source:ó|start:106|stop:606
d „  ƒ  YZ d d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ
 d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B 
  sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z R
S(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t  
 output_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   _
_init__   s    	
>temp_699|source:ó|start:10433|stop:10933
t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR!     s*    		+#+c      
   C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s
    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j
 d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (
    (    sJ   /mnt/d
>temp_700|source:ó|start:5710|stop:6210
ainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt
 S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_s
eq_by_read_id    s    c         C   s   |  j  j ƒ  d  S(   N(   
R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fa
>temp_701|source:ó|start:1784|stop:2284
     C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  *   s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR       s   		t	   ReadFastac           B   s   
e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ 
|  _ x‘ |  j j ƒ 
>temp_702|source:ó|start:3038|stop:3538
ingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn   e 
 Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „ 
 Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ 
| |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ 
t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } 
| j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    
t   >(   t   fast
>temp_703|source:ó|start:9281|stop:9781
ets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourcec    
       B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c       
  C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  
_ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } 
| j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_f
ile_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t 
  total_qualsR   R4 
>temp_704|source:ó|start:8421|stop:8921
 maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplotR
O   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlab
elR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   nu
mpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   d
estt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_length
st   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/
data/azomer/reads-fo
>temp_705|source:ó|start:10542|stop:11042
 		+#+c         C   s   |  j  j ƒ  d  
S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d
 |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   
R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    			
		(   R   R   
>temp_706|source:ó|start:5590|stop:6090
"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;  
 ˆ   s$    		#c         C   s7   |  j  ƒ  x&
 |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (
   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c      
   C   s   |  j  j
>temp_707|source:ó|start:10724|stop:11224
o/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  | 
 _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’
   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R   R
   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main__i   R‚ 
  i   i   R*   (  
>temp_708|source:ó|start:9368|stop:9868
 B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   s
Ç   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |
  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d
 ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pat
hR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total
_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azome
r/reads-for-assembly
>temp_709|source:ó|start:5805|stop:6305
   C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R
6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id  
  s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g
  |  _ |  j j d 
>temp_710|source:ó|start:54|stop:554
 l Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  
YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ
 n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e 
d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   
| |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent
   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets
>temp_711|source:ó|start:3998|stop:4498
x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  
j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j
 | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d 
^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   
t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdi
gestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t  
 hasht   i(    (   
>temp_712|source:ó|start:3660|stop:4160
hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t
	   readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R
   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   C   s(    													:	c  
       C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq | 
 j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  
j d 6d d 6|  j 
>temp_713|source:ó|start:9421|stop:9921
 RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |
  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g
  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(
   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR 
  R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR     s   
>temp_714|source:ó|start:10323|stop:10823
  R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ  
 (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R  
 (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |
  _ d  |  _ g  |  
>temp_715|source:ó|start:4746|stop:5246
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	
c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  
j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d 
|  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.  
 R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
D   x   s    	!
>temp_716|source:ó|start:5106|stop:5606
 False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c    
     C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | s
r t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j
 ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d
 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R
   R"   R   RE   R5
>temp_717|source:ó|start:9676|stop:10176
als_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2 
  t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     
s    									:c         C   sF  |  j  j ƒ  d j ƒ  |  
_ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g 
 |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ 
|  j  j |  j  j ƒ 
>temp_718|source:ó|start:7262|stop:7762
 j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ
| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ
 | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d
 d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ
 n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Re
ading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht 
  gridt   colors  
>temp_719|source:ó|start:9322|stop:9822
  								t   QualSourcec           B   s/   e  Z e d  „ Z d „  
Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d 
|  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j 
r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ 
 d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qua
lst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R  
 R   R,   R8   (   
>temp_720|source:ó|start:7757|stop:8257
rs   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t  
 bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó
?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   rota
tioniZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g   
   à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.ed
gecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   
vagš™™™™™é?s   x-la
>temp_721|source:ó|start:1901|stop:2401
y/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __
module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   	
	t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c   
      C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d 
d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  
j j ƒ |  j j | 
>temp_722|source:ó|start:4781|stop:5281
s2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  
j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | 
d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni   
 i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R  
 t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c
         C   sì   
>temp_723|source:ó|start:1599|stop:2099
geR   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t   cl
ose(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t   
TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/V
>temp_724|source:ó|start:10080|stop:10580
j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j
  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j
 j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    
(   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   R
J   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#
>temp_725|source:ó|start:7496|stop:7996
; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ |
 d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastal
ib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewi
dtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t
   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   bl
ackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence leng
thi2   i   t   rot
>temp_726|source:ó|start:3658|stop:4158
e_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_seqR  
 t	   readlinest   startswitht   resett   init_unique_hash(   R   R)   R,   
R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR   C   s(    													:	c
         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq 
|  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|
  j d 6d d 6|  j
>temp_727|source:ó|start:3529|stop:4029
t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	
   R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_has
hR   t   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht
   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C
   s(    													:	c         C   s  x§ |  j  ƒ 
 r© t j |  j j ƒ
>temp_728|source:ó|start:7753|stop:8253
colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?
t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333
333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   
rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@
g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axe
s.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st
   vagš™™™™™é?s   
>temp_729|source:ó|start:4983|stop:5483
d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R
.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyRD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d
 j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_
 t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7}
 q. |  j rÊ | n	 |
>temp_730|source:ó|start:5564|stop:6064
adlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R
   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR;   ˆ   s$    		#c     
    C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   
R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id 
   s    c  
>temp_731|source:ó|start:5900|stop:6400
   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j
 d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (
    sJ   /mnt/data/a
>temp_732|source:ó|start:8581|stop:9081
elR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   nu
mpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   d
estt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_length
st   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visual
ize_sequence_length_distribution³   sx    ")
"/
>temp_733|source:ó|start:5677|stop:6177
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$
    		#c         C   s7   |  j  ƒ  x& |  j 
ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R 
  t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c         C   
s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/az
omer/reads-for-assem
>temp_734|source:ó|start:213|stop:713
ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e
 d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s  
 | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent
   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c    
     C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j |
 j ƒ |  j | j 
>temp_735|source:ó|start:3110|stop:3610
 sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „ 
 Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ |
 |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d
 |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ 
 D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S( 
  Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allo
w_mixed_caseR"   R	 
>temp_736|source:ó|start:5245|stop:5745
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x
 |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ
 r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | 
j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   rea
dlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R
   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_737|source:ó|start:1105|stop:1605
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ 
d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s   
 iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t
  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i  
 (   t   rangeR   
>temp_738|source:ó|start:1030|stop:1530
d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR    
  s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(
   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    
iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d 
t | ƒ d ƒ D] 
>temp_739|source:ó|start:8931|stop:9431
/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distributi
on³   sx    ")"/)/
*)		N(   R   R   R   RE   R   
R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   		
						t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  
Z d „  Z RS(   c
>temp_740|source:ó|start:4819|stop:5319
    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j | 
 j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _
 | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"
   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j 
ƒ  d j ƒ  |  _ d
>temp_741|source:ó|start:6795|stop:7295
	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ 
} | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	
 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | 
j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  |
 j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t
 |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d.
 t |	 ƒ t |	 ƒ d
>temp_742|source:ó|start:8146|stop:8646
   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / m
ax: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   mat
plotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R# 
  R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   upda
tet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fi
ll_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   yl
imt   figtextRE   t
>temp_743|source:ó|start:9938|stop:10438
	:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d
 } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	
 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t
 | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 
| ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI 
  R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   
qualscoresRK   t   
>temp_744|source:ó|start:9865|stop:10365
bly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ
 |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ 
^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ
 ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   
R*   R‘   R’   R   R
>temp_745|source:ó|start:627|stop:1127
 | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j 
| j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt
   idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   s
tore_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyt   store   s    *c         C   s   |  j  
j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ 
  /mnt/data/azomer/r
>temp_746|source:ó|start:4654|stop:5154
ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r
   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  
j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d 
|  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA
   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_ent
ry(    (    sJ   /mn
>temp_747|source:ó|start:7414|stop:7914
 j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f 
d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ 
| d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fast
alib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.line
widtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i  
 t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   
blackt   alphag3333
>temp_748|source:ó|start:10291|stop:10791
 S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj
   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR! 
    s*    		+#+c         C   s   |  j 
 j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c        
 C   sA   d |  _  d
>temp_749|source:ó|start:7955|stop:8455
cess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xm
int   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   x
x-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean:
 %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .
png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspec
t   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt
   figuret   rcPara
>temp_750|source:ó|start:5367|stop:5867
 |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn 
 | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   
t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5 
  R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j
 | k r |  j Sq
>temp_751|source:ó|start:7045|stop:7545
| d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | 
| d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d
 | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ 
t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } |
 j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t |
 ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k
 r|  j* } n  y |
>temp_752|source:ó|start:4356|stop:4856
 hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/ 
  R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(  
 N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c      
   C   s£   |  j  r
>temp_753|source:ó|start:2002|stop:2502
eR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFasta
c           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  | 
 _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d 
k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  
j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading 
FASTA into memory: %
>temp_754|source:ó|start:8741|stop:9241
estt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_length
st   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visual
ize_sequence_length_distribution³   sx    ")
"/)/*)		N
(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    
(    (    sJ   /mnt/
>temp_755|source:ó|start:176|stop:676
e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc    
       B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z 
d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   
w(   t   output_file_patht   opent   output_file_obj(   t   selfR   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d | j
 d t | j ƒ f
>temp_756|source:ó|start:9810|stop:10310
   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR     s    									:c         C   sF 
 |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t 
| ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ 
t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  
7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t
 S(   Ni   RG   R
>temp_757|source:ó|start:4456|stop:4956
queR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    	J
c         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   nex
t_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r
› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j r
c | d n | d j
>temp_758|source:ó|start:5729|stop:6229
utils/fastalib.pyR;   ˆ   s$    		#c         
C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   
R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s 
   c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   ©   s
>temp_759|source:ó|start:3426|stop:3926
 ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  
S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   
allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_l
istt   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   re
adlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R   R- 
  t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR 
>temp_760|source:ó|start:4166|stop:4666
 Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä | 
 _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   rev
erse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.  
 R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht 
  i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR7   _   s    	Jc         C   s!   |  j  r
 |  j ƒ  S|  j ƒ 
>temp_761|source:ó|start:5804|stop:6304
    C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   
R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id 
   s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (  
 R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ 
g  |  _ |  j j d
>temp_762|source:ó|start:7398|stop:7898
| j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) |
 ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ
 Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    
i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌ
ì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewi
dthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rig
htg\Âõ(\ï?t   blac
>temp_763|source:ó|start:7229|stop:7729
d) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# 
d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | 
j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( |
 ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+
 | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi
'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i 
  gÍÌÌÌÌÌì?s   axes
>temp_764|source:ó|start:6583|stop:7083
 s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ 
q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d 
g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d
 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | 
j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d
 | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% 
p5d } n  | d  k
>temp_765|source:ó|start:4501|stop:5001
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7 
  _   s    	Jc         C   s!   |  j  r |  j ƒ  S|  
j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s  
  	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j
 |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ |
 d |  _	 t St Sn
>temp_766|source:ó|start:193|stop:693
 rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   
e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c
         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output
_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__
   s    	c         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ 
f ƒ n |  j |
>temp_767|source:ó|start:10052|stop:10552
 ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t
 St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7
} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t 
S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj 
  R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!  
   s*    		
>temp_768|source:ó|start:6649|stop:7149
 |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ
 d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | 
j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d
 d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ
| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d
  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d
 | ƒ d' d( d) d*
>temp_769|source:ó|start:6799|stop:7299
 ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } |
 j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d 
d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d
$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j 
t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	
 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t 
|	 ƒ t |	 ƒ d/ 
>temp_770|source:ó|start:168|stop:668
e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOut
putc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d
 „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S( 
  Nt   w(   t   output_file_patht   opent   output_file_obj(   t   selfR
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j
 d | j d t | j
>temp_771|source:ó|start:1332|stop:1832
sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g 
} d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni
    s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_leng
tht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  
d  S(   N(   R   
>temp_772|source:ó|start:7034|stop:7534
 j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  |
 j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d 
t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d
. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d
7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 
d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ
| d  k r|  j*
>temp_773|source:ó|start:5493|stop:5993
 _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet 
  stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   
line(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR;   ˆ   s$    		#c         C   
s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!  
 R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_774|source:ó|start:1639|stop:2139
   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   
|  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	
   t   __name__t   __module__R   t   TrueR   R   R   R   R   (    (    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR       s   	
>temp_775|source:ó|start:7508|stop:8008
) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n 
Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading:
 %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   grid
t   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™
™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alph
ag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   
t   rotationiZ   t
>temp_776|source:ó|start:5361|stop:5861
 |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ
 Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   
Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE
   R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    
		#c         C   s7   |  j  ƒ  x& |  j ƒ  r
2 |  j | k r |  
>temp_777|source:ó|start:10410|stop:10910
   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d 
 |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R 
  R1   R2   (   R 
>temp_778|source:ó|start:8552|stop:9052
weenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt  
 figtextRE   t   textt   numpyt   meant   stdt   minR)   t   savefigt   s
how(   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept  
 pltRh   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   
ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt&   visualize_sequence_length_distribution³   sx    
")
>temp_779|source:ó|start:5985|stop:6485
iningsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c   
      C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©
   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d 
ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6
   ¬   s    				
>temp_780|source:ó|start:2551|stop:3051
t   fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R 
  (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyR   /   s    		(c         C
   s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    
(   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibr
>temp_781|source:ó|start:10431|stop:10931
  t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR!     s*    		+#+c    
     C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9 
 s    c         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j
 j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R  
 (    (    sJ   /mnt
>temp_782|source:ó|start:6911|stop:7411
 | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d 
ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d }
 n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ
| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd)
 d* ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0
 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j
 t$ ƒ | j  g  ƒ 
>temp_783|source:ó|start:2162|stop:2662
c           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  | 
 _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d 
k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  
j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading 
FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat
   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R  
 t   f_name(    (  
>temp_784|source:ó|start:8295|stop:8795
   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!
   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcPara
mst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   
plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   
xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t
   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept
	   ytickstept   pl
>temp_785|source:ó|start:9410|stop:9910
 Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d 
 |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  | 
 _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(
   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   q
uals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,
   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR 
>temp_786|source:ó|start:375|stop:875
 |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent 
  output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c      
   C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j
 ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet
   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt  
 splitt   store_fre
>temp_787|source:ó|start:4627|stop:5127
   C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR; 
  (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d 
k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n
 | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   
R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   
False(   R   t   h
>temp_788|source:ó|start:7813|stop:8313
  t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t  
 blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence l
engthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ym
int   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat  
 centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f 
/ min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   
(-   t   matplotlib
>temp_789|source:ó|start:10172|stop:10672
 ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] 
} t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH
   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R 
  t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/tr
ainingsets2/Vibrio/utils/fastalib.pyR!     s*    		+
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R 
  (    (    sJ   /mn
>temp_790|source:ó|start:2039|stop:2539
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR       s   		t	   ReadFastac           B   s   e  Z d  „  Z 
d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j 
j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j
 j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S( 
  Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t
	   sequencest   Se
>temp_791|source:ó|start:10601|stop:11101
s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c
         C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ 
d  S(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R6   <  s    					(   R   R   R   R   R!   R   R6   (    (    (   
 sJ   /mnt/data/azom
>temp_792|source:ó|start:2188|stop:2688
 d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _
 x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j
 ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d 
ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   
(   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst
   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ  
 /mnt/data/azomer/re
>temp_793|source:ó|start:216|stop:716
n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d
 „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   |
 |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent  
 output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c      
   C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j
 ƒ |  j | j | 
>temp_794|source:ó|start:3210|stop:3710
 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d 
|  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ 
|  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ 
r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t
   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	 
  R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hash
R   t   file_point
>temp_795|source:ó|start:122|stop:622
d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j
 d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  
„  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c      
   C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_pat
ht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s   
 	c         C  
>temp_796|source:ó|start:9986|stop:10486
j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ
  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j
 d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ 
 |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG
   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE
   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/az
omer/reads-for-assem
>temp_797|source:ó|start:1722|stop:2222
rainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  
j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   
t   __name__t   __module__R   t   TrueR   R   R   R   R   (    (    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R       s   		t	   ReadFastac           B   s   e  Z d  „  Z d 
„  Z RS(   c    
>temp_798|source:ó|start:6858|stop:7358
| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d
 d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ
| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d
& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d 
| ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ
 | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d
6 ƒ| j | d7 ƒ
>temp_799|source:ó|start:5025|stop:5525
  R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t 
  False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c   
      C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | 
sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j
 ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d
 7_ t S(   Ni 
>temp_800|source:ó|start:8756|stop:9256
_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_d
istributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_le
ngth_distribution³   sx    ")"/
)/*)		N(   R   R   
R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   
/mnt/data/azomer/rea
>temp_801|source:ó|start:801|stop:1301
t   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequenci
es(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyt   store   s    *c         C   s   |  j  j d | ƒ 
d  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c
         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %
s(   R   R   R  
>temp_802|source:ó|start:7472|stop:7972
 t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ |
 d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi' 
 i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   g
ÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   
linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t
   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of seq
uencess   sequence 
>temp_803|source:ó|start:6084|stop:6584
 j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c      
   C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    
(   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    			
	c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ
  rŸ |  j d d k
>temp_804|source:ó|start:3943|stop:4443
											:	c         C   s  x§ |  j  ƒ  r© t j |  j
 j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d
 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j 
D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |
  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   nex
t_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   s
ortedR   R/   R   t
>temp_805|source:ó|start:5303|stop:5803
 j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_
 t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7}
 q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R
*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tell
R-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c    
>temp_806|source:ó|start:1923|stop:2423
/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t   True
R   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac
           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  
_  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k
 r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j
 j ƒ q$ Wt j j
>temp_807|source:ó|start:10143|stop:10643
 j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j
 ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni  
 RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R  
 RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    
		+#+c         C   s   |  j  j ƒ  d  S
(   N(   R1   R  
>temp_808|source:ó|start:8662|stop:9162
pyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   de
stt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengths
t   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visuali
ze_sequence_length_distribution³   sx    ")
"/)/*)		N(
   R   R   R   R
>temp_809|source:ó|start:7485|stop:7985
| ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn 
| j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s
   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   
axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™
™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ
(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   se
quence lengthi2   i
>temp_810|source:ó|start:4641|stop:5141
 j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j | 
 j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  
|  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R  
 (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R  
 t   hash_entry(    
>temp_811|source:ó|start:9746|stop:10246
  R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR     s    									:c         C   sF  |  j  j ƒ  d 
j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ
  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j
 d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ 
 |  _ g  |  j j ƒ
>temp_812|source:ó|start:9887|stop:10387
io/utils/fastalib.pyR     s    									:c         C   s
F  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ 
t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7
_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j 
ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7
_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R 
  Rj   R"   R   RE 
>temp_813|source:ó|start:8590|stop:9090
   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   m
eant   stdt   minR)   t   savefigt   show(   R   t   titlet   destt   m
ax_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq
_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_seque
nce_length_distribution³   sx    ")"
/)/
>temp_814|source:ó|start:4966|stop:5466
 | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (  
 R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   
hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyRD   x   s    	!&c         C   sì   d  | 
 _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  
_ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn
  | | j ƒ  7} q.
>temp_815|source:ó|start:2733|stop:3233
astalib.pyR   /   s    		(c         C   s   |  j  j ƒ  d
  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d 
 „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	
 d „ Z RS(   c  
>temp_816|source:ó|start:2896|stop:3396
y/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R  
 (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/ut
ils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „
 Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d
 „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ 
d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ 
|  j j d ƒ |  j
>temp_817|source:ó|start:5959|stop:6459
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_rea
d_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R  
 (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |
  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   
R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_818|source:ó|start:8329|stop:8829
ott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R
   R   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect
   subplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   yla
belt   xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t  
 textt   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t  
 titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   seq
uence_lengthst   se
>temp_819|source:ó|start:2301|stop:2801
d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  
j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)  
 [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Sequ
enceSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appe
ndR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(c  
       C   s   | 
>temp_820|source:ó|start:1406|stop:1906
ib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ
 g } d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(
   Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet   piec
e_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j
 ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/tra
>temp_821|source:ó|start:8724|stop:9224
 t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t 
  sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyt&   visualize_sequence_length_distribution³   sx    "
)"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6 
  R*   RŽ   (    (  
>temp_822|source:ó|start:2321|stop:2821
 k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j | 
 j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading
 FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fasta
t   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R 
  t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   /   s    		(c         C   s   | 
 j  j ƒ  d  S(   
>temp_823|source:ó|start:9651|stop:10151
  Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   qu
als_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R, 
  R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR     s    									:c         C   sF  
|  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t |
 ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t
 St Sn  | j d ƒ
>temp_824|source:ó|start:1741|stop:2241
/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   
N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __
module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   	
	t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c   
      C   sÉ   g  |
>temp_825|source:ó|start:10109|stop:10609
w |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ
 Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q
|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*  
 R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresR
K   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR!     s*    		+#+c   
      C   s   |  
>temp_826|source:ó|start:4120|stop:4620
d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | 
d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d 
 S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   has
hlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R 
  t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc 
>temp_827|source:ó|start:4769|stop:5269
/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   
|  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ 
|  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd 
 S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	
   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!
&c      
>temp_828|source:ó|start:2833|stop:3333
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR
   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „
  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c  
       C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _
 g  |  _ | |  _	 i
>temp_829|source:ó|start:962|stop:1462
astalib.pyt   store   s    *c         C   s   |  j  j d | ƒ d
  S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c 
        C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s
(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c   
      C   sk   t  d
>temp_830|source:ó|start:7766|stop:8266
0t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸
…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g
333333Ã?s   number of sequencess   sequence lengthi2   i   t   rotationiZ   
t   sizes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…
ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{
®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™
é?s   x-larges   .
>temp_831|source:ó|start:3915|stop:4415
talib.pyR   C   s(    													:	c         C   s
  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j 
|  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|
  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d
 ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R
   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   he
xdigestR.   R&   R	 
>temp_832|source:ó|start:6470|stop:6970
  s    				c         C   sX  d d  l  j } d d  l j } g  } |  j 
ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j
 j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t
 t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW|
 j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d
 ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | 
j |	 d d d d  ƒ
>temp_833|source:ó|start:8658|stop:9158
 numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet 
  destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_len
gthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   vis
ualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R
>temp_834|source:ó|start:778|stop:1278
write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt   sp
litt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s
   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d |
 ƒ d  S(   Ns 
>temp_835|source:ó|start:3217|stop:3717
„ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d
  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |
  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^
 q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fas
ta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   
R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hashR   t
   file_pointert   
>temp_836|source:ó|start:565|stop:1065
ils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 | 
 j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(
   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent  
 idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyt   store   s    *c         C   s   |  j  j d | ƒ d  S
(   Ns   >%s(   R
>temp_837|source:ó|start:1986|stop:2486
e__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s   		
t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(   c       
  C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k
 s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j
 ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fa
stalib] Reading FAST
>temp_838|source:ó|start:3018|stop:3518
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c   
        B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d
 „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _
  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  
_ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g 
 |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ 
 n  d  S(   Ni    
>temp_839|source:ó|start:2689|stop:3189
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(
c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	
R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d
 „  Z d „  Z	 d „
>temp_840|source:ó|start:4470|stop:4970
   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR7   _   s    	Jc         
C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k 
r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n |
 d j ƒ  |  _ | 
>temp_841|source:ó|start:6207|stop:6707
/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  
|  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   ( 
  R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d
  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j
 j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d
 ƒ | sé t | ƒ
>temp_842|source:ó|start:9896|stop:10396
fastalib.pyR     s    									:c         C   sF  |  j 
 j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ 
r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_ t St
 Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q
7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S( 
  Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"
   R   RE   R5   R2
>temp_843|source:ó|start:4573|stop:5073
ib.pyR7   _   s    	Jc         C   s!   |  j  r |  j
 ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   
r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” | 
 j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d 
|  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   R
A   R"   R.   R/   R
>temp_844|source:ó|start:4458|stop:4958
eR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    	Jc
         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_
uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› 
|  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc 
| d n | d j 
>temp_845|source:ó|start:8807|stop:9307
equence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyt&   visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R
*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib
>temp_846|source:ó|start:8758|stop:9258
ent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_dis
tributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_leng
th_distribution³   sx    ")"/
)/*)		N(   R   R   R
   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /m
nt/data/azomer/reads
>temp_847|source:ó|start:7896|stop:8396
ackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence leng
thi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint
   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   ce
nteri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / m
in: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (- 
  t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R! 
  R"   R#   R$   R 
>temp_848|source:ó|start:5960|stop:6460
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read
_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   
(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  | 
 _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R
2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib
>temp_849|source:ó|start:774|stop:1274
   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt 
  splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C 
  s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j 
d | ƒ d  S(   
>temp_850|source:ó|start:554|stop:1054
2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  
r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j |
 ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt
   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   store_fr
equencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyt   store   s    *c         C   s   |  j  j d |
 ƒ d  S(   Ns 
>temp_851|source:ó|start:6914|stop:7414
 j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ 
|	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n
  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ
| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d*
 ƒ| j! d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1
 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$
 ƒ | j  g  ƒ |
>temp_852|source:ó|start:561|stop:1061
o/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9 | r
9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d
  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   len
t   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequenci
es(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/
fastalib.pyt   store   s    *c         C   s   |  j  j d | ƒ 
d  S(   Ns   >%s(
>temp_853|source:ó|start:1341|stop:1841
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s
    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j 
g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   
i   (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht   ti
ckst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   
N(   R   t   clos
>temp_854|source:ó|start:10104|stop:10604
 ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ
 ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ 
^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	  
 R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualsc
oresRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyR!     s*    		+#+
c         C   s 
>temp_855|source:ó|start:10876|stop:11376
 R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R  
 R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __main__
i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R   R   
R   R   t   argvR    RŽ   R   (    (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_856|source:ó|start:7099|stop:7599
 pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d 
| ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ 
| j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6
 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g
  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d
6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d?
 ƒ n Xy | j, ƒ
>temp_857|source:ó|start:6664|stop:7164
1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g
 | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 
6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j
 t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d 
| d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% 
p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( 
d) d* ƒ| j  t d
>temp_858|source:ó|start:8180|stop:8680
tal: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-large
s   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matp
lotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t 
  maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   subplot
RO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xla
belR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   n
umpyt   meant   st
>temp_859|source:ó|start:9238|stop:9738
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B  
 s   								t   QualSourcec           B   s/   e  Z e d  „ Z d
 „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _
 d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ | 
 j r d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j
 ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t  
 qualst	   quals_int
>temp_860|source:ó|start:3650|stop:4150
   unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   tota
l_seqR   t	   readlinest   startswitht   resett   init_unique_hash(   R   R)
   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR   C   s(    													
:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j
 k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j 
g d 6|  j d 6d 
>temp_861|source:ó|start:3174|stop:3674
 d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | 
|  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i 
 |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t
 g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j
 ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   la
zy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   u
nique_hash_listt   
>temp_862|source:ó|start:8074|stop:8574
      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	   s   axes
.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st
   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.
pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R
   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   updatet   r
ct   GridSpect   subplotRO   R   t   subplots_adjustt   plott   fill_betwe
enR   t   ylabelt
>temp_863|source:ó|start:9798|stop:10298
R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR     s    									:c      
   C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  
} | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |
  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | 
d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ | 
 j d 7_ t S(   
>temp_864|source:ó|start:10871|stop:11371
 R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(  
 R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __m
ain__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R   
R   R   R   t   argvR    RŽ   R   (    (    (    sJ   /mnt/data/azomer/reads
-for-assembly/traini
>temp_865|source:ó|start:8583|stop:9083
R*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   nump
yt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   des
tt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst
   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualiz
e_sequence_length_distribution³   sx    ")
"/
>temp_866|source:ó|start:9570|stop:10070
 _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(
   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   q
uals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,
   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR     s    									:c         C   sF 
 |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t 
| ƒ r¥ | j ƒ  | 
>temp_867|source:ó|start:4843|stop:5343
 s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d
 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn 
t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R
   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/az
omer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	
!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d }
 x |  j j ƒ  } |
>temp_868|source:ó|start:1618|stop:2118
 R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c
         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R   
R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastali
>temp_869|source:ó|start:6653|stop:7153
j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d 
} n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j
 i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d 
!ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ|
 j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d |
 ƒ d' d( d) d* ƒ
>temp_870|source:ó|start:1569|stop:2069
 Ni    s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_l
engtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ
  d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __
name__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /
mnt/data/azomer/read
>temp_871|source:ó|start:10634|stop:11134
R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |
  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	
   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R
   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_872|source:ó|start:1733|stop:2233
2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d
  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name
__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s
   		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS( 
  c         C   s
>temp_873|source:ó|start:2835|stop:3335
  (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ 
  /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR  
 .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  
Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c   
      C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g
  |  _ | |  _	 i  
>temp_874|source:ó|start:643|stop:1143
|  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  
S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent
   idst	   write_seqt   seq(   R   t   entryt   splitt   store_frequencies
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyt   store   s    *c         C   s   |  j  j d | ƒ d 
 S(   Ns   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azom
er/reads-for-assembl
>temp_875|source:ó|start:9626|stop:10126
 |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"
   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5 
  R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ 
|  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 qw |  _ |  j d 
>temp_876|source:ó|start:3746|stop:4246
adlinest   startswitht   resett   init_unique_hash(   R   R)   R,   R   R- 
  t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   C   s(    													:	c      
   C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j |
 d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 
6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD]
 } | d ^ qä | 
>temp_877|source:ó|start:8697|stop:9197
  savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept	 
  ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t   
figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution³   
sx    ")"/)/*)
		N(   R   R   R   RE   R   R7   R
!   RD   R;   RM   R
>temp_878|source:ó|start:4686|stop:5186
  t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assem
bly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£
   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7
_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t 
Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R  
 R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azome
r/reads-for-assembly
>temp_879|source:ó|start:8904|stop:9404
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize
_sequence_length_distribution³   sx    ")
"/)/*)		N( 
  R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   B   s   								t   QualSourcec           B   s/   e  Z 
e d  „ Z d „  Z
>temp_880|source:ó|start:1411|stop:1911
R   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g 
} d j g  t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   N
i    s   i   (   t   rangeR   t   join(   R   t   sequencet   piece_len
gtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  
d  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/training
>temp_881|source:ó|start:3398|stop:3898
r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ
  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   None
t   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique
_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_pointert   s
eekt	   total_seqR   t	   readlinest   startswitht   resett   init_unique_hash
(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2
>temp_882|source:ó|start:9407|stop:9907
 „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _
 d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d 
 |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  
S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	 
  quals_intR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R  
 R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.py
>temp_883|source:ó|start:140|stop:640
 d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  
d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ 
Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  
_  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   out
put_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C
   s`   | j  r9 | 
>temp_884|source:ó|start:5908|stop:6408
 (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c     
    C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   
s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ 
d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ  
 /mnt/data/azomer/re
>temp_885|source:ó|start:10786|stop:11286
 _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"  
 R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					
(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t 
  __main__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    
R   R   R   R   
>temp_886|source:ó|start:1653|stop:2153
ht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d
  S(   N(   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name
__t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/
data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s
   		t	   
>temp_887|source:ó|start:4905|stop:5405
 j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d
 |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R. 
  R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
RD   x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d 
j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t
 St	 Sn  | j d ƒ
>temp_888|source:ó|start:6242|stop:6742
     C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni  
  (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    		
		c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j
 ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  |
 j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d
 ƒ pâ d } n  d
>temp_889|source:ó|start:4|stop:504
c           @   sÈ   d  d l  Z  d  d l Z d  d l Z d d d „  ƒ  YZ d 
d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ
 Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   Fast
aOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z 
d d „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  
S(   Nt   w(   t   output_file_patht   opent   output_file_obj(   t   se
lfR   (    (    sJ 
>temp_890|source:ó|start:10207|stop:10707
ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7
_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R 
  Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yR!     s*    		+#+c         C   s   |
  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/tr
>temp_891|source:ó|start:10686|stop:11186
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c    
     C   sA   d |  _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S
(   Ni    (   R"   R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   
<  s    					(   R   R   R   R   R!   R   R6   (    (    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   
  s   		t 
>temp_892|source:ó|start:7460|stop:7960
 t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n 
 y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A 
  Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsiz
ei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestyl
et   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   to
pgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   n
umber of sequencess
>temp_893|source:ó|start:4281|stop:4781
S(   NR   t   counti   R	   R   t   reverse(   t   next_regulart   hash
libt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R  
 t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N( 
  R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_894|source:ó|start:10943|stop:11443
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    			
		(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		
t   __main__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R 
   R   R   R   R   t   argvR    RŽ   R   (    (    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   <module>   s 
  Å>
>temp_895|source:ó|start:2283|stop:2783
  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 
ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Ni
è  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   
sequencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t
   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		
(c   
>temp_896|source:ó|start:6845|stop:7345
 d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d 
d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d d
 d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  k 
r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d
 t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! 
d+ d d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 
d d) d4 d5 d6 ƒ|
>temp_897|source:ó|start:3718|stop:4218
eekt	   total_seqR   t	   readlinest   startswitht   resett   init_unique_hash
(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    										
			:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ
  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j
 d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j |
 d | f ^ qº d
>temp_898|source:ó|start:9260|stop:9760
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   							
	t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  
Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  
|  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t 
g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R
(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR
   R   R1   R2   t
>temp_899|source:ó|start:5184|stop:5684
ly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c   
      C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } |
 sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j 
j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j 
d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	  
 R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    ( 
   sJ   /mnt/data/az
>temp_900|source:ó|start:2603|stop:3103
   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		
(c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s 
  	R   c        
>temp_901|source:ó|start:681|stop:1181
n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(  
 t   uniquet   write_idt   idt   lent   idst	   write_seqt   seq(   R   
t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c
         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(
   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_902|source:ó|start:3150|stop:3650
 „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c
         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  | 
 _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ
 |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _
 |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht
   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t 
  unique_hash_dictt
>temp_903|source:ó|start:9690|stop:10190
*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_q
ualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    				
					:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d 
 |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D
] } t	 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j 
 j ƒ  t | ƒ ƒ 
>temp_904|source:ó|start:9244|stop:9744
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   
								t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z
 d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d | 
 _ d  |  _ d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r
 d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  
d  S(   Ni    R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   quals
t	   quals_intR   R
>temp_905|source:ó|start:4999|stop:5499
Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>
   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s   
 	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d
 } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j
 d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | 
n	 | j ƒ  |  _ | 
>temp_906|source:ó|start:3158|stop:3658
d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c      
   C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |
  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j 
r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ
  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   None
t   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique
_hash_dictt   uniqu
>temp_907|source:ó|start:2273|stop:2773
|  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |  j j ƒ 
t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d
  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   
R   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst   st
derrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s
    		(
>temp_908|source:ó|start:6453|stop:6953
astalib.pyR6   ¬   s    				c         C   sX  d d  l  j } d d  l j
 } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d k r† t j j	 d
 |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ 
| sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 |
 c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d 
d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d d d d d
 d d d ƒ | j 
>temp_909|source:ó|start:3836|stop:4336
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   C   s(    													:	c         C   s
  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |
  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6| 
 j | <q Wg  t	 g  |  j D] } |  j | d | f ^ qº d t ƒD] } | d
 ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R 
  t   reverse(   t
>temp_910|source:ó|start:8204|stop:8704
td: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i
   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   grid
specR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figure
t   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subplots_
adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   
ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt 
  minR)   t   savef
>temp_911|source:ó|start:10790|stop:11290
d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R* 
  R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					( 
  R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t   __
main__i   R‚   i   i   R*   (    (    (    (    (   R#   R|   R<   R    R  
 R   R   R   t  
>temp_912|source:ó|start:9809|stop:10309
,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR     s    									:c         C   sF
  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t
 | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ qw |  _ |  j d 7_
 t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ 
 7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ 
t S(   Ni   RG   
>temp_913|source:ó|start:2164|stop:2664
          B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _
  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k
 r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j
 j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FA
STA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fastat 
  nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R   t
   f_name(    (    
>temp_914|source:ó|start:7463|stop:7963
 j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y
 | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   N
iÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei
   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet
   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgf
fffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   numb
er of sequencess   
>temp_915|source:ó|start:116|stop:616
Z d d d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº
 e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  
Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c  
       C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_fi
le_patht   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__  
 s    	c      
>temp_916|source:ó|start:2941|stop:3441
>   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c      
     B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „ 
 Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d
  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g
  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  | 
 j j ƒ  D] } | 
>temp_917|source:ó|start:5212|stop:5712
s/fastalib.pyRD   x   s    	!&c         C   sì   d  |  _ | 
 j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  
j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | |
 j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    
R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2 
  t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trai
>temp_918|source:ó|start:5822|stop:6322
  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R 
  RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c
         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j 
d ƒ d  S(   Ni  
>temp_919|source:ó|start:2713|stop:3213
sets2/Vibrio/utils/fastalib.pyR   /   s    		(c         C  
 s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (
   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c           B   sn
   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z 
d „  Z d	 d	 d	 d	
>temp_920|source:ó|start:4990|stop:5490
_	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/ 
  R-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   
x   s    	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ
  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	
 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  
j rÊ | n	 | j ƒ 
>temp_921|source:ó|start:2234|stop:2734
   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j
 j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  
j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] 
Reading FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet 
  fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R   (
   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fa
>temp_922|source:ó|start:7517|stop:8017
| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j,
 ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g
      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colo
rs   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t  
 bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó
?t   y2g333333Ã?s   number of sequencess   sequence lengthi2   i   t   rota
tioniZ   t   sizes
>temp_923|source:ó|start:9938|stop:10438
	:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d
 } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	
 | ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t
 | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 
| ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI 
  R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   
qualscoresRK   t   
>temp_924|source:ó|start:8504|stop:9004
 R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR
*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpy
t   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   dest
t   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst
   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualize
_sequence_length_dis
>temp_925|source:ó|start:9449|stop:9949
   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t	 | 
 j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j d
 ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_path
R*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total_
qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer
/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    			
						:
>temp_926|source:ó|start:7709|stop:8209
  gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	
   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî
?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of 
sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt
   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weight
s   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s /
 mean: %.2f / std: %
>temp_927|source:ó|start:4993|stop:5493
t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R
-   R>   R   R	   R   R   t   False(   R   t   hash_entry(    (    sJ   /m
nt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x  
 s    	!&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |
  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn
  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j 
rÊ | n	 | j ƒ  | 
>temp_928|source:ó|start:179|stop:679
e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc       
    B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d 
„  Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(
   t   output_file_patht   opent   output_file_obj(   t   selfR   (    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d | j d
 t | j ƒ f ƒ
>temp_929|source:ó|start:8281|stop:8781
  i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   g
ridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   fig
uret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   subplo
ts_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst
   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   std
t   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_lent
	   xtickstept	   yt
>temp_930|source:ó|start:1503|stop:2003
 t  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i
   (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht   tick
st   x(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(
   R   t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __mo
dule__R   t   True
>temp_931|source:ó|start:8052|stop:8552
mint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat 
  centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f
 / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i  
 (-   t   matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6  
 R!   R"   R#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcP
aramst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt
   plott   fill_bet
>temp_932|source:ó|start:8933|stop:9433
rainingsets2/Vibrio/utils/fastalib.pyt&   visualize_sequence_length_distribution
³   sx    ")"/)/
*)		N(   R   R   R   RE   R   R7
   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   			
					t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z
 d „  Z RS(   c 
>temp_933|source:ó|start:7459|stop:7959
 t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n
  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A
   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   figsi
zei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   linesty
let   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   t
opgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   
number of sequencess
>temp_934|source:ó|start:470|stop:970
bj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   |
 j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | 
j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt  
 idt   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   stor
e_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.
>temp_935|source:ó|start:343|stop:843
 RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t
   output_file_patht   opent   output_file_obj(   t   selfR   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt 
  __init__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t
 | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss 
  frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seq
t   seq(   R   t
>temp_936|source:ó|start:7479|stop:7979
ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ
 Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i
   s   [fastalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì
?s   axes.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewid
thgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   righ
tg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess
   sequence lengthi
>temp_937|source:ó|start:1058|stop:1558
s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         C   s0
   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   R   R   R
   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   sk   
t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | |
 | | | d !^ 
>temp_938|source:ó|start:445|stop:945
   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	
c         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n 
|  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t 
  uniquet   write_idt   idt   lent   idst	   write_seqt   seq(   R   t  
 entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingset
>temp_939|source:ó|start:1294|stop:1794
   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c         C   s
k   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } |
 | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join
(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    
%c         C   
>temp_940|source:ó|start:9780|stop:10280
   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    								
	:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d 
} xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 |
 ƒ ^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t 
| ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 |
 ƒ ^ q|  _ |  
>temp_941|source:ó|start:3067|stop:3567
b.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z
 d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS( 
  c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d 
 |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d
 ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ | 
 _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_pa
tht   Nonet   name
>temp_942|source:ó|start:3591|stop:4091
_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt
   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readlin
est   startswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t 
  l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   C   s(    													:	c         C 
  s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d 
j |  j ƒ |  j 
>temp_943|source:ó|start:671|stop:1171
 f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   freque
ncy:%d(   t   uniquet   write_idt   idt   lent   idst	   write_seqt   seq
(   R   t   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s   
 *c         C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t
   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/traini
ngsets2/Vibrio/utils
>temp_944|source:ó|start:3007|stop:3507
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   
	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z 
d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s
  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |
  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  
_ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 
rý |  j ƒ  n  d  S
>temp_945|source:ó|start:7803|stop:8303
™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\
Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   
sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   x
maxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget
   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / 
std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   
i   i   (-   t   
>temp_946|source:ó|start:9041|stop:9541
)"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   
R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourcec    
       B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c       
  C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  
_ t	 |  j  ƒ |  _ 
>temp_947|source:ó|start:5856|stop:6356
 |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyt   get_seq_by_read_id    s    c         C   s   |  j  j ƒ  d
  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-asse
mbly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c         C   s8  
 d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*
   R	   R   R   R1
>temp_948|source:ó|start:10571|stop:11071
#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d  |  _ d  
|  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘   R’   R  
 R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR6   <  s    					(   R   R   R   R  
 R!   R   R6   (   
>temp_949|source:ó|start:1515|stop:2015
 ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t  
 rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   t 
  close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name__t   __module__R   t
   TrueR   R   R
>temp_950|source:ó|start:9125|stop:9625
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6  
 R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR   B   s   								t   QualSourcec        
   B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C 
  sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _ g  |  _ t
	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  D] } | j
 d ƒ r’ | ^ q’ 
>temp_951|source:ó|start:6747|stop:7247
 d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ
 | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t
 ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | 
d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5
d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) 
d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } | ^ q
Åd) d* ƒ| j! d+
>temp_952|source:ó|start:9337|stop:9837
			t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d 
„  Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ 
d  |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD 
t g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni  
  R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_in
tR   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8  
 (    (    sJ   /mnt
>temp_953|source:ó|start:8823|stop:9323
   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/da
ta/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visualiz
e_sequence_length_distribution³   sx    ")
"/)/*)		N(
   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   B   s  
>temp_954|source:ó|start:6117|stop:6617
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d 
 |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1 
  R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l  
j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d 
k r† t j j	 d | 
>temp_955|source:ó|start:2366|stop:2866
n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè 
 i    i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   seq
uencest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t  
 flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		
(c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R  
 (    (    sJ   /mnt
>temp_956|source:ó|start:5896|stop:6396
   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (
    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib
.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  
j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R   ( 
   (    sJ   /mnt/da
>temp_957|source:ó|start:8893|stop:9393
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt& 
  visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ
   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   B   s   								t   QualSourcec           B   
s/   e  Z e d  „ 
>temp_958|source:ó|start:9608|stop:10108
 ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_file_pat
hR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t   total
_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azome
r/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    			
						:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ 
d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ 
 D] } t	 | ƒ ^ 
>temp_959|source:ó|start:5215|stop:5715
astalib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j
 j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j
 d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j
 ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R( 
  (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t
   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainin
>temp_960|source:ó|start:3673|stop:4173
 unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readlines
t   startswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t   
l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   C   s(    													:	c         C   
s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j
 |  j ƒ |  j | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 
6|  j | <q Wg  t	
>temp_961|source:ó|start:2156|stop:2656
dFastac           B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ  
 g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j 
j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j
 j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Re
ading FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   
fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R   ( 
  R   t   f_name( 
>temp_962|source:ó|start:6002|stop:6502
/utils/fastalib.pyt   get_seq_by_read_id    s    c         C   s   
|  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c    
     C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni  
  (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azo
mer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    		
		c         C  
>temp_963|source:ó|start:3062|stop:3562
stalib.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d
 „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z
 RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  
_ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j 
j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ 
ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_fi
le_patht   Nonet  
>temp_964|source:ó|start:4554|stop:5054
Vibrio/utils/fastalib.pyR7   _   s    	Jc         C   
s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   
R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |
  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d 
j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	
   R   (   R   RA
>temp_965|source:ó|start:5213|stop:5713
/fastalib.pyRD   x   s    	!&c         C   sì   d  |  _ |  
j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  
j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | |
 j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R
(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2  
 t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/read
s-for-assembly/train
>temp_966|source:ó|start:63|stop:563
 d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d d	 „  ƒ  YZ e d 
k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d 
S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z 
d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  
t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   output
_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/
>temp_967|source:ó|start:4232|stop:4732
 d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	  
 R   t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	  
 hexdigestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R 
  t   hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR7   _   s    	Jc         C 
  s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   ( 
  R   (    (    sJ 
>temp_968|source:ó|start:5386|stop:5886
 St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q.
 |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*  
 R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR- 
  R>   (   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembl
y/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		
#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq
 Wt S(   N(   R6
>temp_969|source:ó|start:9602|stop:10102
 j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   quals_fi
le_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R2   t  
 total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s    
									:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  
|  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j
 j ƒ  D] } t	 | 
>temp_970|source:ó|start:9945|stop:10445
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ
 |  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ 
^ qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ
 ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   
R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualsco
resRK   t   q(    (
>temp_971|source:ó|start:7805|stop:8305
™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ
(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   se
quence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xma
xt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget 
  hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / st
d: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i
   i   (-   t   ma
>temp_972|source:ó|start:9503|stop:10003
_ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  
|  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(  
 (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   
R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR     s    									:c         C   sF  |  j  j ƒ  
d j ƒ  |  _ d  |
>temp_973|source:ó|start:1127|stop:1627
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c     
    C   s0   | r |  j  | ƒ } n  |  j j d | ƒ d  S(   Ns   %s(   
R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c       
  C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D
] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t
   join(   R   t 
>temp_974|source:ó|start:8455|stop:8955
mst   updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   
plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   
xlimt   ylimt   figtextRE   t   textt   numpyt   meant   stdt   minR)   t
   savefigt   show(   R   t   titlet   destt   max_seq_lent	   xtickstept
	   ytickstept   pltRh   t   sequence_lengthst   seq_len_distributionR8   t 
  figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/tra
iningsets2/Vibrio/ut
>temp_975|source:ó|start:1314|stop:1814
   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/
utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ |
 ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d 
!^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   seque
ncet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C   
s   |  j  j ƒ  d 
>temp_976|source:ó|start:4176|stop:4676
 |  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  
j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t
   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	  
 t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR7   _   s    	Jc         C   s!   |  j  r |  j ƒ 
 S|  j ƒ  Sd  S(  
>temp_977|source:ó|start:373|stop:873
 | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent
   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c    
     C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j |
 j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   unique
t   write_idt   idt   lent   idst	   write_seqt   seq(   R   t   entryt
   splitt   store_f
>temp_978|source:ó|start:4318|stop:4818
t   reverse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdi
gestR.   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t  
 hasht   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR7   _   s    	Jc         C   s!  
 |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R  
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR!   r   s
>temp_979|source:ó|start:8013|stop:8513
zes   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s
   %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?
s6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   
x-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott
   matplotlib.gridspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R
   t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   
subplotRO   R   t 
>temp_980|source:ó|start:5473|stop:5973
rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   (   R*   R   R
1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t   tellR-   R>   
(   R   R   t   line(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    		#
c         C   s7   |  j  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S
(   N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/da
ta/azomer/reads-for-
>temp_981|source:ó|start:133|stop:633
 YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d 
ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e
 d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C   s  
 | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent
   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c    
     C   s`   | j 
>temp_982|source:ó|start:5137|stop:5637
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j
 ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ rk | |  _ |  j d
 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t | ƒ ƒ Pn  | | j 
ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni   t    R(   
(   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE   R5   R2   t 
  tellR-   R>   (  
>temp_983|source:ó|start:7728|stop:8228
s.linewidtht   gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™
¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï
?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequencess   seque
nce lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt
   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   h
at   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: 
%.2f / min: %s / max
>temp_984|source:ó|start:151|stop:651
 e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿ
ÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z
 e d „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t | d
 ƒ |  _ d  S(   Nt   w(   t   output_file_patht   opent   output_file_ob
j(   t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | 
j  r9 | r9 |  j d
>temp_985|source:ó|start:8717|stop:9217
   R   t   titlet   destt   max_seq_lent	   xtickstept	   ytickstept   pltR
h   t   sequence_lengthst   seq_len_distributionR8   t   figt   gst   ax1t
   y(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyt&   visualize_sequence_length_distribution³   sx    "
)"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R
   R6   R*   RŽ   (
>temp_986|source:ó|start:859|stop:1359
itt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyt   store   s    *c         C   s 
  |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R   R	   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR      s    c         C   s0   | r |  j  | ƒ } n  |  j j d |
 ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /m
nt/data/azomer/reads
>temp_987|source:ó|start:1695|stop:2195
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    
%c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R
   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingset
s2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           
B   s   e  Z d  „ 
>temp_988|source:ó|start:7854|stop:8354
   topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?
s   number of sequencess   sequence lengthi2   i   t   rotationiZ   t   siz
es   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s 
  %st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s
6   total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x
-larges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott 
  matplotlib.gridspe
>temp_989|source:ó|start:3435|stop:3935
} | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni 
   t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mix
ed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt   u
nique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readlinest 
  startswitht   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR   C   s( 
>temp_990|source:ó|start:8988|stop:9488
uence_length_distribution³   sx    ")"
/)/*)		N(   R
   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   RŽ   (    (    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR   B   s   								t   QualSourcec           B   s/   e  Z e d
  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ   | |  _  d  |  _
 | |  _ d |  _ 
>temp_991|source:ó|start:6195|stop:6695
Vibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ 
d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R
1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR6   ¬   s    				c         C   sX  d d  l
  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d d k s_ |  j d
 k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ 
 t j j	 d ƒ |
>temp_992|source:ó|start:6676|stop:7176
t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	
 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d
 d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | 
j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	
 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  
| d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ|
 j  t d t |	 ƒ d
>temp_993|source:ó|start:7109|stop:7609
 n  | j t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t
 d t |	 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d-
 d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j
 | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j
% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d)
 d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n X
y | j, ƒ  Wn 
>temp_994|source:ó|start:6682|stop:7182
 j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g | d }	 x |
 D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6ƒ | j d d d
 d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j t ƒ | j d 
d d d d d d d ƒ | j |	 d d d d  ƒ| j t d | d ƒ |	 d! d
 d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p5d } n  | d  
k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d( d) d* ƒ| j  t
 d t |	 ƒ d | 
>temp_995|source:ó|start:5443|stop:5943
Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j d 7_ t S(   Ni
   t    R(   (   R*   R   R1   t   readlinet   stripR	   R   R"   R   RE  
 R5   R2   t   tellR-   R>   (   R   R   t   line(    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR;   ˆ   s$    	
	#c         C   s7   |  j  ƒ  x& |  j ƒ  r2 
|  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R   RE   (   R   t  
 read_id(    (    sJ
>temp_996|source:ó|start:8438|stop:8938
figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R   t   sub
plots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t   xtick
st   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   meant   
stdt   minR)   t   savefigt   show(   R   t   titlet   destt   max_seq_l
ent	   xtickstept	   ytickstept   pltRh   t   sequence_lengthst   seq_len_dis
tributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/data/azomer/reads
-for-assembly/traini
>temp_997|source:ó|start:8415|stop:8915
  t   maxt   intt   figuret   rcParamst   updatet   rct   GridSpect   su
bplotRO   R   t   subplots_adjustt   plott   fill_betweenR   t   ylabelt 
  xlabelR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt
   numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   title
t   destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_
lengthst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ  
 /mnt/data/azomer/re
>temp_998|source:ó|start:9582|stop:10082
 |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R( 
  (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR  
 R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR     s    									:c         C   sF  |  j  j ƒ 
 d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | 
j ƒ  |  _ g  |  j
>temp_999|source:ó|start:1776|stop:2276
c         C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R 
  R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets
2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           B
   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t
 | ƒ |  _ x‘ |  
>temp_1000|source:ó|start:2211|stop:2711
   c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  
j j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |
  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i   
 i   s)   [fastalib] Reading FASTA into memory: %ss   (   R   t	   sequence
st   SequenceSourcet   fastat   nextt   post   syst   stderrR   t   flus
ht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads
-for-assembly/traini
>temp_1001|source:ó|start:6497|stop:6997
  C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ |  j d
 d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ
 ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } 
n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i
 d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ 
} | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j
 t d | d ƒ |	 
>temp_1002|source:ó|start:1004|stop:1504
      C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   
R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ } n 
 |  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   !   s    iP   c         C   sk   t  d t | ƒ | ƒ t |
 ƒ g } d j g  
>temp_1003|source:ó|start:4955|stop:5455
j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	 
  R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False( 
  R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c         C  
 sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  } | sr t | ƒ
 rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j j ƒ  t |
 ƒ ƒ Pn  | | j
>temp_1004|source:ó|start:1282|stop:1782
(   R   R   R   (   R   R   R   (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s    iP   c   
      C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t | ƒ d 
ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t   rangeR
   t   join(   R   t   sequencet   piece_lengtht   tickst   x(    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  &   s    %c   
>temp_1005|source:ó|start:1894|stop:2394
assembly/trainingsets2/Vibrio/utils/fastalib.pyR   *   s    (	   t   __name_
_t   __module__R   t   TrueR   R   R   R   R   (    (    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR       s 
  		t	   ReadFastac           B   s   e  Z d  „  Z d „  Z RS(  
 c         C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j 
j d d k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j
  j |  j j ƒ |  
>temp_1006|source:ó|start:44|stop:544
l Z d  d l Z d d d „  ƒ  YZ d d d „  ƒ  YZ d d d „  ƒ  YZ d d
 d	 „  ƒ  YZ e d k rÄ e e  j d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d
 n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc           B   sJ   e  Z d  „ 
 Z e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c        
 C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht
   opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/tr
>temp_1007|source:ó|start:4738|stop:5238
/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s
    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |
  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _
 | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R
"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyRD   x   s  
>temp_1008|source:ó|start:6408|stop:6908
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				c
         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  rŸ
 |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t
 |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ p
â d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } |
 j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d
 d !ƒ } | j t
>temp_1009|source:ó|start:2848|stop:3348
    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fas
talib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/a
zomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	
R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d
 „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s
  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | | 
 _	 i  |  _ g  |  _
>temp_1010|source:ó|start:1857|stop:2357
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R   R  
 R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2
/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac           B 
  s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t
 | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j 
d |  j j ƒ t 
>temp_1011|source:ó|start:8659|stop:9159
numpyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet  
 destt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_leng
thst   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visu
alize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R 
>temp_1012|source:ó|start:9262|stop:9762
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   B   s   								
t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z
 RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  | 
 _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g 
 |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R( 
  (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR  
 R   R1   R2   t  
>temp_1013|source:ó|start:6634|stop:7134
ƒ  n  | j t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t 
| ƒ d ƒ pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j 
d	 d@ ƒ } | j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ
 } | j | d d !ƒ } | j t ƒ | j d d d d d d d d ƒ | j |
	 d d d d  ƒ| j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ |
 j d$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  |
 j t | | d |
>temp_1014|source:ó|start:4724|stop:5224
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR!   r   s    	c         C   s£   |  j  r› |  j d k r” |  j |  j
 k  r” |  j |  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  
_ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (
   R   RA   R"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t 
  hash_entry(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.p
>temp_1015|source:ó|start:156|stop:656
 d ƒ Z	 e	 j d e e  j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt
   FastaOutputc           B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d
 „ Z d d „ Z d „  Z RS(   c         C   s   | |  _  t | d ƒ |
  _ d  S(   Nt   w(   t   output_file_patht   opent   output_file_obj(  
 t   selfR   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/
Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j  r9
 | r9 |  j d | j
>temp_1016|source:ó|start:3489|stop:3989
 |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	
   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt
   unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   tot
al_seqR   t	   readlinest   startswitht   resett   init_unique_hash(   R   R
)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/trai
ningsets2/Vibrio/utils/fastalib.pyR   C   s(    													
:	c         
>temp_1017|source:ó|start:8142|stop:8642
   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.2f / std: %.2f / min: %s
 / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png(   i   i   (-   t  
 matplotlib.pyplott   pyplott   matplotlib.gridspect   gridspecR6   R!   R"  
 R#   R$   R   R%   R&   R   R   t   maxt   intt   figuret   rcParamst   
updatet   rct   GridSpect   subplotRO   R   t   subplots_adjustt   plott 
  fill_betweenR   t   ylabelt   xlabelR*   t   xtickst   ytickst   xlimt 
  ylimt   figtextRE
>temp_1018|source:ó|start:3777|stop:4277
t   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(   
 													:	c         C   s  x§ |  j  ƒ  r© t j
 |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d
 c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg  t	 g  
|  j D] } |  j | d | f ^ qº d t ƒD] } | d ^ qä |  _ t |  j
 ƒ |  _ |  j ƒ  
>temp_1019|source:ó|start:4853|stop:5353
  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j
 rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(
   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R
   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j 
j ƒ  } | sr t |
>temp_1020|source:ó|start:2393|stop:2893
 j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib]
 Reading FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet
   fastat   nextt   post   syst   stderrR   t   flusht   appendR	   R   
(   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/training
sets2/Vibrio/utils/fastalib.pyR   /   s    		(c         C  
 s   |  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/a
zomer/reads-for-asse
>temp_1021|source:ó|start:8582|stop:9082
lR*   t   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   num
pyt   meant   stdt   minR)   t   savefigt   show(   R   t   titlet   de
stt   max_seq_lent	   xtickstept	   ytickstept   pltRh   t   sequence_lengths
t   seq_len_distributionR8   t   figt   gst   ax1t   y(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt&   visuali
ze_sequence_length_distribution³   sx    ")
"/
>temp_1022|source:ó|start:4857|stop:5357
 |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc
 | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   N
i    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   
R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&
c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ
  } | sr t | ƒ 
>temp_1023|source:ó|start:6742|stop:7242
 g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d
 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } |
 j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d
 | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d%
 p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d | ƒ d' d
( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	 ƒ d | ƒ D] } |
 ^ qÅd) d* ƒ| 
>temp_1024|source:ó|start:4252|stop:4752
 |  j ƒ |  _ |  j ƒ  d  S(   NR   t   counti   R	   R   t   reverse(
   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   
R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR7   _   s    	Jc         C   s!   |  j  r |  j
 ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ 
  /mnt/data/azomer/r
>temp_1025|source:ó|start:2890|stop:3390
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R 
  R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   .   s   	R   c           B   sn   e  Z e e e
 d  „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	
 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d 
|  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ 
|  _ |  j j d ƒ
>temp_1026|source:ó|start:8971|stop:9471
&   visualize_sequence_length_distribution³   sx    ")
"/)/*)		
N(   R   R   R   RE   R   R7   R!   RD   R;   RM   R   R6   R*   
RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyR   B   s   								t   QualSourcec           B 
  s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c         C   sÇ  
 | |  _  d  |  _ |
>temp_1027|source:ó|start:1851|stop:2351
 (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/f
astalib.pyR   *   s    (	   t   __name__t   __module__R   t   TrueR   R 
  R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/fastalib.pyR       s   		t	   ReadFastac        
   B   s   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  | 
 _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t 
j j d |  j j ƒ
>temp_1028|source:ó|start:10232|stop:10732
 g  |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R( 
  t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5 
  R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N( 
  R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainin
gsets2/Vibrio/utils/
>temp_1029|source:ó|start:7251|stop:7751
d, | ƒ | j" d- d d. t |	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) 
d4 d5 d6 ƒ| j | d7 ƒ } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ 
| j g  ƒ | j% d9 d0 d: t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ 
f d; d6 d d< d) d= ƒ| d  k r|  j* } n  y | j+ | d> ƒ Wn | 
j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  S(A   Niÿÿÿÿi'  i    i   s   [f
astalib] Reading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.l
inewidtht   gridt 
>temp_1030|source:ó|start:2862|stop:3362
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   >
   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/azomer/reads-fo
r-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   .   s   	R   c       
    B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d „  Z d „  
Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d 
 |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g 
 |  _ d |  _ t |
>temp_1031|source:ó|start:2181|stop:2681
   e  Z d  „  Z d „  Z RS(   c         C   sÉ   g  |  _  g  |  _ t | 
ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d k s[ |  j j d k r… t j j d |
  j j ƒ t j j	 ƒ  n  |  j  j |  j j ƒ |  j j |  j j ƒ q$ Wt j
 j d ƒ d  S(   Niè  i    i   s)   [fastalib] Reading FASTA into memory: 
%ss   (   R   t	   sequencest   SequenceSourcet   fastat   nextt   post 
  syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    ( 
   sJ   /mnt/data/az
>temp_1032|source:ó|start:2515|stop:3015
   t	   sequencest   SequenceSourcet   fastat   nextt   post   syst   stde
rrR   t   flusht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/d
ata/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s 
   		(c         C   s   |  j  j ƒ  d  S(   N(   R    R 
  (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ 
  /mnt/data/azomer/r
>temp_1033|source:ó|start:2529|stop:3029
cest   SequenceSourcet   fastat   nextt   post   syst   stderrR   t   fl
usht   appendR	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   /   s    		(
c         C   s   |  j  j ƒ  d  S(   N(   R    R   (   R   ( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /mnt/data/az
omer/reads-for-assem
>temp_1034|source:ó|start:4324|stop:4824
everse(   t   next_regulart   hashlibt   sha1R   t   uppert	   hexdigestR.
   R&   R	   t   sortedR   R/   R   t   total_uniqueR6   (   R   t   hasht
   i(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/uti
ls/fastalib.pyR7   _   s    	Jc         C   s!   |  j 
 r |  j ƒ  S|  j ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    
(    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastali
b.pyR!   r   s    
>temp_1035|source:ó|start:3630|stop:4130
  unique_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_point
ert   seekt	   total_seqR   t	   readlinest   startswitht   resett   init_uni
que_hash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    						
							:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ 
 ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j | d c d 7<q
 i |  j d 6|  j 
>temp_1036|source:ó|start:7798|stop:8298
dthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rig
htg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s   number of sequences
s   sequence lengthi2   i   t   rotationiZ   t   sizes   xx-smallt   xmint
   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-l
arget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   total: %s / mean: %.
2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-larges   .pdfs   .png
(   i   i   (-   
>temp_1037|source:ó|start:5967|stop:6467
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    
s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R
   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils
/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  
|  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   ( 
  R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/u
tils/fastalib.pyR6  
>temp_1038|source:ó|start:8269|stop:8769
s   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   matplotlib.gr
idspect   gridspecR6   R!   R"   R#   R$   R   R%   R&   R   R   t   maxt  
 intt   figuret   rcParamst   updatet   rct   GridSpect   subplotRO   R  
 t   subplots_adjustt   plott   fill_betweenR   t   ylabelt   xlabelR*   t
   xtickst   ytickst   xlimt   ylimt   figtextRE   t   textt   numpyt   
meant   stdt   minR)   t   savefigt   show(   R   t   titlet   destt   
max_seq_lent	   xtic
>temp_1039|source:ó|start:3475|stop:3975
 ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   N
onet   namet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   uni
que_hash_dictt   unique_hash_listt   unique_next_hashR   t   file_pointert 
  seekt	   total_seqR   t	   readlinest   startswitht   resett   init_unique_h
ash(   R   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    								
					:	
>temp_1040|source:ó|start:9354|stop:9854
cec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c  
       C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _
 g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t g  |  j j ƒ  
D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R(   (   t   
quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR   R   R1   R
2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (    (    sJ   /
mnt/data/azomer/read
>temp_1041|source:ó|start:5822|stop:6322
  ƒ  x& |  j ƒ  r2 |  j | k r |  j Sq Wt S(   N(   R6   R!   R	   R 
  RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    c
         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j 
d ƒ d  S(   Ni  
>temp_1042|source:ó|start:3689|stop:4189
hR   t   file_pointert   seekt	   total_seqR   t	   readlinest   startswitht
   resett   init_unique_hash(   R   R)   R,   R   R-   t   l(    (    sJ   
/mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   C
   s(    													:	c         C   s  x§ |  j  ƒ 
 r© t j |  j j ƒ  ƒ j ƒ  } | |  j k rq |  j | d j |  j ƒ |  j
 | d c d 7<q i |  j d 6|  j g d 6|  j d 6d d 6|  j | <q Wg
  t	 g  |  j D] }
>temp_1043|source:ó|start:6083|stop:6583
  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   ©   s    c     
    C   s8   d |  _  d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni   
 (   R"   R*   R	   R   R   R1   R2   (   R   (    (    sJ   /mnt/data/azom
er/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    			
	c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j 
ƒ  rŸ |  j d d k
>temp_1044|source:ó|start:7291|stop:7791
ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ } | j j
 i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d: t | ƒ t&
 j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r| 
 j* } n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n
 Xd  S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t
   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	  
 linestylet   -t	  
>temp_1045|source:ó|start:3088|stop:3588
R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z d „  Z d
 „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s
  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | | 
 _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _
 nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 r
ý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet
	   lazy_initt   al
>temp_1046|source:ó|start:10266|stop:10766
 q|  _ |  j d 7_ t S(   Ni   RG   R(   t    (   R1   RH   RI   R	   R
*   R‘   R’   R   R   Rj   R"   R   RE   R5   R2   RJ   (   R   t   qualscor
esRK   t   q(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vi
brio/utils/fastalib.pyR!     s*    		+#+c
         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (    (    sJ
   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR 
  9  s    c    
>temp_1047|source:ó|start:732|stop:1232
   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	 
  write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
t   store   s    *c         C   s   |  j  j d | ƒ d  S(   Ns
   >%s(   R   t   write(   R   R	   (    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR      s    c         
C   s0   | r |  j 
>temp_1048|source:ó|start:9033|stop:9533
")"/)/*)	
	N(   R   R   R   RE   R   R7   R!   RD   R;   RM
   R   R6   R*   RŽ   (    (    (    sJ   /mnt/data/azomer/reads-for-assembly/t
rainingsets2/Vibrio/utils/fastalib.pyR   B   s   								t   QualSou
rcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „  Z RS(   c 
        C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d  |  _ d  |  _
 g  |  _ t	 |  j  
>temp_1049|source:ó|start:4128|stop:4628
 g d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f
 ^ qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   N
R   t   counti   R	   R   t   reverse(   t   next_regulart   hashlibt  
 sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t   t
otal_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc       
>temp_1050|source:ó|start:288|stop:788
e e d „ Z d „  Z e d „ Z d d „ Z d „  Z RS(   c         C  
 s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   t   output_file_patht   
opent   output_file_obj(   t   selfR   (    (    sJ   /mnt/data/azomer/reads
-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   __init__   s    	c
         C   s`   | j  r9 | r9 |  j d | j d t | j ƒ f ƒ n |  
j | j ƒ |  j | j | ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   u
niquet   write_idt
>temp_1051|source:ó|start:4523|stop:5023
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   
R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    	c         C  
 s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j |  j } |  j d
 7_ |  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn 
t Sd  S(   Ni    i
>temp_1052|source:ó|start:491|stop:991
(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fa
stalib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d
 | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns
   %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	
   write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (  
  sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.p
yt   store   s   
>temp_1053|source:ó|start:6585|stop:7085
s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j t |  j ƒ ƒ q1
 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d } n  d g
 | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j i d d 6
ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d !ƒ } | j
 t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ| j t d |
 d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k r;| d% p
5d } n  | d  k 
>temp_1054|source:ó|start:9339|stop:9839
	t   QualSourcec           B   s/   e  Z e d  „ Z d „  Z d „  Z d „ 
 Z RS(   c         C   sÇ   | |  _  d  |  _ | |  _ d |  _ d  |  _ d 
 |  _ d  |  _ g  |  _ t	 |  j  ƒ |  _ |  j j d ƒ |  j r d  |  _ nD t
 g  |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    
R(   (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR
   R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (
    (    sJ   /mnt/d
>temp_1055|source:ó|start:414|stop:914
  w(   t   output_file_patht   opent   output_file_obj(   t   selfR   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyt   __init__   s    	c         C   s`   | j  r9 | r9 |  j d |
 j d t | j ƒ f ƒ n |  j | j ƒ |  j | j | ƒ d  S(   Ns  
 %s|%ss   frequency:%d(   t   uniquet   write_idt   idt   lent   idst	   
write_seqt   seq(   R   t   entryt   splitt   store_frequencies(    (    s
J   /mnt/data/azomer
>temp_1056|source:ó|start:10297|stop:10797
 Ni   RG   R(   t    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R" 
  R   RE   R5   R2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mn
t/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     
s*    		+#+c         C   s   |  j  j ƒ 
 d  S(   N(   R1   R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-a
ssembly/trainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   s
A   d |  _  d  |  _
>temp_1057|source:ó|start:7039|stop:7539
$ ƒ | d  k r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j 
t | | d | ƒ d' d( d) d* ƒ| j  t d t |	 ƒ d | ƒ g  t d t |	
 ƒ d | ƒ D] } | ^ qÅd) d* ƒ| j! d+ d d, | ƒ | j" d- d d. t 
|	 ƒ t |	 ƒ d/ ƒ | j# d0 d1 d2 | d3 d d) d4 d5 d6 ƒ| j | d7 ƒ
 } | j j i d& d8 6ƒ | j t$ ƒ | j  g  ƒ | j g  ƒ | j% d9 d0 d:
 t | ƒ t& j' | ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ|
 d  k r|  j* } n
>temp_1058|source:ó|start:10706|stop:11206
rainingsets2/Vibrio/utils/fastalib.pyR   9  s    c         C   sA   d | 
 _  d  |  _ d  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"  
 R*   R	   R‘   R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   <  s    					
(   R   R   R   R   R!   R   R6   (    (    (    sJ   /mnt/data/azomer/re
ads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR     s   		t 
  __main__i   R‚   
>temp_1059|source:ó|start:1432|stop:1932
P   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t  d t
 | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i   (   t
   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst   x( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyR   &   s    %c         C   s   |  j  j ƒ  d  S(   N(   R   
t   close(   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainings
ets2/Vibrio/utils/fa
>temp_1060|source:ó|start:7456|stop:7956
 ƒ t& j( | ƒ t) | ƒ t | ƒ f d; d6 d d< d) d= ƒ| d  k r|  j* }
 n  y | j+ | d> ƒ Wn | j+ | d? ƒ n Xy | j, ƒ  Wn n Xd  
S(A   Niÿÿÿÿi'  i    i   s   [fastalib] Reading: %ss   g      Y@i   t   fi
gsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht   gridt   colors   0.50t	   line
stylet   -t	   linewidthgš™™™™™¹?i   t   leftgš™™™™™©?t   bottomg¸…ëQ¸ž?t 
  topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s
   number of sequenc
>temp_1061|source:ó|start:3210|stop:3710
 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ | |  _ | |  _ d 
|  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ 
|  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ 
r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t
   fasta_file_patht   Nonet   namet	   lazy_initt   allow_mixed_caseR"   R	 
  R   R   R   t   unique_hash_dictt   unique_hash_listt   unique_next_hash
R   t   file_point
>temp_1062|source:ó|start:4284|stop:4784
   NR   t   counti   R	   R   t   reverse(   t   next_regulart   hashlib
t   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR   R/   R   t
   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/data/azomer/rea
ds-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s    
	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S(   N(   R
   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-ass
embly/trainingsets2/
>temp_1063|source:ó|start:1548|stop:2048
 d !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t  
 sequencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-
for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c       
  C   s   |  j  j ƒ  d  S(   N(   R   t   close(   R   (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   * 
  s    (	   t   __name__t   __module__R   t   TrueR   R   R   R   R   
(    (    (    sJ   
>temp_1064|source:ó|start:7857|stop:8357
 topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s 
  number of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes
   xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %
st   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6  
 total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-la
rges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   m
atplotlib.gridspect
>temp_1065|source:ó|start:4423|stop:4923
ortedR   R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ  
 /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   
_   s    	Jc         C   s!   |  j  r |  j ƒ  S|  j
 ƒ  Sd  S(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data
/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!   r   s    
	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |  j 
|  j } |  j d
>temp_1066|source:ó|start:3892|stop:4392
gsets2/Vibrio/utils/fastalib.pyR   C   s(    													:
	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } | |  j 
k rq |  j | d j |  j ƒ |  j | d c d 7<q i |  j d 6|  j g
 d 6|  j d 6d d 6|  j | <q Wg  t	 g  |  j D] } |  j | d | f ^
 qº d t ƒD] } | d ^ qä |  _ t |  j ƒ |  _ |  j ƒ  d  S(   NR  
 t   counti   R	   R   t   reverse(   t   next_regulart   hashlibt   sha
1R   t   uppert	  
>temp_1067|source:ó|start:5020|stop:5520
  i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R 
  t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!&c
         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x |  j j ƒ  }
 | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j | 
 j j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  
j d 7_ t S(  
>temp_1068|source:ó|start:3190|stop:3690
 Z d „  Z d	 d	 d	 d	 d „ Z RS(   c         C   s  | |  _  d  |  _ |
 |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ d
 |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ 
 D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S( 
  Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   allo
w_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_listt
   unique_next_hash
>temp_1069|source:ó|start:4818|stop:5318
    	c         C   s£   |  j  r› |  j d k r” |  j |  j k  r” |  j |
  j |  j } |  j d 7_ |  j rc | d n | d j ƒ  |  _ | d |  _
 | d |  _	 t St Sn t Sd  S(   Ni    i   R   R	   R   (   R   RA   R
"   R.   R/   R-   R>   R   R	   R   R   t   False(   R   t   hash_entry( 
   (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fast
alib.pyRD   x   s    	!&c         C   sì   d  |  _ |  j j
 ƒ  d j ƒ  |  _ 
>temp_1070|source:ó|start:10155|stop:10655
 |  j  j |  j  j ƒ  t | ƒ ƒ Pn  | d | j ƒ  7} q7 | j ƒ  |  _ g 
 |  j j ƒ  D] } t	 | ƒ ^ q|  _ |  j d 7_ t S(   Ni   RG   R(   t
    (   R1   RH   RI   R	   R*   R‘   R’   R   R   Rj   R"   R   RE   R5   R
2   RJ   (   R   t   qualscoresRK   t   q(    (    sJ   /mnt/data/azomer/read
s-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(   R
1   R   (   R   (
>temp_1071|source:ó|start:7662|stop:8162
ading: %ss   g      Y@i   t   figsizei   i   gÍÌÌÌÌÌì?s   axes.linewidtht 
  gridt   colors   0.50t	   linestylet   -t	   linewidthgš™™™™™¹?i   t   le
ftgš™™™™™©?t   bottomg¸…ëQ¸ž?t   topgffffffî?t   rightg\Âõ(\ï?t   blackt 
  alphag333333Ó?t   y2g333333Ã?s   number of sequencess   sequence lengthi2  
 i   t   rotationiZ   t   sizes   xx-smallt   xmint   xmaxt   ymint   ym
axg      4@g      à?g¸…ëQ¸î?s   %st   weights   xx-larget   hat   centeri	
   s   axes.edgecol
>temp_1072|source:ó|start:6406|stop:6906
reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR6   ¬   s    				
c         C   sX  d d  l  j } d d  l j } g  } |  j ƒ  xo |  j ƒ  
rŸ |  j d d k s_ |  j d k r† t j j	 d |  j ƒ t j j ƒ  n  | j
 t |  j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ
 pâ d } n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } 
| j j i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j |
 d d !ƒ } | j 
>temp_1073|source:ó|start:2400|stop:2900
  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [fastalib] Readin
g FASTA into memory: %ss   (   R   t	   sequencest   SequenceSourcet   fast
at   nextt   post   syst   stderrR   t   flusht   appendR	   R   (   R
   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   /   s    		(c         C   s   |
  j  j ƒ  d  S(   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/tr
>temp_1074|source:ó|start:9706|stop:10206
   R	   t   qualst	   quals_intR   R   R1   R2   t   total_qualsR   R4   R5 
  R6   (   R   R   R,   R8   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vibrio/utils/fastalib.pyR     s    									
:c         C   sF  |  j  j ƒ  d j ƒ  |  _ d  |  _ d  |  _ d } xÂ 
|  j  j ƒ  } | s¬ t | ƒ r¥ | j ƒ  |  _ g  |  j j ƒ  D] } t	 | ƒ ^
 qw |  _ |  j d 7_ t St Sn  | j d ƒ râ |  j  j |  j  j ƒ  t | ƒ 
ƒ Pn  | d | j
>temp_1075|source:ó|start:925|stop:1425
assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c   
      C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write(   R
   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio
/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ } n  
|  j j d | ƒ d  S(   Ns   %s(   R   R   R   (   R   R   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   !   s   
>temp_1076|source:ó|start:4700|stop:5200
iqueR;   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingse
ts2/Vibrio/utils/fastalib.pyR!   r   s    	c         C   s£   |  j  r› | 
 j d k r” |  j |  j k  r” |  j |  j |  j } |  j d 7_ |  j rc |
 d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd  S(   Ni  
  i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	   R   R 
  t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/reads-for-as
sembly/trainingsets2
>temp_1077|source:ó|start:1311|stop:1811
 R   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibr
io/utils/fastalib.pyR   !   s    iP   c         C   sk   t  d t | ƒ
 | ƒ t | ƒ g } d j g  t  d t | ƒ d ƒ D] } | | | | | d
 !^ qE ƒ S(   Ni    s   i   (   t   rangeR   t   join(   R   t   se
quencet   piece_lengtht   tickst   x(    (    sJ   /mnt/data/azomer/reads-for
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   &   s    %c         C
   s   |  j  j ƒ  
>temp_1078|source:ó|start:3404|stop:3904
|  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  
j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   n
amet	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_
dictt   unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	 
  total_seqR   t	   readlinest   startswitht   resett   init_unique_hash(   R
   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibri
>temp_1079|source:ó|start:553|stop:1053
s2/Vibrio/utils/fastalib.pyt   __init__   s    	c         C   s`   | j 
 r9 | r9 |  j d | j d t | j ƒ f ƒ n |  j | j ƒ |  j | j 
| ƒ d  S(   Ns   %s|%ss   frequency:%d(   t   uniquet   write_idt   id
t   lent   idst	   write_seqt   seq(   R   t   entryt   splitt   store_f
requencies(    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibri
o/utils/fastalib.pyt   store   s    *c         C   s   |  j  j d 
| ƒ d  S(   Ns
>temp_1080|source:ó|start:2918|stop:3418
/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (    (    sJ   /
mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   . 
  s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z d „  Z 
d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS(   c      
   C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |
  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j 
r£ d  |  _ nD t g 
>temp_1081|source:ó|start:5893|stop:6393
 R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/azomer/reads-for-
assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_read_id    s    
c         C   s   |  j  j ƒ  d  S(   N(   R1   R   (   R   (  
  (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fasta
lib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d  |  _ g  |  _ 
|  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1   R2   (   R  
 (    (    sJ   /mnt
>temp_1082|source:ó|start:5344|stop:5844
 sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ |  j j |  j 
j ƒ  t | ƒ ƒ Pn  | | j ƒ  7} q. |  j rÊ | n	 | j ƒ  |  _ |  j 
d 7_ t S(   Ni   t    R(   (   R*   R   R1   t   readlinet   stripR	  
 R   R"   R   RE   R5   R2   t   tellR-   R>   (   R   R   t   line(    ( 
   sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.
pyR;   ˆ   s$    		#c         C   s7   |  j  
ƒ  x& |  j ƒ  r2 |
>temp_1083|source:ó|start:5876|stop:6376
  N(   R6   R!   R	   R   RE   (   R   t   read_id(    (    sJ   /mnt/data/
azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   get_seq_by_
read_id    s    c         C   s   |  j  j ƒ  d  S(   N(   R1   R
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   ©   s    c         C   s8   d |  _  d  |  _ d
  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R   R   R1
   R2   (   R   ( 
>temp_1084|source:ó|start:6653|stop:7153
j ƒ ƒ q1 W|  j ƒ  t j j	 d ƒ | sé t | ƒ t t | ƒ d ƒ pâ d 
} n  d g | d }	 x | D] } |	 | c d 7<qW| j d	 d@ ƒ } | j j
 i d d 6ƒ | j d d d d d d d ƒ| j d d ƒ } | j | d d 
!ƒ } | j t ƒ | j d d d d d d d d ƒ | j |	 d d d d  ƒ|
 j t d | d ƒ |	 d! d d d d d" ƒ| j d# ƒ | j d$ ƒ | d  k
 r;| d% p5d } n  | d  k r`t |	 ƒ d& pZd } n  | j t | | d |
 ƒ d' d( d) d* ƒ
>temp_1085|source:ó|start:762|stop:1262
t   uniquet   write_idt   idt   lent   idst	   write_seqt   seq(   R   t
   entryt   splitt   store_frequencies(    (    sJ   /mnt/data/azomer/reads-f
or-assembly/trainingsets2/Vibrio/utils/fastalib.pyt   store   s    *c 
        C   s   |  j  j d | ƒ d  S(   Ns   >%s(   R   t   write( 
  R   R	   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vib
rio/utils/fastalib.pyR      s    c         C   s0   | r |  j  | ƒ } 
n  |  j j d | ƒ
>temp_1086|source:ó|start:182|stop:682
j ƒ d k rº e  j d n d ƒ n  d S(   iÿÿÿÿNt   FastaOutputc         
  B   sJ   e  Z d  „  Z e e d „ Z d „  Z e d „ Z d d „ Z d „  
Z RS(   c         C   s   | |  _  t | d ƒ |  _ d  S(   Nt   w(   
t   output_file_patht   opent   output_file_obj(   t   selfR   (    (    s
J   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyt
   __init__   s    	c         C   s`   | j  r9 | r9 |  j d | j d t
 | j ƒ f ƒ 
>temp_1087|source:ó|start:3565|stop:4065
met	   lazy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_d
ictt   unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	  
 total_seqR   t	   readlinest   startswitht   resett   init_unique_hash(   R
   R)   R,   R   R-   t   l(    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   C   s(    												
	:	c         C   s  x§ |  j  ƒ  r© t j |  j j ƒ  ƒ j ƒ  } |
 |  j k rq |  j |
>temp_1088|source:ó|start:10480|stop:10980
-assembly/trainingsets2/Vibrio/utils/fastalib.pyR!     s*    		
+#+c         C   s   |  j  j ƒ  d  S(   N(   R1   R
   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/V
ibrio/utils/fastalib.pyR   9  s    c         C   sA   d |  _  d  |  _ d
  |  _ d  |  _ g  |  _ |  j j d ƒ d  S(   Ni    (   R"   R*   R	   R‘ 
  R’   R   R1   R2   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assemb
ly/trainingsets2/Vib
>temp_1089|source:ó|start:2578|stop:3078
t   syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR   /   s    		(c         C   s   |  j  j ƒ  d  S(
   N(   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/
trainingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (
    (    (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/util
s/fastalib.pyR   . 
>temp_1090|source:ó|start:4929|stop:5429
|  j rc | d n | d j ƒ  |  _ | d |  _ | d |  _	 t St Sn t Sd 
 S(   Ni    i   R   R	   R   (   R   RA   R"   R.   R/   R-   R>   R   R	
   R   R   t   False(   R   t   hash_entry(    (    sJ   /mnt/data/azomer/r
eads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyRD   x   s    	!
&c         C   sì   d  |  _ |  j j ƒ  d j ƒ  |  _ d } x | 
 j j ƒ  } | sr t | ƒ rk | |  _ |  j d 7_ t St	 Sn  | j d ƒ r¨ 
|  j j |  j j ƒ 
>temp_1091|source:ó|start:2224|stop:2724
   C   sÉ   g  |  _  g  |  _ t | ƒ |  _ x‘ |  j j ƒ  r´ |  j j d d 
k s[ |  j j d k r… t j j d |  j j ƒ t j j	 ƒ  n  |  j  j |  j 
j ƒ |  j j |  j j ƒ q$ Wt j j d ƒ d  S(   Niè  i    i   s)   [
fastalib] Reading FASTA into memory: %ss   (   R   t	   sequencest   Sequenc
eSourcet   fastat   nextt   post   syst   stderrR   t   flusht   appendR
	   R   (   R   t   f_name(    (    sJ   /mnt/data/azomer/reads-for-assembly
/trainingsets2/Vibri
>temp_1092|source:ó|start:4272|stop:4772
 ƒ  d  S(   NR   t   counti   R	   R   t   reverse(   t   next_regular
t   hashlibt   sha1R   t   uppert	   hexdigestR.   R&   R	   t   sortedR  
 R/   R   t   total_uniqueR6   (   R   t   hasht   i(    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR7   _   s   
 	Jc         C   s!   |  j  r |  j ƒ  S|  j ƒ  Sd  S
(   N(   R   t   next_uniqueR;   (   R   (    (    sJ   /mnt/data/azomer/r
eads-for-assembly/tr
>temp_1093|source:ó|start:3253|stop:3753
 |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i
  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD 
t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  
j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   l
azy_initt   allow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   
unique_hash_listt   unique_next_hashR   t   file_pointert   seekt	   total_s
eqR   t	   readlines
>temp_1094|source:ó|start:2503|stop:3003
s   (   R   t	   sequencest   SequenceSourcet   fastat   nextt   post   
syst   stderrR   t   flusht   appendR	   R   (   R   t   f_name(    (   
 sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.py
R   /   s    		(c         C   s   |  j  j ƒ  d  S(   N(
   R    R   (   R   (    (    sJ   /mnt/data/azomer/reads-for-assembly/train
ingsets2/Vibrio/utils/fastalib.pyR   >   s    (   R   R   R   R   (    (
    (    sJ   /mnt/d
>temp_1095|source:ó|start:1185|stop:1685
      s    c         C   s0   | r |  j  | ƒ } n  |  j j d | ƒ 
d  S(   Ns   %s(   R   R   R   (   R   R   R   (    (    sJ   /mnt/dat
a/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastalib.pyR   !   s   
 iP   c         C   sk   t  d t | ƒ | ƒ t | ƒ g } d j g  t
  d t | ƒ d ƒ D] } | | | | | d !^ qE ƒ S(   Ni    s   i  
 (   t   rangeR   t   join(   R   t   sequencet   piece_lengtht   tickst
   x(    (    sJ   
>temp_1096|source:ó|start:3067|stop:3567
b.pyR   .   s   	R   c           B   sn   e  Z e e e d  „ Z d „  Z
 d „  Z d „  Z d „  Z d „  Z	 d „  Z d „  Z d	 d	 d	 d	 d „ Z RS( 
  c         C   s  | |  _  d  |  _ | |  _ | |  _ d |  _ d  |  _ d 
 |  _ g  |  _ | |  _	 i  |  _ g  |  _ d |  _ t |  j  ƒ |  _ |  j j d
 ƒ |  j r£ d  |  _ nD t g  |  j j ƒ  D] } | j d ƒ r¶ | ^ q¶ ƒ | 
 _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(   Ni    t   >(   t   fasta_file_pa
tht   Nonet   name
>temp_1097|source:ó|start:3269|stop:3769
| |  _ | |  _ d |  _ d  |  _ d  |  _ g  |  _ | |  _	 i  |  _ g  |  _ 
d |  _ t |  j  ƒ |  _ |  j j d ƒ |  j r£ d  |  _ nD t g  |  j j ƒ
  D] } | j d ƒ r¶ | ^ q¶ ƒ |  _ |  j ƒ  |  j	 rý |  j ƒ  n  d  S(
   Ni    t   >(   t   fasta_file_patht   Nonet   namet	   lazy_initt   all
ow_mixed_caseR"   R	   R   R   R   t   unique_hash_dictt   unique_hash_list
t   unique_next_hashR   t   file_pointert   seekt	   total_seqR   t	   readl
inest   startswitht
>temp_1098|source:ó|start:9582|stop:10082
 |  j j ƒ  D] } | j d ƒ r’ | ^ q’ ƒ |  _ |  j ƒ  d  S(   Ni    R( 
  (   t   quals_file_pathR*   R+   R,   R"   R	   t   qualst	   quals_intR  
 R   R1   R2   t   total_qualsR   R4   R5   R6   (   R   R   R,   R8   (   
 (    sJ   /mnt/data/azomer/reads-for-assembly/trainingsets2/Vibrio/utils/fastal
ib.pyR     s    									:c         C   sF  |  j  j ƒ 
 d j ƒ  |  _ d  |  _ d  |  _ d } xÂ |  j  j ƒ  } | s¬ t | ƒ r¥ | 
j ƒ  |  _ g  |  j
>temp_1099|source:ó|start:7858|stop:8358
topgffffffî?t   rightg\Âõ(\ï?t   blackt   alphag333333Ó?t   y2g333333Ã?s  
 number of sequencess   sequence lengthi2   i   t   rotationiZ   t   sizes 
  xx-smallt   xmint   xmaxt   ymint   ymaxg      4@g      à?g¸…ëQ¸î?s   %s
t   weights   xx-larget   hat   centeri	   s   axes.edgecolorg{®Gáz”?s6   
total: %s / mean: %.2f / std: %.2f / min: %s / max: %st   vagš™™™™™é?s   x-lar
ges   .pdfs   .png(   i   i   (-   t   matplotlib.pyplott   pyplott   ma
tplotlib.gridspect 
