Swiss-prot$B$X$N(Bfunction$B$N%?%0$NIU$1J}(B

３種類のタグ

'+':機能既知の条件=true かつ 推定機能の条件=false
'?':機能既知の条件=true かつ 推定機能の条件=true
'U':機能既知の条件=false
$B$A$J$_$K(BSwiss-prot rel. 38$B$G$N?t$NFbLu$O!"(B
'+'  :63169 (78.9 %)
'?'  : 6557 ( 8.1 %)
'U'  :10274 (12.8 %)
total:80000
になります。

機能既知の条件

次の条件のどれかがあてはまれば、既知とします。

  1. "KW"$B$NMs$K(Binformative$B$J(Bkeyword$B$,#1$D0J>e$"$k$3$H(B
  2. "DE"$B$NMs$K(Binformative$B$JJ8;z$,#6J8;z0J>e$"$k$3$H(B
  3. "CC"$B$NMs$K(B"FUNCTION"$B$+(B"CATALYTIC ACTIVITY"$B$N5-=R$,$"$k$3$H(B
条件の詳細
  1. informative$B$G$J$$(BKeyword$B$O!"0J2(B $B5!G=4{CN$H$O07$o$l$^$;$s!#(B
     "Hypothetical protein", "Transmembrane", "Inner membrane",
     "Repeat", "Polymorphism", "Signal", "3D-structure",
     "Coiled coil", "Multigene family", "Antigen"
    
  2. "DE"$B$+$i(Binformative$B$JJ8;z$@$1Cj=P$9$k$K$O!)(B

    $BNc$($P!"0J2(BDESCRIPTION$B$O(Bfunction$B$N>pJs$r4^$s$G$$$J$$$H(B $B9M$($i$l$^$9!#(B

    DE   APAG PROTEIN.
    DE   21.1 KD PROTEIN IN PANB-HTRE INTERGENIC REGION.
    DE   HYPOTHETICAL 24.8 KD PROTEIN IN DJLA-RLUA INTERGENIC REGION.
    DE   10 KD PROTEIN PRECURSOR (CLONE PSAS10).
    DE   110 KD ANTIGEN (PK110) (FRAGMENT).
    DE   PROTEIN Z600
    
    $B$h$C$F!"$3$l$i$+$i!"(Binformative$B$G$J$$8l!"Nc$($P(B"PROTEIN", "INTERGENIC", "REGION", "CLONE", "FRAGMENT"$B$J$I$rA4$F=|$-!"$=$l$G$b;D$C$?J8;z$r(Binformative$B$JJ8;z$H$7$^$9!#(B $B$3$N>r7o$r7h$a$k$N$O(Binformative$B$G$J$$$H;W$o$l$k(B DESCRIPTION$B$NCO$O(B $B$"$k$H;W$$$^$9!#(BDE$B$NJ8;zNs$r0z?t$H$7$F(Binformative$B$JItJ,$@$1$rJV$9(B perl$B$N4X?t$O0J2(B
    # Get_Informative_String(DE_STRING, GENE_NAMES_REF)
    #
    # Reduces a Swiss-Prot "DE" (description) string to the characters that
    # are considered informative about function.  Gene names, a fixed list of
    # uninformative words/phrases (HYPOTHETICAL, PROTEIN, "nn.n KD", ...),
    # all non-word characters and all digits are removed, in that order.
    #
    #   $_[0] : the DE string (without the leading "DE   " line code)
    #   $_[1] : reference to an array of gene names to strip (optional)
    #
    # Returns the remaining characters as one string; the caller compares
    # its length against the "informative character" threshold.
    #
    # NOTE: the word removals below are plain substring removals without
    # word boundaries (e.g. "GENE" is also removed from inside longer
    # words).  The order of the substitutions matters: multi-word phrases
    # must be removed before the single words they contain.
    sub Get_Informative_String{
        my $inputstr = $_[0];
        # Tolerate a missing / non-array second argument.
        my $genes = (ref($_[1]) eq 'ARRAY') ? $_[1] : [];

        # Drop the terminating period of the DE line.
        $inputstr =~ s/\.$//g;

        # Remove each gene name once when followed by a space, and once at
        # the end of the string.  \Q...\E makes the name match literally so
        # that regex metacharacters in a gene name cannot mis-match or
        # break pattern compilation.
        foreach my $g (@$genes) {
            next unless defined($g) && length($g);
            $inputstr =~ s/\Q$g\E //;
            $inputstr =~ s/\Q$g\E$//;
        }

        $inputstr =~ s/HYPOTHETICAL//g;
        $inputstr =~ s/PUTATIVE//g;
        $inputstr =~ s/PROBABLE//g;
        $inputstr =~ s/PRECURSOR//g;
        $inputstr =~ s/FROM//g;
        $inputstr =~ s/UNKNOWN//g;
        $inputstr =~ s/ANTIGEN//g;
        $inputstr =~ s/FRAGMENT//g;
        $inputstr =~ s/CLONE//g;
        $inputstr =~ s/GENE//g;
        # "... IN XXXX-YYYY INTERGENIC REGION (...)" style location phrases.
        $inputstr =~ s/ .{1,10}-.{1,10} INTERGENIC REGION\s?\(?.*\)?//g;
        $inputstr =~ s/PROTEIN IN .{1,15}REGION\s?\(?.*\)?//g;
        $inputstr =~ s/IN CHROMOSOME//g;
        $inputstr =~ s/IN .{0,15}CHROMOSOME//g;
        $inputstr =~ s/PROTEIN//g;
        # Molecular weights such as "24.8 KD" or "10 KD".
        $inputstr =~ s/[0-9]+\.[0-9] KD//g;
        $inputstr =~ s/[0-9]+ KD//g;
        $inputstr =~ s/\(ORF.+\)//g;
        $inputstr =~ s/-LIKE//g;
        $inputstr =~ s/LIKE//g;
        $inputstr =~ s/INTERGENIC//g;
        $inputstr =~ s/REGION//g;
        $inputstr =~ s/LARGE//g;
        $inputstr =~ s/SMALL//g;
        $inputstr =~ s/SUBUNIT//g;
        $inputstr =~ s/VERY//g;
        $inputstr =~ s/CHAIN//g;
        # 2D-gel spot identifiers, longest slash-separated form first.
        $inputstr =~ s/SPOTS \w+\/\w+\/\w+\/\w+\/\w+//g;
        $inputstr =~ s/SPOTS \w+\/\w+\/\w+\/\w+//g;
        $inputstr =~ s/SPOTS \w+\/\w+\/\w+//g;
        $inputstr =~ s/SPOTS \w+\/\w+//g;
        $inputstr =~ s/\(SPOT.+\)//g;
        $inputstr =~ s/2D-PAGE SPOTS//g;
        $inputstr =~ s/2D-PAGE//g;
        $inputstr =~ s/2D_\w+ //g;
        # Remove HOMOLOGY and HOMOLOG in one pass.  Removing "HOMOLOG"
        # first would leave the "Y" of "HOMOLOGY" behind as a bogus
        # informative character.
        $inputstr =~ s/HOMOLOGY?//g;
        $inputstr =~ s/ IN //g;
        # Finally keep letters/underscore only: strip non-word chars, digits.
        $inputstr =~ s/\W//g;
        $inputstr =~ s/\d//g;
        return($inputstr);
    }
    
    
    
推定機能の条件

Swiss-prot$B$N%^%K%e%"%k(B "How is biochemical information assigned to sequence entries ?" (http://www.expasy.ch/cgi-bin/lists?annbioch.txt)$B$K$h$l$P!"A4%2%N%`(B $B%G!(Bannotation$B$K$D$$$F!"0J2l9gJ,$1$,$7$F$"$j$^$9!#(B

(III) Protein sequence data from translation of genome sequencing data

  1. identical to an existing sequence in SWISS-PROT from the same organism,
  2. identical to an existing sequence in SWISS-PROT from a different
     organism which may or may not be related
  3. strong similarity (i.e. many residues are conserved residues), over the
     entire sequence, to an existing entry (from a related or different
     organism)
  4. strong similarity only at regions in the sequence (from same, related
     or different organism)
  5. some similarity to one or more existing entries
  6. no similarity to any existing entries
$B$3$N$&$A!"%1!(B1,2$B$O4{$KF10l$NG[Ns$,EPO?$5$l$F$$$?>l9g$G$"$j!"(B $B$=$N(Bannotation$B$Ol9g$,9b$$$H;W$o$l$^$9!#(B $B%1!(B3-6$B$O!"F10l$NG[Ns$,(BSwiss-prot$B$KEPO?$7$F$J$+$C$?>l9g$G$"$j!"(B $B$3$l$i$K$D$$$F$N(Bannotation$B$OAjF1@-$N$"$k%(%s%H%j$+$i?d;!$5$l$?$b$N(B $B$K$J$k$O$:$G$9!#$3$N%I%-%e%a%s%H$NNc$K$h$k$H!"(B $B0J2(B
CASE  3         4             5             6 
DE    PROBABLE  PROBABLE      PUTATIVE      HYPOTHETICAL
KW    -         Hypothetical  Hypothetical  Hypothetical
$B$h$C$F!"l9g!"C1$J$kAjF1@-$@$1$G(B $B5!G=$N(Bannotation$B$,$5$l$?$H$7$^$9!J?dDj5!G=$N>r7o!K!#(B
  1. "DE"$B$NMs$K(B"PROBABLE"$B$+(B"PUTATIVE"$B$+(B"HYPOTHETICAL"$B$r4^$`(B
  2. "Hypothetical protein"$B$H$$$&(BKeyword$B$,$"$k(B