Combining Text Mining and Sequence Analysis to Discover Protein Functional Regions E. Eskin and E. Agichtein Pacific Symposium on Biocomputing 9:288-299(2004) Æ ! ! " # # ! " ! # " $ % & " ' " ! $ $ $ $ ( Æ ) $ ( # *+, ( -+, ( ' $ ( " ' . " ( / ' $ " # " $ $ # $ & $ $ $ ! " " 0 ( 0 . ( ( 1 $ $ " ( . 2332 ! 4 ( 0 & $ " $ Initial (seed) labeled sequence text annotations text Seq. Extended training set Train Text Classifier text Seq. Predict class of unlabeled text annotations Predict class of unlabeled sequences and annotations text Step 1: Extend Training Set by Exploiting Text Annotations Train Joint Classifier Seq. text Seq. Step 2: Exploit both Text and Sequence information in the Extended Training Set $% & ' ( ) * + ! . 2332 ( ) ( " 5 6.7 ! ! $ ) * ) $ $ 1 $ " ! $ " 1 $ & $ " % & " $ ! $ & $ " $ $ 7 " ' " ' $ " 8 $ % *99: ! ! Æ $ ! $ $ $ ) $ 0' 5#! $ 1 Æ ( $ ! . ! $ 7 $ $ " $ 0 $ $ 7 " '55; $ " '55; $ $ " $ ! "# ; $ 0 " $ $ ' $ & % $ $ 7 $ Æ 1 ! $ & $ " " #$ ! $ ! " ; * < 3 7 $ $ = ¾ 7 $ 0 Æ ) $ $ = % #$ "$ ! 23 ) * 3 ! = ¾ ! 7 Æ ' $ ' 4 $ $ ! / 8 ; ! " = * 3 * ' ( * $ ( 6 &'( " ) %! 0 " $ ' $ " ! $ 0 " = > > > ! " 4 $ ! ' $ " 4 $ ** ) ! " ? $ ! " $ " + = * " Ê * 0 " , , Ê Ê " = , " > ' 2 " " " - " - " ! " 4 $( 7 ( " " ) $ " " $ % & ' ' # " $ ! " = = - , @ = = , = ! " % $ , ' Æ 0 ) , ! " $ 0 " ' $ . !$! / )$0) 0 ) $ $ & $ " 23, ( ! 0 ( 0 & $ " $ " ) 100 keywords text fields text-all 95 90 Precision 85 80 75 70 65 20 30 40 50 Recall 60 70 ,% ' ) # # * -**. /0 . ( 0 " $ ( $ 0 0' 5#!@3 $ ' " ( 7 0' 5#! $ ( 0 $ ( ! 2 7 *33333 0' 5#! *+, $ ( ( )* + ! 0 , $ $ 1 ( $ % ( $ ( ( $ ! ( . ' ' ' 80 '55; " ( 0' 5#! $ ! " *@@+@ $ ! $ ( 0' 5#! ! . 2332 , !$ " '55; ! $ " 0 . $ " $ " $ ! $ $ ) 2 0 " 4 . " 7 ) 2 $ " ( ) $ :3, $ '55; " ! $ " $ 0' 5#! ! 0' 5#! ) 2 0 " ! " A B $ . $ " $ C2333 $ " ! $ & $ " ! "# # $ -2 $ " $ $% ( ! # ' ( 0 $ 23, 0 D $ " " & $ " ) " 4 ! $ " $ " " ) $ $ ) ( " ! 2 0 #8 4 ! #8 E E " +3 7 * 3 +3 $ % ( ' ' )* 0 ( 2@ ( ( ( .F 0 $ 23 .F : ! 2 .F # &&&&&&& #&#&& &&+&&+&&,&&+&&+&&+&&,&&+&&+&&+&& .../../././... 0#1/0#...)2.& ##..#.. #..# "/# +2!&20&##45, !"../!,!" !" " &&&&& && #&#&& &&+&&&"&& #..#.#.#. ...& ###..# 6!7&&## /6!27 !" $ #% ' ()* (-* ' (3* ' ' ,% ' # 12* % 3)4 5 + 5 6 7 5 ($889:$:8;;09 354 < 5 ( 7 +$888,9=098 3(4 ( 5 7 ( 5 $88>$>$//8$/9> 1 !!! 0 $ $ $ Æ $ $ $ ! " $ 0 & $ " $ & " 0 ( ! " 0 ( 0 $ $ ( 0 * ; ' 8 & ( " ( - . **@G*23 777' 5 *99+ 2 7 H % ' H & 6 D 7 - . 2-+D*+3*G*+-* *99@ - H H 8 % @ + C I : 9 *3 ** *2 *- *@ *+ *C *I *: . *@*3D:@CG:+C *99: % 8 8 & *9*D++GI2 *99@ 8 F ; ; 0 . ! D 7 " ' & &# , $ . $ /&.0 H < 2332 8 F ; ; 0 . " ' & + 1 ( & 2 /1(&0 2332 8 F ; ; 7 8 0 . " ! 8 1 233- . % ' ( $ . *: *D I:G :C J 2332 . % ( & ***2D2:-CG@I 6 2332 7 % ! 8 ' & 334 " " $ ) *99: H ! 7 H 7 ! $ $ - ) ' @DCIG:* 233- 7 % 7 ! 0' 5#! D - - I+D-*2G-*C *99I 0 0 8 ) 4 ' ( " - ) *99+ ; ; 7 ! 1 233- ; ; 0 . K 7 " ! 1 233- . ) *99: ? . F 1 #8 " $ " 23*D2+G-- *99C . 5 8 % .F D ( 1 + ' -**D-9IG9 J 233-
© Copyright 2026 Paperzz