. +,-) * ) %& '("#$ !"#$
11th International CSI Computer Conference (CSICC’2006), School of Computer Science, IPM, Jan. 24-26, 2006, Tehran, Iran.
ﻃﺮﺍﺣﻲ ﻳﮏ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺭﺿﺎ ﺣﺴﺎﻣﻲ ﻓﺮﺩ
ﻏﻼﻣﺮﺿﺎ ﻗﺎﺳﻢ ﺛﺎﻧﻲ
ﺩﺍﻧﺸﮕﺎﻩ ﺻﻨﻌﺘﻲ ﺷﺮﻳﻒ
ﺩﺍﻧﺸﮑﺪﻩ ﻣﻬﻨﺪﺳﻲ ﮐﺎﻣﭙﻴﻮﺗﺮ
ﺩﺍﻧﺸﮕﺎﻩ ﺻﻨﻌﺘﻲ ﺷﺮﻳﻒ
ﺩﺍﻧﺸﮑﺪﻩ ﻣﻬﻨﺪﺳﻲ ﮐﺎﻣﭙﻴﻮﺗﺮ
[email protected]
sani@sharif.edu
ﭼﮑﻴﺪﻩ :ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﺩﺭ ﺑـﺴﻴﺎﺭﻱ ﺍﺯ ﺯﻣﻴﻨـﻪ ﻫـﺎﻱ ﭘـﺮﺩﺍﺯﺵ ﺯﺑـﺎﻥ
ﻃﺒﻴﻌﻲ ﮐﺎﺭﺑﺮﺩﻫﺎﻱ ﮔﺴﺘﺮﺩﻩ ﺍﻱ ﺩﺍﺭﺩ .ﻣﺘﻦ ﮐﺎﻭﻱ ﻭ ﺗﺮﺟﻤﻪ ﻣﺘﻮﻥ ﺍﺯ ﺭﻳـﺸﻪ
ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﺑﺮﺍﻱ ﺑﻬﻴﻨﻪﮐﺮﺩﻥ ﺭﻭﺵﻫﺎﻱ ﺧﻮﺩ ﺑﻴﺸﺘﺮ ﺍﺳـﺘﻔﺎﺩﻩ ﮐـﺮﺩﻩ ﺍﻧـﺪ.
ﺭﻳﺨﺖ ﺷﻨﺎﺳﻲ ﺍﺯ ﻣﻬﻤﺘﺮﻳﻦ ﺍﺑﺰﺍﺭﻫﺎﻱ ﻣﻮﺭﺩ ﺍﺳﺘﻔﺎﺩﻩ ﺩﺭ ﺭﻳﺸﻪﻳﺎﺑﻲ ﮐﻠﻤـﺎﺕ
ﺍﺳﺖ .ﺩﺭ ﺍﻳﻦ ﻣﻘﺎﻟﻪ ﻳﮑﻲ ﺍﺯ ﻣﻬﻤﺘﺮﻳﻦ ﺭﻭﺵ ﻫﺎﻱ ﻣﺒﺘﻨﻲ ﺑﺮ ﺭﻳﺨﺖ ﺷﻨﺎﺳﻲ
ﺭﺍ ﺑﻬﻴﻨﻪ ﮐﺮﺩﻩ ﺍﻳﻢ .ﻋﻼﻭﻩ ﺑﺮﺍﻳﻦ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﻳﮏ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺭﻳﺸﻪﻳـﺎﺑﻲ
ﺭﺍ ﻣﻌﺮﻓﻲ ﺧﻮﺍﻫﻴﻢ ﮐﺮﺩ .ﺑﺮﺍﻱ ﺍﻭﻟﻴﻦ ﺑﺎﺭ ﺩﺭ ﺍﻳﻦ ﻣﻘﺎﻟﻪ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺑـﺮﺍﻱ
ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﺍﺳﺘﻔﺎﺩﻩ ﺷﺪﻩ ﺍﺳﺖ.
ﻭﺍﮊﻩ ﻫﺎﻱ ﻛﻠﻴﺪﻱ :ﺭﻳﺸﻪ ﻳﺎﺑﻲ ،ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ،ﭘﺮﺩﺍﺯﺵ ﺯﺑﺎﻥ ،ﺳﻴﺴﺘﻤﻬﺎﻱ
ﻣﺘﺮﺟﻢ.
‐١ﻣﻌﺮﻓﻲ
ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ
ﭘﺮﺩﺍﺯﺵ ﺯﺑﺎﻥ ﻃﺒﻴﻌﻲ ﺩﺭ ﺳﺎﻝ ﻫﺎﻱ ﺍﺧﻴﺮ ﺭﺷﺪ ﻗﺎﺑﻞ ﺗﻮﺟﻬﻲ ﺩﺍﺷﺘﻪ ﺍﺳـﺖ.
ﺭﻭﻳﮑﺮﺩﻫﺎﻱ ﺟﺪﻳﺪ ﺑﻪ ﺯﺑﺎﻥ ﻃﺒﻴﻌﻲ ﺑﻪ ﻋﻨﻮﺍﻥ ﺍﺑﺰﺍﺭﻱ ﺑﺮﺍﻱ ﺑﺮﻗﺮﺍﺭﻱ ﺍﺭﺗﺒـﺎﻁ
ﺑﺎ ﮐﺎﺭﺑﺮﺍﻥ ﺭﺍﻳﺎﻧﻪ ﺍﺯ ﻳﮏ ﺳﻮ ﻭ ﺍﻓﺰﺍﻳﺶ ﺣﺠـﻢ ﺍﻃﻼﻋـﺎﺕ ﻭ ﺩﺍﺩﻩ ﻫـﺎ ﻭ ﺭﺷـﺪ
ﺍﺑﺰﺍﺭﻫﺎﻱ ﭘﺮﺩﺍﺯﺵ ﻣﺘﻦ ﺍﺯ ﺳﻮﻱ ﺩﻳﮕـﺮ ﺭﺷـﺪ ﺳﻴـﺴﺘﻢ ﻫـﺎﻱ ﭘﺮﺩﺍﺯﺷـﻲ ﺭﺍ
ﺳﺒﺐ ﺷﺪﻩ ﺍﺳﺖ[۳].
ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﺑﺎ ﺗﻮﺟﻪ ﺑﻪ ﺭﺷـﺪ ﭘـﺮﺩﺍﺯﺵ ﺯﺑـﺎﻥ ﻃﺒﻴﻌـﻲ ﮐﺎﺭﺑﺮﺩﻫـﺎﻱ
ﻓﺮﺍﻭﺍﻧﻲ ﭘﻴﺪﺍ ﮐﺮﺩﻩ ﺍﺳﺖ .ﺑﻪ ﻃﻮﺭ ﮐﻠﻲ ﺩﻭ ﮐﺎﺭﺑﺮﺩ ﻋﻤﺪﻩ ﺑـﺮﺍﻱ ﺭﻳـﺸﻪ ﻳـﺎﺑﻲ
ﮐﻠﻤﺎﺕ ﻣﺮﺳﻮﻡ ﺍﺳﺖ:
•
ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﺩﺭ ﻣﺎﺷﻴﻦ ﻫـﺎﻱ ﻣﺘـﺮﺟﻢ :ﮐﻠﻤـﺎﺕ ﺑـﻪ
ﻫﻤﺮﺍﻩ ﺍﺷﺘﻘﺎﻕ ﻫﺎﻱ ﺁﻧﻬﺎ ﺗﻨﻮﻉ ﻣﺤﺴﻮﺳﻲ ﺑﻪ ﮐﻠﻤﺎﺕ ﻣﻲ ﺩﻫﻨـﺪ
و عملاً باعث دشوارتر شدن ساختارهای ترجمه‌ی جملات
ﻣــﻲ ﺷــﻮﻧﺪ .ﺑــﺎ ﺍﺳــﺘﻔﺎﺩﻩ ﺍﺯ ﺭﻳــﺸﻪ ﻳــﺎﺑﻲ ﮐﻠﻤــﺎﺕ ﺑــﻪ ﮐــﺎﻫﺶ
ﭘﻴﭽﻴﺪﮔﻲ ﻫﺎﻱ ﺗﺮﺟﻤﻪ ﻣﻲ ﭘﺮﺩﺍﺯﻧﺪ.
•
ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﺩﺭ ﺳﻴﺴﺘﻢ ﻫﺎﻱ ﺑﺎﺯﻳـﺎﺑﻲ ﺍﻃﻼﻋـﺎﺕ:
ﺑﺎﺯﻳﺎﺑﻲ ﺍﻃﻼﻋﺎﺕ ﻭ ﭘﺮﺩﺍﺯﺵ ﻣﺘﻦ ﻳﮑﻲ ﺍﺯ ﮐﺎﺭﺑﺮﺩﻫﺎﻱ ﺭﻭ ﺑﻪ ﺭﺷﺪ
ﺩﻭﺭﻩ ﺍﺧﻴﺮ ) ﻋﺼﺮ ﻓـﻦﺁﻭﺭﻱ ﺍﻃﻼﻋـﺎﺕ( ﻣﺤـﺴﻮﺏ ﻣـﻲ ﮔـﺮﺩﺩ.
ﭘﺮﺩﺍﺯﺵ ﻭ ﺩﺳﺘﻪﺑﻨﺪﻱ ﺍﺧﺒـﺎﺭ ،ﭘـﺮﺩﺍﺯﺵ ﻧﻮﺷـﺘﻪﻫـﺎﻱ ﻋﻠﻤـﻲ ﻭ
ﮐﺎﺭﺑﺮﺩﻫــﺎﻳﻲ ﺍﺯ ﺍﻳــﻦ ﺩﺳــﺖ ﺍﻣــﺮﻭﺯﻩ ﮐــﺎﻣﻼ ﻣﺮﺳــﻮﻡ ﺍﺳــﺖ .ﺩﺭ
ﺳﻴﺴﺘﻢﻫﺎﻱ ﺑﺎﺯﻳﺎﺑﻲ ﺍﻃﻼﻋﺎﺕ ﻣﻌﻤﻮﻻ ﻳـﮏ ﭘﺎﻳﮕـﺎﻩ ﺩﺍﺩﻩ ﺑـﺴﻴﺎﺭ
ﺑﺰﺭﮒ ﻭﺟﻮﺩ ﺩﺍﺭﺩ ﮐﻪ ﺑﺎﻳﺪ ﭘﺮﺩﺍﺯﺵ ﻭ ﺑﺎﺯﻳﺎﺑﻲ ﺍﻃﻼﻋﺎﺕ ﺑـﺮ ﺭﻭﻱ
ﺁﻧﻬﺎ ﺻﻮﺭﺕ ﭘﺬﻳﺮﺩ .ﻫﺮ ﭼﻪ ﺷﺒﮑﻪﻫﺎﻱ ﻣﻌﻨﺎﻳﻲ ﺍﺳﺘﺨﺮﺍﺝ ﺷﺪﻩ ﺍﺯ
ﺍﻳﻦ ﺍﻃﻼﻋﺎﺕ ﺩﻗﻴﻖﺗﺮ ﻭ ﮔﺴﺘﺮﺩﻩﺗﺮ ﺑﺎﺷﺪ ،ﺍﻣﮑﺎﻥ ﻓـﺮﺍﻫﻢ ﺷـﺪﻥ
ﺍﻃﻼﻋــﺎﺕ ﺍﺳــﺘﺨﺮﺍﺝ ﺷــﺪﻩ ﺑﻴــﺸﺘﺮ ،ﺭﺍﺣــﺖﺗــﺮ ﺍﺳــﺖ .ﻳﮑــﻲ ﺍﺯ
ﮐﺎﺭﺑﺮﺩﻫﺎﻱ ﺭﻳـﺸﻪﻳـﺎﺑﻲ ﺩﺭ ﺍﻣﮑـﺎﻥ ﻓـﺮﺍﻫﻢ ﮐـﺮﺩﻥ ﺷـﺒﮑﻪﻫـﺎﻱ
ﻣﻌﻨﺎﻳﻲ ﮔﺴﺘﺮﺩﻩﺗﺮ ﺩﺭ ﺳﻴﺴﺘﻢﻫـﺎﻱ ﭘـﺮﺩﺍﺯﺵ ﻣـﺘﻦ ﻭ ﺑﺎﺯﻳـﺎﺑﻲ
ﺍﻃﻼﻋﺎﺕ ﺍﺳﺖ.
علی‌رغم آنکه صورت مساله در دو کاربرد بالا بسیار مشابه است، اما
ﺭﻳــﺸﻪﻳــﺎﺑﻲ ﮐﻠﻤــﺎﺕ ﺩﺭ ﺁﻧﻬــﺎ ﺩﺍﺭﺍﻱ ﻧﻴﺎﺯﻣﻨــﺪﻱﻫــﺎﻱ ﻣﺘﻔــﺎﻭﺗﻲ ﺍﺳــﺖ .ﺩﺭ
ﺳﻴﺴﺘﻢﻫﺎﻱ ﻣﺘﺮﺟﻢ ﺑﻴﺸﺘﺮ ﺑـﻪ ﺩﻧﺒـﺎﻝ ﺭﻳـﺸﻪﻳـﺎﺑﻲ ﮐﻠﻤـﺎﺗﻲ ﻫـﺴﺘﻴﻢ ﮐـﻪ
ﺍﺷﺘﻘﺎﻕ ﮐﻠﻤﻪ ﺩﺭ ﻧﻮﻉ ﺁﻥ )ﻓﻌﻞ ،ﺍﺳﻢ ﻭ (...ﺗﻐﻴﻴﺮﻱ ﺍﻳﺠﺎﺩ ﻧﻤـﻲﮐﻨـﺪ .ﺯﻳـﺮﺍ
ﻫﻨﮕﺎﻣﻲ ﮐﻪ ﻧﻮﻉ ﮐﻠﻤﻪ ،ﺗﻐﻴﻴﺮ ﻣﻲﮐﻨﺪ ﻣﻌﺎﺩﻝ ﺁﻥ ﺩﺭ ﺯﺑﺎﻥ ﻣﻘـﺼﺪ ﺗﻐﻴﻴـﺮﺍﺕ
زیادی می‌کند و عملاً ترجمه انجام شده از کیفیت مطلوبی برخوردار
ﻧﻤﻲ ﺑﺎﺷﺪ .ﺍﻳﻦ ﺩﺭ ﺣﺎﻟﻲ ﺍﺳﺖ ﮐﻪ ﺩﺭ ﺳﻴﺴﺘﻢ ﻫﺎﻱ ﭘﺮﺩﺍﺯﺵ ﻣـﺘﻦ ،ﮐـﺸﻒ
ﺗﻤﺎﻣﻲ ﺍﺭﺗﺒﺎﻁ ﻫﺎ ﺑﺮﺍﻱ ﻣﺎ ﺍﻫﻤﻴﺖ ﺩﺍﺭﺩ .ﺑﻪ ﻫﻤﻴﻦ ﺩﻟﻴﻞ ﺗﻐﻴﻴﺮ ﻧﻘﺶ ﻣـﺴﺎﻟﻪ
ﻣﻬﻤﻲ ﻧﻴﺴﺖ.
ﺑﻨﺎﺑﺮﺍﻳﻦ ﺩﺭ ﻣﺎﺷﻴﻦ ﻫﺎﻱ ﻣﺘﺮﺟﻢ ﺑﻴﺸﺘﺮ ﺑﺮ ﺭﻭﻱ ﻣﻮﺍﺭﺩﻱ ﺗﻤﺮﮐﺰ ﻣـﻲ ﺷـﻮﺩ
ﮐﻪ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺩﺭ ﺁﻧﻬﺎ ﺗﻐﻴﻴﺮﻱ ﻧﻤﻲﮐﻨﺪ .ﺻﺮﻑ ﺍﻓﻌﺎﻝ ﺍﺯ ﺍﻳﻦ ﺟﻤﻠﻪ ﺍﺳﺖ.
ﺍﻟﺒﺘﻪ ﺑﺎﻳﺪ ﺑﻪ ﻳﮏ ﻣﺴﺎﻟﻪ ﺍﺳﺎﺳﻲ ﺗﻮﺟﻪ ﮐﺮﺩ .ﺍﻳﻦ ﻣﺴﺎﻟﻪ ﺍﺻﻼ ﺑﻪ ﺍﻳﻦ ﻣﻌﻨـﻲ
نیست که ریشه‌یابی در موارد دیگر در سیستم‌های ترجمه کارایی ندارد؛
ﺑﻠﮑﻪ ﺑﺎ ﺗﻮﺟﻪ ﺑﻪ ﺍﺑﺰﺍﺭﻫﺎﻱ ﮐﻨﻮﻧﻲ ﺳﻴﺴﺘﻢ ﻫﺎﻱ ﺗﺮﺟﻤﻪ ﮐﻪ ﺑﻴـﺸﺘﺮ ﺩﺭ ﻻﻳـﻪ
ﺳﺎﺧﺘﺎﺭ ﮐﻠﻤﺎﺕ ﮐﺎﺭﺑﺮﺩ ﺩﺍﺭﻧﺪ ﻭ ﻧﻪ ﺩﺭ ﻻﻳﻪ ﻣﻔﻬﻮﻡ ،ﺑﺎﻳﺪ ﺭﻳﺸﻪﻳـﺎﺑﻲ ﺭﺍ ﺗﻨﻬـﺎ
در همین مسیر هدایت کرد [۲،۳].
ﮐﻠﻤﺎﺕ ،ﺩﺭ ﻫﺮ ﺯﺑﺎﻥ ﺑـﻪ ﺩﻭ ﺩﺳـﺘﻪ ﺟﺎﻣـﺪ ﻭ ﻣﺮﮐـﺐ ﺗﻘـﺴﻴﻢ ﻣﻴـﺸﻮﻧﺪ .ﺑـﻪ
ﮐﻠﻤﺎﺗﻲ ﮐﻪ ﺍﺯ ﺩﻳﮕﺮ ﮐﻠﻤﺎﺕ ﻣﺸﺘﻖ ﺷﺪﻩﺍﻧﺪ ﮐﻠﻤﺎﺕ ﻣﺮﮐﺐ ﮔﻔﺘﻪ ﻣﻲ ﺷـﻮﺩ.
ﮐﻠﻤﺎﺕ ﺟﺎﻣﺪ ﮐﻠﻤﺎﺗﻲ ﻫﺴﺘﻨﺪ ﮐﻪ ﺩﺭ ﺯﺑﺎﻥ ﺍﺯ ﻫﻴﭻ ﮐﻠﻤﻪﺍﻱ ﻣﺸﺘﻖ ﻧﺸﺪﻩﺍﻧﺪ.
ﻳﺎﻓﺘﻦ ﺭﻳﺸﻪ ﮐﻠﻤﺎﺕ ﻣﺮﮐﺐ ﺭﺍ ﺍﺻﻄﻼﺣﺎ ﺭﻳﺸﻪﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﻣﻲ ﮔـﻮﻳﻴﻢ .ﺑـﻪ
ﻋﻨﻮﺍﻥ ﻣﺜﺎﻝ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ "ﺁﻣﻮﺯﮔﺎﺭ" ﮐﻠﻤﻪ ﻣﺮﮐﺒﻲ ﺍﺳـﺖ ﮐـﻪ ﺍﺯ ﺗﺮﮐﻴـﺐ
"ﺁﻣﻮﺯ" ﻭ "ﮔﺎﺭ" ﺗﺸﮑﻴﻞ ﺷﺪﻩ ﺍﺳﺖ؛ ﻳـﺎ ﺩﺭ ﺯﺑـﺎﻥ ﺍﻧﮕﻠﻴـﺴﻲ ’ ‘teacherﺑـﺎ
ﮐﻠﻤﺎﺕ ’ ‘teachﻭ ’ ‘erﺳﺎﺧﺘﻪ ﺷﺪﻩ ﺍﺳﺖ.
‐٢ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎﻱ ﺭﻳﺸﻪﻳﺎﺑﻲ
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﭘﺲﻭﻧﺪ ﻭ ﭘﻴﺶﻭﻧﺪ ﮐﻠﻤﺎﺕ ﺭﺍ ﺑﺮﺭﺳﻲ ﻣﻲ ﮐﻨﺪ ﻭ ﺩﺭ
ﻣﺎﺷﻴﻦ ﻫﺎﻱ ﻣﺘﺮﺟﻢ ﮐﺎﺭﺍﻳﻲ ﺧﻮﺑﻲ ﺭﺍ ﻧﺸﺎﻥ ﺩﺍﺩﻩ ﺍﺳﺖ.
ﺗﺎﮐﻨﻮﻥ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎﻱ ﺯﻳﺎﺩﻱ ﺑﺮﺍﻱ ﺭﻳﺸﻪﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﻣﻌﺮﻓﻲ ﺷﺪﻩ ﺍﺳﺖ .ﺑﺎ
ﺍﻳﻦ ﺣﺎﻝ ﺩﺭ ﻳﮏ ﺩﺳﺘﻪ ﺑﻨﺪﻱ ﮐﻠﻲ ﻣﻲ ﺗﻮﺍﻥ ﺩﻭ ﺍﻳﺪﻩ ﺍﺻﻠﻲ ﺭﺍ ﺩﺭ ﺁﻧﻬﺎ
ﺟﺴﺘﺠﻮ ﮐﺮﺩ:
•
ﺷﺒﮑﻪ ﻫﺎﻱ ﮐﻠﻤﺎﺕ :١ﺩﺭ ﺍﻳﻦ ﺭﻭﺵﻫﺎ ﺍﺯ ﺍﺭﺗﺒﺎﻁ ﮐﻠﻤﺎﺕ ﺑﺎ ﻫﻢ
ﺩﺭ ﻳﮏ ﺷﺒﮑﻪ ﻣﻌﻨﺎﻳﻲ ﺍﺳﺘﻔﺎﺩﻩ ﻣﻲ ﺷﻮﺩ .ﮐﻠﻤﺎﺗﻲ ﮐﻪ ﺩﺍﺭﺍﻱ
ﻳﮏ ﺭﻳﺸﻪ ﻫﺴﺘﻨﺪ ﻣﺸﺨﺺ ﺷﺪﻩ ﻭ ﺩﺭ ﻳﮏ ﺷﺒﮑﻪ ﮔﺮﺍﻑ ﻣﺎﻧﻨﺪ
ﺑﻪﻃﻮﺭﻱ ﮐﻪ ﺍﻋﻀﺎﻱ ﻳﮏ ﺩﺳﺘﻪ ﺗﺸﮑﻴﻞ ﻳﮏ ﺧﻮﺷﻪ ﺭﺍ ﺩﺍﺩﻩﺍﻧﺪ،
ﻧﮕﻪﺩﺍﺭﻱ ﻣﻲﺷﻮﺩ .ﺍﻳﻦ ﺭﻭﺵﻫﺎ ﺍﺣﺘﻴﺎﺝ ﺑﻪ ﻧﻈﺎﺭﺕ ﺑﺴﻴﺎﺭ ﺯﻳﺎﺩ
ﻋﺎﻣﻞ ﺍﻧﺴﺎﻧﻲ ﺩﺍﺭﻧﺪ .ﺯﻳﺮﺍ ﻫﻨﻮﺯ ﺍﻟﮕﻮﺭﻳﺘﻤﻬﺎﻳﻲ ﮐﻪ ﺑﺘﻮﺍﻥ ﺑﺎ
ﺍﺳﺘﻔﺎﺩﻩ ﺍﺯ ﺁﻧﻬﺎ ﺷﺒﮑﻪﻫﺎﻱ ﻗﺎﺑﻞ ﺍﻃﻤﻴﻨﺎﻥ ﻳﺎﻓﺖ ﻭﺟﻮﺩ ﻧﺪﺍﺭﺩ .ﺍﻣﺎ
ﺩﺭ ﻣﻘﺎﺑﻞ ﺩﺍﻣﻨﻪ ﮐﺎﺭﺍﻳﻲ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎ ﺑﺴﻴﺎﺭ ﮔﺴﺘﺮﺩﻩ ﺍﺳﺖ ﻭ
ﺩﺭ ﺳﻴﺴﺘﻢﻫﺎﻱ ﺩﺍﺩﻩ ﮐﺎﻭﻱ ﻭ ﻣﺘﻦ ﮐﺎﻭﻱ ﮐﺎﺭﺍﻳﻲ ﻣﻨﺎﺳﺒﻲ
ﺩﺍﺭﻧﺪ .ﻧﻤﻮﻧﻪﺍﻱ ﺍﺯ ﺳﻴﺴﺘﻢﻫﺎﻱ ﻣﻮﻓﻖ ﺭﻳﺸﻪﻳﺎﺑﻲ ﺑﺎ ﺍﻳﻦ ﺭﻭﻳﮑﺮﺩ
ﺩﺭ ] [۵ﺩﻳﺪﻩ ﻣﻲﺷﻮﺩ.
•
ﺭﻳﺨﺖ ﺷﻨﺎﺳﻲ :٢ﺩﺭ ﺭﻭﻳﮑﺮﺩ ﺩﻭﻣﻲ ﮐﻪ ﺑﻪ ﻣﺴﺎﻟﻪ ﺭﻳﺸﻪ ﻳﺎﺑﻲ
ﻭﺟﻮﺩ ﺩﺍﺭﺩ ﺍﺯ ﻗﻮﺍﻧﻴﻦ ﺳﺎﺧﺖ ﮐﻠﻤﺎﺕ ﺍﺳﺘﻔﺎﺩﻩ ﻣﻲﺷﻮﺩ .ﺍﻳﻦ
ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎ ﺑﺎ ﺑﺮﺭﺳﻲ ﺭﻭﺵﻫﺎﻱ ﺳﺎﺧﺖ ﮐﻠﻤﺎﺕ ﺭﻳﺸﻪ ﮐﻠﻤﺎﺕ
ﺭﺍ ﭘﻴﺪﺍ ﻣﻲﮐﻨﻨﺪ .ﺍﻳﻦ ﺭﻭﺵﻫﺎ ﺑﻪ ﻋﻠﺖ ﻣﺸﺨﺺ ﺑﻮﺩﻥ ﺭﻭﺵﻫﺎﻱ
ﺳﺎﺧﺖ ﮐﻠﻤﺎﺕ ﺍﻣﮑﺎﻥ ﻣﮑﺎﻧﻴﺰﻩﺷﺪﻥ ﺧﻮﺑﻲ ﺭﺍ ﻓﺮﺍﻫﻢ ﮐﺮﺩﻩﺍﻧﺪ.
ﺍﻣﺎ ﻣﺸﮑﻞ ﺍﺻﻠﻲ ﺩﺭ ﺁﻧﻬﺎ ﻫﻨﮕﺎﻣﻲ ﺭﺥ ﻣﻲ ﺩﻫﺪ ﮐﻪ ﺩﻭ ﮐﻠﻤﻪ
ﺟﺎﻣﺪ ﺑﺎ ﺩﻭ ﺗﻐﻴﻴﺮ ﻣﺨﺘﻠﻒ ﻳﮏ ﮐﻠﻤﻪ ﻣﺮﮐﺐ ﻳﮑﺴﺎﻥ ﻣﻲ ﺳﺎﺯﻧﺪ.
به عنوان مثال کلمه goes می‌تواند به صورت goe + s به
معنی زنبورهای عسل باشد و یا go + es به معنی صورتی
از فعل رفتن. نمونه‌هایی از الگوریتم‌های ریخت‌شناسی [۱،۴]
هستند.
ﺍﻭﻟﻴﻦ ﮐﺎﺭﺑﺮﺩ ﺍﻳﻦ ﺍﻳﺪﻩ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﺍﻧﮕﻠﻴﺴﻲ ﻣﻌﺮﻓﻲ ﺷﺪﻩ ﺍﺳﺖ .ﺻﺮﻑ ﺍﻓﻌﺎﻝ
ﺯﺑﺎﻥ ﺍﻧﮕﻠﻴﺴﻲ ﻭ ﻫﻤﭽﻨﻴﻦ ﺍﻓﺰﻭﺩﻥ s/esﺑﻪ ﺍﻧﺘﻬﺎﻱ ﺍﺳﺎﻣﻲ ﺑﺮﺍﻱ ﺟﻤﻊ
ﺑﺴﺘﻦ ﺁﻧﻬﺎ ﻣﻮﺭﺩ ﺑﺮﺭﺳﻲ ﻗﺮﺍﺭ ﻣﻲﮔﻴﺮﺩ .ﻫﻤﺎﻥ ﻃﻮﺭ ﮐﻪ ﻣﻼﺣﻈﻪ ﻣﻲﺷﻮﺩ
ﺍﻳﻦ ﺗﻐﻴﻴﺮﺍﺕ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺭﺍ ﺗﻐﻴﻴﺮ ﻧﻤﻲ ﺩﻫﻨﺪ ،ﺑﻪ ﻫﻤﻴﻦ ﺩﻟﻴﻞ ﺑﺮﺍﻱ
ﻣﺎﺷﻴﻦ ﻫﺎﻱ ﻣﺘﺮﺟﻢ ﺑﺴﻴﺎﺭ ﻣﻨﺎﺳﺐ ﻫﺴﺘﻨﺪ .ﺍﻟﺒﺘﻪ ﺑﺴﻴﺎﺭﻱ ﺍﺯ ﺩﻳﮕﺮ
ﺗﻐﻴﻴﺮﺍﺗﻲ ﮐﻪ ﺑﻪ ﺻﻮﺭﺕ ﻗﺎﻧﻮﻥﻣﻨﺪ ﺩﺭ ﮐﻠﻤﺎﺕ ﺍﻳﺠﺎﺩ ﻣﻲ ﺷﻮﻧﺪ ،ﻣﺎﻧﻨﺪ
ﻗﻴﺪﻫﺎﻳﻲ ﮐﻪ ﺑﺎ ﺍﻓﺰﻭﺩﻥ lyﺑﻪ ﺻﻔﺎﺕ ﺳﺎﺧﺘﻪ ﻣﻲﺷﻮﻧﺪ ،ﻫﻢ ﻗﺎﺑﻞ ﺗﺸﺨﻴﺺ
ﺗﻮﺳﻂ ﺍﻳﺪﻩ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺴﺘﻨﺪ.
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺗﻌﺪﺍﺩﻱ ﺍﺯ ﭘﺲﻭﻧﺪﻫﺎ ﺭﺍ ﻣﻮﺭﺩ ﺑﺮﺭﺳﻲ ﻗﺮﺍﺭ ﻣﻲ ﺩﻫﺪ ﻭ ﺩﺭ
ﻫﺮ ﻣﺮﺣﻠﻪ ﺑﺎ ﺯﺩﻭﺩﻥ ﭘﺴﻮﻧﺪ ﻳﺎﻓﺖ ﺷﺪﻩ ،ﺭﻳﺸﻪ ﺑﺪﺳﺖ ﺁﻣﺪﻩ ﺭﺍ ﻣﻮﺭﺩ ﺑﺮﺭﺳﻲ
ﻗﺮﺍﺭ ﻣﻲ ﺩﻫﺪ .ﺩﺭ ﺻﻮﺭﺗﻲ ﮐﻪ ﺍﻳﻦ ﺭﻳﺸﻪ ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ ﻣﻮﺟﻮﺩ ﺑﺎﺷﺪ،
ﻋﻤﻞ ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﻣﺘﻮﻗﻒ ﻣﻲ ﺷﻮﺩ .ﮔﺎﻫﻲ ﺍﻓﺰﻭﺩﻥ ﻳﮏ ﭘﺴﻮﻧﺪ ﺑﺎﻋﺚ
ﺗﻐﻴﻴﺮﺍﺗﻲ ﺩﺭ ﺳﺎﺧﺘﺎﺭ ﻳﮏ ﮐﻠﻤﻪ ﻣﻲ ﺷﻮﺩ ﻭ ﻫﻤﭽﻨﻴﻦ ﮔﺎﻫﻲ ﺑﺎ ﺗﻮﺟﻪ ﺑﻪ
ﺳﺎﺧﺘﺎﺭ ﮐﻠﻤﻪ ﺍﻳﻦ ﭘﺴﻮﻧﺪﻫﺎ ﺗﻐﻴﻴﺮ ﺩﺍﺩﻩ ﻣﻲﺷﻮﺩ .ﺑﻪ ﻋﻨﻮﺍﻥ ﻣﺜﺎﻝ ﺍﮔﺮ ﺩﺭ
ﺍﻧﺘﻬﺎﻱ ﻳﮏ ﻓﻌﻞ ﺩﺭ ﺯﺑﺎﻥ ﺍﻧﮕﻠﻴﺴﻲ ’ ‘oﻗﺮﺍﺭ ﺩﺍﺷﺘﻪ ﺑﺎﺷﺪ ﺑﻪ ﺟﺎﻱ sﺑﺎﻳﺪ
ﭘﺴﻮﻧﺪ esﺭﺍ ﺑﻪ ﺍﻧﺘﻬﺎﻱ ﺁﻥ ﺍﻓﺰﻭﺩ .ﺑﻪ ﻫﻤﻴﻦ ﺩﻟﻴﻞ ﻭ ﺑﺮﺍﻱ ﮐﺸﻒ ﭼﻨﻴﻦ
ﮐﻠﻤﺎﺗﻲ ﺩﺭ ﺻﻮﺭﺗﻲ ﮐﻪ ﺭﻳﺸﻪ ﮐﻠﻤﻪ ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ ﻧﺒﺎﺷﺪ ،ﺑﺮ ﺭﻭﻱ ﮐﻠﻤﻪ
ﻳﮏ ﺳﺮﻱ ﺗﻐﻴﻴﺮﺍﺗﻲ ﺍﻋﻤﺎﻝ ﻣﻲ ﮐﻨﺪ ﺗﺎ ﻫﻨﮕﺎﻣﻲ ﮐﻪ ﺭﻳﺸﻪ ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ
ﻳﺎﻓﺖ ﺷﻮﺩ.
ﺑﺎ ﺍﻳﻦ ﺣﺎﻝ ﺩﺭ ﻣﻮﺍﺭﺩ ﺑﺴﻴﺎﺭﻱ ﺩﻳﺪﻩ ﻣﻲﺷﻮﺩ ﮐﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﻧﺘﺎﻳﺞ
ﺩﺭﺳﺘﻲ ﺭﺍ ﻧﺸﺎﻥ ﻧﻤﻲﺩﻫﺪ .ﺟﺪﻭﻝ ۱ﺑﺮﺧﻲ ﺍﺯ ﺁﻧﻬﺎ ﺭﺍ ﻧﺸﺎﻥ ﻣﻲﺩﻫﺪ .ﺑﻪ
ﻫﻤﻴﻦ ﺩﻟﻴﻞ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﺭﺍ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﺍﻧﮕﻠﻴﺴﻲ ﻣﻌﺮﻓﻲ ﻣﻲﮐﻨﻴﻢ ﮐﻪ
ﺍﻳﻦ ﻣﺸﮑﻼﺕ ﺭﺍ ﺑﺮﻃﺮﻑ ﮐﺮﺩﻩ ﺍﺳﺖ.
ﺟﺪﻭﻝ ۱ﺑﺮﺧﻲ ﺍﺯ ﻣﺜﺎﻝ ﻫﺎﻳﻲ ﮐﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﭘﺎﺳﺦ ﻧﺎﺩﺭﺳﺖ ﺑﻪ ﺁﻧﻬﺎ ﺩﺍﺩﻩ
ﺍﺳﺖ.
ﺩﺭ ﺍﻳﻦ ﺑﺨﺶ ﻣﺎ ﻣﻲﺧﻮﺍﻫﻴﻢ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺭﻳﺸﻪﻳﺎﺑﻲ ﮐﻪ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺑﺮﺍﻱ
ﺭﻳﺸﻪﻳﺎﺑﻲ ﺍﺳﺘﻔﺎﺩﻩ ﮐﺮﺩﻩ ﺍﺳﺖ ﺭﺍ ﻣﻌﺮﻓﻲ ﮐﻨﻴﻢ .ﺍﺑﺘﺪﺍ ﺑﻪ ﺗﺸﺮﻳﺢ ﺍﻟﮕﻮﺭﻳﺘﻢ
ﮐﺮﺍﻭﺗﺰ ٣ﻣﻲ ﭘﺮﺩﺍﺯﻳﻢ ].[۱
goe
go
going
۱‐۲ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ
bee
be
being
Find stem
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺍﻭﻟﻴﻦ ﺑﺎﺭ ﺩﺭ ﺳﺎﻝ ۱۹۹۳ﻣﻌﺮﻓﻲ ﺷﺪ] .[۱ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ
از روش‌های ریخت‌شناسی استفاده می‌کند و از یک فرهنگ لغت
ﺑﺮﺍﻱ ﺁﺯﻣﻮﺩﻥ ﺭﻳﺸﻪ ﻫﺎﻱ ﻳﺎﻓﺖ ﺷﺪﻩ ﺍﺳﺘﻔﺎﺩﻩ ﻣﻲﮐﻨﺪ .ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺑﺮﺍﻱ
ﺯﺑﺎﻥﻫﺎﻳﻲ ﮐﻪ ﺳﺎﺧﺖ ﮐﻠﻤﺎﺕ ﻣﺮﮐﺐ ﺩﺭ ﺁﻧﻬﺎ ﻗﺎﻧﻮﻥﻣﻨﺪ ﺍﺳﺖ ،ﮐﺎﺭﺍﻳﻲ ﺧﻮﺑﻲ
ﺭﺍ ﻧﺸﺎﻥ ﺩﺍﺩﻩ ﺍﺳﺖ .ﺯﺑﺎﻥﻫﺎﻱ ﻣﺠﺎﺭﻱ ﻭ ﻋﺒﺮﻱ ﺍﺯ ﺍﻳﻦ ﺩﺳﺘﻪ ﻫﺴﺘﻨﺪ.
word net
1
Morphology
2
Krovetz
3
۲‐۲
Main stem
word
الگوریتم کراوتز ۲
ﺩﺭ ﻧﺴﺨﻪ ﺑﻬﺒﻮﺩ ﻳﺎﻓﺘﻪ ﺍﺯ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﺑﺎ ﺍﺳﺘﻔﺎﺩﻩ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺩﺭ
ﺟﻤﻼﺕ ﺑﺨﺶ ﻗﺎﺑﻞ ﺗﻮﺟﻬﻲ ﺍﺯ ﺍﺷﺘﺒﺎﻫﺎﺕ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺭﺍ ﺑﺮﻃﺮﻑ
ﮐﺮﺩﻩﺍﻳﻢ .ﺩﺭ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﻋﻼﻭﻩ ﺑﺮ ﻭﺟﻮﺩ ﻳﺎ ﻋﺪﻡ ﻭﺟﻮﺩ
آنها در فرهنگ لغت استفاده می‌شود. به عنوان مثال در فرآیند ریشه
ﻳﺎﺑﻲ ﮐﻠﻤﻪ beingﺍﺑﺘﺪﺍ beeﺟﺴﺘﺠﻮ ﻣﻲ ﺷﻮﺩ ﮐﻪ ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ ﻫﻢ
ﻣﻮﺟﻮﺩ ﻣﻲﺑﺎﺷﺪ .ﺑﻨﺎﺑﺮﺍﻳﻦ ﺑﻪ ﻋﻨﻮﺍﻥ ﺭﻳﺸﻪ ﮐﻠﻤﻪ ﺑﺮﮔﺮﺩﺍﻧﺪﻩ ﻣﻲ ﺷﺪ .ﺍﻣﺎ ﺑﺎ
ﺗﻮﺟﻪ ﺑﻪ ﺍﻳﻨﮑﻪ beeﻳﮏ ﺍﺳﻢ ﺍﺳﺖ ﻧﻤﻲﺗﻮﺍﻥ ﺑﻪ ﺁﻥ ﭘﺴﻮﻧﺪ ingﺍﻓﺰﻭﺩ
ﺑﻨﺎﺑﺮﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺍﻳﻦ ﺭﻳﺸﻪ ﺭﺍ ﺑﻪ ﻋﻨﻮﺍﻥ ﺭﻳﺸﻪ ﺍﺻﻠﻲ ﺍﻧﺘﺨﺎﺏ ﻧﻤﻲﮐﻨﺪ ﻭ
ﺑﻪ ﺍﻳﻦ ﺗﺮﺗﻴﺐ ﺑﺎ ﺍﺩﺍﻣﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺑﻪ ﺭﻳﺸﻪ ﺩﺭﺳﺖ ﮐﻠﻤﻪ ﮐﻪ ﻫﻤﺎﻥ beﺍﺳﺖ
ﻣﻲﺭﺳﻴﻢ .ﺍﺯ ﺁﻧﺠﺎ ﮐﻪ ﻫﺪﻑ ﻣﺎ ﺑﺮﺭﺳﻲ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺍﺳﺖ ﺑﺮﺭﺳﻲ ﺟﺰﻳﻴﺎﺕ ﺭﺍ ﺑﻪ ﺑﺨﺶ ﺑﻌﺪ ﻣﻮﮐﻮﻝ ﻣﻲﮐﻨﻴﻢ.
‐٣ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﮐﻠﻤﺎﺕ ﻓﺎﺭﺳﻲ
ﺑﺮﺍﻱ ﺭﻳﺸﻪﻳﺎﺑﻲ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺗﻼﺵﻫﺎﻱ ﺟﺪﻱ ﭼﻨﺪﺍﻧﻲ ﻧﺸﺪﻩ ﺍﺳﺖ .ﺩﺭ
] [۶ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺎﻱ ﺧﻮﺑﻲ ﺍﺭﺍﺋﻪ ﺷﺪﻩ ﺍﺳﺖ .ﺍﻳﻦ ﺩﺭ ﺣﺎﻟﻲ ﺍﺳﺖ ﮐﻪ ﺩﺭ ﺯﺑﺎﻥ
ﻓﺎﺭﺳﻲ ﺑﻪ ﻋﻠﺖ ﺭﻭﺵﻣﻨﺪ ﺑﻮﺩﻥ ﺍﺷﺘﻘﺎﻕ ﮐﻠﻤﺎﺕ ،ﺭﻳﺸﻪﻳﺎﺑﻲ ﺍﺯ ﺍﻣﮑﺎﻥ
ﻣﮑﺎﻧﻴﺰﻩ ﺷﺪﻥ ﺧﻮﺑﻲ ﺑﺮﺧﻮﺭﺩﺍﺭ ﺍﺳﺖ .ﻫﻤﺎﻥ ﻃﻮﺭ ﮐﻪ ﮔﻔﺘﻪ ﺷﺪ ﺩﺭ ﺯﺑﺎﻥ
ﻫﺎﻳﻲ ﮐﻪ ﺩﺍﺭﺍﻱ ﺍﺷﺘﻘﺎﻕ ﻫﺎﻱ ﺭﻳﺨﺘﻲ ) (morphologicalﺯﻳﺎﺩﺗﺮﻱ
ﻫﺴﺘﻨﺪ ﻗﺎﺑﻠﻴﺖ ﻫﺎﻱ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺑﻬﺘﺮ ﺩﻳﺪﻩ ﻣﻲ ﺷﻮﺩ .ﻓﺎﺭﺳﻲ ﻭ
ﻋﺮﺑﻲ ﺍﺯ ﺍﻳﻦ ﺩﺳﺘﻪ ﻫﺴﺘﻨﺪ .ﺭﻭﺵ ﻫﺎﻱ ﻣﺸﺎﺑﻪ ﺗﻨﻬﺎ ﺍﺯ ﺳﺎﺧﺘﺎﺭ ﻫﺎﻱ ﺯﺑﺎﻧﻲ
ﺍﺳﺘﻔﺎﺩﻩ ﮐﺮﺩﻩﺍﻧﺪ ﮐﻪ ﺩﺭ ﻧﺘﻴﺠﻪ ﻧﺘﺎﻳﺞ ﺁﻧﻬﺎ ﻗﺎﺑﻞ ﺑﻬﺘﺮ ﺷﺪﻥ ﻣﻲ ﺑﺎﺷﺪ .ﺑﺮﺍﻱ
ﺭﻳﺸﻪﻳﺎﺑﻲ ﺍﻓﻌﺎﻝ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎﻱ ﮐﺮﺍﻭﺗﺰ ﻭ ﮐﺮﺍﻭﺗﺰ ۲ﺭﺍ
ﮔﺴﺘﺮﺵ ﺩﺍﺩﻩﺍﻳﻢ.
١‐٣
ﻓﻌﻞ
ﻃﺮﻳﻘﻪ ﺳﺎﺧﺖ
ﻣﺎﺿﻲ ﺳﺎﺩﻩ
بن ماضی + {م, ی, یم, ید, ند}
ﻣﺎﺿﻲ ﺍﺳﺘﻤﺮﺍﺭﻱ
ﻣﻲ +ﻣﺎﺿﻲ ﺳﺎﺩﻩ
ﻣﺎﺿﻲ ﻧﻘﻠﻲ
ﺑﻦ ﻣﺎﺿﻲ +ﻩ } +ﺍﻡ ,ﺍﻱ ,ﺍﺳﺖ ,ﺍﻳﻢ ,ﺍﻳﺪ ,ﺍﻧﺪ {
ﻣﺎﺿﻲ ﺑﻌﻴﺪ
ﺑﻦ ﻣﺎﺿﻲ+ﻩ+ﺑﻮﺩ}+ﻡ,ﻱ,ﻳﻢ,ﻳﺪ,ﻧﺪ{
ﻣﺎﺿﻲ ﺍﻟﺘﺰﺍﻣﻲ
ﺑﻦ ﻣﺎﺿﻲ +ﻩ+ﺑﺎﺵ} +ﻡ,ﻱ,ﻳﻢ,ﻳﺪ,ﻧﺪ{
ﻣﻀﺎﺭﻉ ﺍﺧﺒﺎﺭﻱ
ﻣﻲ +ﺑﻦ ﻣﻀﺎﺭﻉ } +ﻡ,ﻱ,ﺩ,ﻳﻢ,ﻳﺪ,ﻧﺪ{
ﻣﻀﺎﺭﻉ ﺳﺎﺩﻩ
ﺏ +ﺑﻦ ﻣﻀﺎﺭﻉ } +ﻡ,ﻱ,ﺩ,ﻳﻢ,ﻳﺪ,ﻧﺪ{
ﻣﻀﺎﺭﻉ ﺳﺎﺩﻩﻣﻨﻔﻲ
ﻥ +ﺑﻦ ﻣﻀﺎﺭﻉ } +ﻡ,ﻱ,ﺩ,ﻳﻢ,ﻳﺪ,ﻧﺪ{
ﻓﻌﻞ ﺍﻣﺮ
ﺏ +ﺑﻦ ﻣﻀﺎﺭﻉ ,""}+ﻳﻢ,ﻳﺪ{
ﻓﻌﻞ ﺍﻣﺮ ﻣﻨﻔﻲ
ﻥ +ﺑﻦ ﻣﻀﺎﺭﻉ ,""}+ﻳﻢ,ﻳﺪ{
ﺩﻳﮕﺮ ﺍﻓﻌﺎﻝ ﻣﻨﻔﻲ
ﻥ } +ﻣﺎﺿﻲ ﺳﺎﺩﻩ ,ﻣﺎﺿﻲ ﺍﺳﺘﻤﺮﺍﺭﻱ ,ﻣﺎﺿﻲ ﻧﻘﻠﻲ,
ﻣﺎﺿﻲ ﺑﻌﻴﺪ ,ﻣﺎﺿﻲ ﺍﻟﺘﺰﺍﻣﻲ ,ﻣﻀﺎﺭﻉ ﺍﺧﺒﺎﺭﻱ{
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺩﺭ ﺍﺑﺘﺪﺍ ﺑﻪ ﺑﺮﺭﺳﻲ ﻃﺮﻳﻘﻪ ﺳﺎﺧﺖ ﺍﻓﻌﺎﻝ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﻣﻲﭘﺮﺩﺍﺯﻳﻢ .ﺩﺭ
ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺩﻭ ﺭﻳﺸﻪ ﺑﺮﺍﻱ ﻫﺮ ﻋﻤﻞ ﻭﺟﻮﺩ ﺩﺍﺭﺩ .ﺑﻦ ﻣﺎﺿﻲ ﻭ ﺑﻦ ﻣﻀﺎﺭﻉ.
ﺑﻪ ﻋﻨﻮﺍﻥ ﻣﺜﺎﻝ ﺑﺮﺍﻱ ﻓﻌﻞ ﺩﻳﺪﻥ ﺩﻭ ﺑﻦ )ﺩﻳﺪ :ﺑﻦ ﻣﺎﺿﻲ( ﻭ )ﺑﻴﻦ :ﺑﻦ
ﻣﻀﺎﺭﻉ( ﻣﻲﺷﻨﺎﺳﻴﻢ.
ﺗﻤﺎﻡ ﺍﻓﻌﺎﻝ ﺳﺎﺩﻩ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺑﺎ ﺍﻓﺰﻭﺩﻥ ﭘﻴﺶﻭﻧﺪﻫﺎ ﻭ ﭘﺲﻭﻧﺪﻫﺎ ﺑﻪ
ﻓﻌﻞ ﺑﻪ ﻭﺟﻮﺩ ﻣﻲﺁﻳﻨﺪ .ﺩﺭ ﺟﺪﻭﻝ ۲ﺗﻤﺎﻡ ﻗﻮﺍﻧﻴﻦ ﺳﺎﺧﺖ ﺍﻓﻌﺎﻝ ﻋﻤﺪﻩ
ﻓﺎﺭﺳﻲ ﺭﺍ ﺁﻭﺭﺩﻩﺍﻳﻢ.
ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺗﻐﻴﻴﺮﺍﺕ ﺩﺭ ﺑﻦ ﻣﺎﺿﻲ ﻭ ﻣﻀﺎﺭﻉ ﺗﻨﻬﺎ ﺑﻪ ﻣﻮﺍﺭﺩﻱ ﮐﻪ ﺑﻦ
ﻣﺎﺿﻲ ﻳﺎ ﻣﻀﺎﺭﻉ ﺑﺎ ﻳﮏ ﺣﺮﻑ ﺻﺪﺍ ﺩﺍﺭ ﺁﻏﺎﺯ ﻳﺎ ﺧﺎﺗﻤﻪ ﻣﻲ ﻳﺎﺑﺪ ﺩﺭ ﻣﻮﺍﺭﺩ
خاصی صورت می‌گیرد که این موارد هم در الگوریتم در نظر گرفته
ﺷﺪﻩﺍﻧﺪ.
ﺩﺭ ﺟﺪﻭﻝ ﺷﻤﺎﺭﻩ ۳ﻭ ۴ﺗﻮﺍﺑﻊ ﻭ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﮐﻪ ﺑﺮﺍﻱ ﺭﻳﺸﻪ ﻳﺎﺑﻲ
ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﻣﻌﺮﻓﻲ ﺷﺪﻩ ﺁﻣﺪﻩ ﺍﺳﺖ .ﺍﺻﻮﻝ ﮐﻠﻲ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺑﺮ ﺍﺳﺎﺱ
ﺟﺪﻭﻝ ۲ﺑﺪﺳﺖ ﺁﻣﺪﻩ ﺍﺳﺖ.
٢‐٣
ﺟﺪﻭﻝ ۲ﻟﻴﺴﺖ ﺍﻓﻌﺎﻝ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ٢ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﻫﺮﭼﻨﺪ ﺩﺍﺭﺍﻱ ﻧﺘﺎﻳﺞ ﺧﻮﺑﻲ ﺑﻮﺩﻩ ﺍﺳﺖ ،ﺍﻣﺎ ﺩﺭ ﺟﺪﻭﻝ ۵
ﻣﺜﺎﻝﻫﺎﻳﻲ ﺭﺍ ﻣﺸﺎﻫﺪﻩ ﻣﻲﮐﻨﻴﺪ ﮐﻪ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻧﻤﻲﺗﻮﺍﻧﺪ ﺑﻪ ﺟﻮﺍﺏ
ﺩﺭﺳﺖ ﺩﺳﺖ ﻳﺎﺑﺪ.
ﺩﻟﻴﻞ ﺍﻳﻦ ﻣﺴﺎﻟﻪ ﺩﺭ ﻗﺒﻞ ﻫﻢ ﺷﺮﺡ ﺩﺍﺩﻩ ﺷﺪ .ﺍﺯ ﺁﻧﺠﺎ ﮐﻪ ﮐﻠﻤﺎﺕ "ﺭﻭﻡ" ﻭ
"ﻣﺎﻧﻨﺪ" ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ ﻭﺟﻮﺩ ﺩﺍﺭﺩ ﺍﻳﻦ ﺍﺷﺘﺒﺎﻫﺎﺕ ﺭﺥ ﻣﻲ ﺩﻫﺪ .ﺑﻪ ﻫﻤﻴﻦ
ﻣﻨﻈﻮﺭ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﻫﻢ ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﻧﻮﺷﺘﻪ ﺷﺪﻩ ﺍﺳﺖ .
ﺩﺭ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﺩﺭ ﻫﺮ ﻗﺴﻤﺖ ﻣﺸﺨﺺ ﻣﻲ ﮐﻨﻴﻢ ﮐﻪ ﭼﻪ ﻧﻮﻉ ﮐﻠﻤﻪ
ﺍﻱ ) ﺑﻦ ﻣﻀﺎﺭﻉ ﻳﺎ ﺑﻦ ﻣﺎﺿﻲ( ﻣﻮﺭﺩ ﻗﺒﻮﻝ ﺍﺳﺖ .ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺗﻮﺍﻧﺴﺘﻪ
ﺍﺳﺖ ﺍﺷﺘﺒﺎﻫﺎﺕ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺭﺍ ﺗﺎ ﺣﺪﻭﺩﻱ ﺑﺮﻃﺮﻑ ﮐﻨﺪ ﮐﻪ ﺩﺭ ﺑﺨﺶ
ﻧﺘﺎﻳﺞ ﺷﺮﺡ ﺩﺍﺩﻩﺍﻳﻢ.
ﺟﺪﻭﻝ ٣ﻟﻴﺴﺖ ﺗﻮﺍﺑﻊ ﻣﻮﺭﺩ ﺍﺳﺘﻔﺎﺩﻩ ﺩﺭ ﮐﺮﺍﻭﺗﺰ ۲ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﮐﻠﻤﻪ wﺭﺍ ﺩﺭ ﻓﺮﻫﻨﮓ ﻟﻐﺖ ﺟﺴﺘﺠﻮ ﻣﻲﮐﻨﺪ.
)Find (w
اگر پیشوند prf در کلمه W وجود داشته باشد
ﺩﺭﺳﺖ ﺧﻮﺍﻫﺪ ﺑﻮﺩ.
)Prefix (W, prf
ﺍﮔﺮ ﭘﺴﻮﻧﺪ pofﺩﺭ ﮐﻠﻤﻪ Wﻭﺟﻮﺩ ﺩﺍﺷﺘﻪ ﺑﺎﺷﺪ
ﺩﺭﺳﺖ ﺧﻮﺍﻫﺪ ﺑﻮﺩ.
) Postfix (W, pof
ﭘﻴﺸﻮﻧﺪ prfﺭﺍ ﺍﺯ ﺍﺑﺘﺪﺍﻱ ﮐﻠﻤﻪ wﺣﺬﻑ ﻣﻲﮐﻨﺪ.
)RePrefix (W, prf
پسوند pof را از انتهای کلمه W حذف می‌کند.
) RePostfix (W, pof
جدول ۴ الگوریتم کراوتز ۲ برای زبان فارسی
Word: The word for stemming.
Result: result of stemming.
Æ
//if the word is in dictionary don’t need stemming
If(PostFix(Word,{ “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“{)} ” ﻧﺪ
W1 = Word;
RePostFix(Word, { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;)} ” ﻧﺪ
If find(Word)
If(find(Word){
Result += Word + “+”;
Result = Word;
//This part is for verbs beginning with "می"
Result += { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;} ” ﻧﺪ
ﻣﻀﺎرع اﺧﺒﺎري و ﻣﺎﺿﻲ اﻟﺘﺰاﻣﻲ//
Return;}}
If (PreFix (Word,”{)”ﻣﻲ
Word = W1;
Reprefix(Word,”;)”ﻣﻲ
ﻣﺎﺿﻲ ﺳﺎدﻩ//
Result +=”;”ﻣﻲ
If(PostFix(Word,{ “”م, “” ي, “ ”ﻳﻢ,” ”ﻳﺪ,“{)} ” ﻧﺪ
If find(Word){
RePostFix(Word, { “”م, “” ي,” ”ﻳﻢ,” ”ﻳﺪ,“;)} ” ﻧﺪ
If(find(Word){
Result += Word;
Return;
Result += Word + “+”;
}
If(PostFix(Word,{ “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“{)} ” ﻧﺪ
Result += { “”م, “” ي,” ”ﻳﻢ,” ”ﻳﺪ,“;} ” ﻧﺪ
RePostFix(Word, { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;)} ” ﻧﺪ
Return;}
If(find(Word){
Word = W1;
Result += Word + “+”;
ﻣﺎﺿﻲ ﻧﻘﻠﻲ//
Result += { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;} ” ﻧﺪ
If(PostFix(Word,{ “”ﻩ ام, “”ﻩ ا ي, “ ”ﻩ اﺳﺖ,” ”ﻩ اﻳﻢ,“ ” ﻩ اﻳﺪ,
Return;}}
RePostFix(Word, { “”ﻩ ام, “”ﻩ ا ي, “ ”ﻩ اﺳﺖ,” ”ﻩ اﻳﻢ,“ ” ﻩ اﻳﺪ,
“{)}”ﻩ اﻧﺪ
“;)}”ﻩ اﻧﺪ
If(find(Word){
Word = W1;
Result += Word + “+”;
اﻣﺮ و ﻣﺎﺿﻲ ﺳﺎدﻩ//
Result += { “”ﻩ ام, “”ﻩ ا ي, “ ”ﻩ اﺳﺖ,” ”ﻩ اﻳﻢ,“ ” ﻩ اﻳﺪ, “;}”ﻩ اﻧﺪ
If (PreFix (Word,”{)”ب
Reprefix(Word,”;)”ب
Return;}
Result +=”;”ب
Word = W1;
If find(Word){
ﻣﺎﺿﻲ ﺑﻌﻴﺪ//
Result += Word;
If(PostFix(Word,{ “”ﻩ ﺑﻮدم, “”ﻩ ﺑﻮدي, “ ”ﻩ ﺑﻮد,” ”ﻩ ﺑﻮدﻳﻢ,“ ” ﻩ ﺑﻮدﻳﺪ, “{)}”ﻩ ﺑﻮدﻧﺪ
Return;
RePostFix(Word, { “”ﻩ ﺑﻮدم, “”ﻩ ﺑﻮدي, “ ”ﻩ ﺑﻮد,” ”ﻩ ﺑﻮدﻳﻢ,
}
Result += Word + “+”;
RePostFix(Word, { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;)} ” ﻧﺪ
If(find(Word){
Result += Word + “+”;
“ ” ﻩ ﺑﻮدﻳﺪ, “;)}”ﻩ ﺑﻮدﻧﺪ
If(find(Word){
If(PostFix(Word,{ “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“{)} ” ﻧﺪ
Result += { “”ﻩ ﺑﻮدم, “”ﻩ ﺑﻮدي, “ ”ﻩ ﺑﻮد,” ”ﻩ ﺑﻮدﻳﻢ,“ ” ﻩ ﺑﻮدﻳﺪ,
“;}”ﻩ ﺑﻮدﻧﺪ
Return;}
Result += { “”م, “” ي, “” د,” ”ﻳﻢ,” ”ﻳﺪ,“;} ” ﻧﺪ
Word = W1;
Return;}}
ﻣﺎﺿﻲ اﻟﺘﺰاﻣﻲ//
اﻣﺮ و ﻣﺎﺿﻲ ﺳﺎدﻩ ﻣﻨﻔﻲ//
If(PostFix(Word,{ “”ﻩ ﺑﺎﺷﻢ, “”ﻩ ﺑﺎﺷﻲ, “ ”ﻩ ﺑﺎﺷﺪ,” ”ﻩ ﺑﺎﺷﻴﻢ,
“ ” ﻩ ﺑﺎﺷﻴﺪ, “{)}”ﻩ ﺑﺎﺷﻨﺪ
If (PreFix (Word,”{)”ن
RePostFix(Word, { “”ﻩ ﺑﺎﺷﻢ, “”ﻩ ﺑﺎﺷﻲ, “ ”ﻩ ﺑﺎﺷﺪ,” ”ﻩ ﺑﺎﺷﻴﻢ,
“ ” ﻩ ﺑﺎﺷﻴﺪ, “;)}”ﻩ ﺑﺎﺷﻨﺪ
If(find(Word){
Reprefix(Word,”;)”ن
Result += Word + “+”;
Result +=”;”ن
If find(Word){
Result += Word;
Return;}
Æ
Result += { “”ﻩ ﺑﺎﺷﻢ, “”ﻩ ﺑﺎﺷﻲ, “ ”ﻩ ﺑﺎﺷﺪ,” ”ﻩ ﺑﺎﺷﻴﻢ,“ ” ﻩ ﺑﺎﺷﻴﺪ, “;}”ﻩ ﺑﺎﺷﻨﺪ
Return;}
Word = W1;
If (prefix(Word,”)”ن
{//Remove the "ن" prefix and check all the parts again according to Table 2}
ﺟﺪﻭﻝ ۵ﺑﺮﺧﻲ ﺍﺯ ﻣﺜﺎﻝﻫﺎﻳﻲ ﮐﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﭘﺎﺳﺦ ﻧﺎﺩﺭﺳﺖ ﺑﻪ ﺁﻧﻬﺎ ﺩﺍﺩﻩ ﺍﺳﺖ.
ﮐﻠﻤﻪ
ﺭﻳﺸﻪ ﻳﺎﻓﺖ ﺷﺪﻩ
ﺭﻳﺸﻪ ﺩﺭﺳﺖ
ﻣﻲﺭﻭﻡ
ﻣﻲ +ﺭﻭﻡ
ﻣﻲ +ﺭﻭ +ﻡ
ﻣﻲﻣﺎﻧﻨﺪ
ﻣﻲ +ﻣﺎﻧﻨﺪ
ﻣﻲ +ﻣﺎﻥ +ﻧﺪ
‐٤ﻧﺘﺎﻳﺞ ﺑﺪﺳﺖ ﺁﻣﺪﻩ
ﺑﺮﺍﻱ ﺁﺯﻣﻮﺩﻥ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺍﺯ ۱۰۰۰ﺟﻤﻠﻪ ﻓﺎﺭﺳﻲ
ﺍﺳﺘﻔﺎﺩﻩ ﺷﺪﻩ ﺍﺳﺖ .ﺳﭙﺲ ﺗﻤﺎﻡ ﺍﻓﻌﺎﻝ ﺗﻮﺳﻂ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎﻱ ﺑﺎﻻ ﺭﻳﺸﻪﻳﺎﺑﻲ
ﺷﺪﻩ ﺍﺳﺖ .ﺩﺭ ﻧﻬﺎﻳﺖ ﻧﺘﺎﻳﺞ ﺟﺪﻭﻝ ۶ﺑﺪﺳﺖ ﺁﻣﺪﻩ ﺍﺳﺖ .ﻫﻤﺎﻥ ﻃﻮﺭ ﮐﻪ
ﺩﻳﺪﻩ ﻣﻲﺷﻮﺩ ،ﻫﺮ ﺩﻭ ﺍﻟﮕﻮﺭﻳﺘﻢ ﺩﺍﺭﺍﻱ ﻧﺘﺎﻳﺞ ﺧﻮﺑﻲ ﻫﺴﺘﻨﺪ .ﺍﺯ ﺗﻌﺪﺍﺩ ۲۲۳
ﮐﻠﻤﻪﺍﻱ ﮐﻪ ﺑﺎ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺎ ﺗﺴﺖ ﺷﺪﻩ ﺍﺳﺖ ،ﺗﻨﻬﺎ ۱۳ﻣﻮﺭﺩ ﺩﺭ
ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺍﺷﺘﺒﺎﻩ ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﺷﺪﻩ .ﺍﻳﻦ ﺭﻗﻢ ﺩﺭ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﺑﻪ
۴ﻣﻮﺭﺩ ﺗﻘﻠﻴﻞ ﻳﺎﻓﺘﻪ ﺍﺳﺖ .ﺑﻪ ﻋﺒﺎﺭﺕ ﺩﻳﮕﺮ ﺑﺎ ﺍﺳﺘﻔﺎﺩﻩ ﺍﺯ ﺑﻬﻴﻨﻪ ﺳﺎﺯﻱ
انجام شده می‌توان حدود ۷۰ درصد اشتباهات را اصلاح کرد.
نکته قابل توجه اینکه تنها در مورد افعال مرکب اشتباه رخ داده
ﺍﺳﺖ ،ﮐﻪ ﺑﺮﺍﻱ ﺁﻧﻬﺎ ﻗﺎﻧﻮﻧﻲ ﻭﺟﻮﺩ ﻧﺪﺍﺷﺘﻪ ﺍﺳﺖ.
ﻫﻤﺎﻥ ﻃﻮﺭ ﮐﻪ ﻣﻼﺣﻈﻪ ﻣﻲ ﺷﻮﺩ ﺍﻟﮕﻮﺭﻳﺘﻢﻫﺎ ﻫﺮ ﺩﻭ ﺩﺍﺭﺍﻱ ﻧﺘﺎﻳﺞ ﺧﻮﺑﻲ
ﺑﻮﺩﻩﺍﻧﺪ ﮐﻪ ﺍﻟﺒﺘﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ۲ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﺑﻬﺘﺮ ﻋﻤﻞ ﮐﺮﺩﻩ
ﺍﺳﺖ.
ﺑﺮﺍﻱ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ ﮐﻪ ﺩﺍﺭﺍﻱ ﺳﺎﺧﺘﺎﺭ ﺭﻳﺨﺘﻲ ﻣﻨﻈﻤﻲ ﺍﺳﺖ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺎﻱ
ﺭﻳﺸﻪﻳﺎﺑﻲ ﺯﻳﺎﺩﻱ ﻣﻌﺮﻓﻲ ﺷﺪﻩ ﺍﺳﺖ ،ﺍﻣﺎ ﺍﺳﺘﻔﺎﺩﻩ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﻫﻤﺎﻥ
گونه که الگوریتم کراوتز را بهینه کرده است، تأثیر خوبی هم بر روی
ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺎﻱ ﺭﻳﺨﺖ ﺷﻨﺎﺳﻲ ﻓﺎﺭﺳﻲ ﺩﺍﺷﺖ.
ﺑﺎ ﺍﻳﻦ ﺣﺎﻝ ﺍﻳﻦ ﺭﻭﺵ ﺭﺍ ﺗﻨﻬﺎ ﺑﺮﺍﻱ ﺍﻓﻌﺎﻝ ﺑﻪ ﮐﺎﺭ ﺑﺮﺩﻩﺍﻳﻢ ﮐﻪ ﺑﺮﺍﻱ ﺩﻳﮕﺮ
ﮔﺮﻭﻩﻫﺎﻱ ﺍﺳﺎﻣﻲ ﻗﺎﺑﻞ ﺗﻌﻤﻴﻢ ﺍﺳﺖ.
‐٦ﻣﺮﺍﺟﻊ
[1] R. Krovetz, 1993: “Viewing morphology as an inference
process”, in R. Korfhage et al., Proc. 16th ACM SIGIR
Conference, Pittsburgh, June 27-July 1, 1993; pp. 191-202.
[2] J. P. Callan, M. Connell, A. Du. “Automatic discovery of
language models for text databases”, ACM SIGMOD
International Conference on Management of Data, pages 479-490, 1999.
[3] K. Aas and L. Eikvil, “Text Categorisation: A Survey”,
http://citeseer.nj.nec.com/aas99text.html, 1999.
‐٥ﻧﺘﻴﺠﻪﮔﻴﺮﻱ
[4] M. F. Porter. “An algorithm for suffix stripping”, Program,
14(3):130-137, 1980.
ﺩﺭ ﺍﻳﻦ ﻣﻘﺎﻟﻪ ﺭﻭﻳﮑﺮﺩﻫﺎﻱ ﻣﺨﺘﻠﻒ ﺭﺍ ﺑﻪ ﻣﺴﺎﻟﻪ ﺭﻳﺸﻪﻳﺎﺑﻲ ﻣﻮﺭﺩ ﺑﺮﺭﺳﻲ ﻗﺮﺍﺭ
ﺩﺍﺩﻳﻢ .ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﺩﺭ ﺑﺴﻴﺎﺭﻱ ﺍﺯ ﺳﺎﻣﺎﻧﻪ ﻫﺎﻱ ﭘﺮﺩﺍﺯﺵ ﺯﺑﺎﻥ ﻃﺒﻴﻌﻲ ﮐﺎﺭﺑﺮﺩ
ﺩﺍﺭﺩ .ﺍﺯ ﻣﻴﺎﻥ ﺩﻭ ﺭﻭﻳﮑﺮﺩ ﺑﻪ ﺣﻞ ﻣﺴﺎﻟﻪ ﺭﻭﻳﮑﺮﺩ ﺭﻳﺨﺖ ﺷﻨﺎﺳﻲ ﺭﺍ ﺑﺮﺭﺳﻲ
ﮐﺮﺩﻳﻢ ﻭ ﻳﮑﻲ ﺍﺯ ﺍﻳﻦ ﺭﻭﺵﻫﺎ ﮐﻪ ﺍﻟﮕﻮﺭﻳﺘﻢ ﮐﺮﺍﻭﺗﺰ ﺑﻮﺩ ﻣﻌﺮﻓﻲ ﺷﺪ .ﺑﺎ
ﺍﺳﺘﻔﺎﺩﻩ ﺍﺯ ﺍﻳﻦ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻧﺴﺨﻪ ﺟﺪﻳﺪﻱ ﺍﺯ ﺭﻭﺵﻫﺎﻱ ﺭﻳﺸﻪﻳﺎﺑﻲ ﺭﺍ ﻣﻌﺮﻓﻲ
ﮐﺮﺩﻳﻢ ﮐﻪ ﺍﺯ ﻧﻘﺶ ﮐﻠﻤﺎﺕ ﺑﺮﺍﻱ ﺭﻳﺸﻪﻳﺎﺑﻲ ﺍﺳﺘﻔﺎﺩﻩ ﺷﺪﻩ ﺍﺳﺖ.
[5] E.M Voorhees, “using wordnet for text retrieval”,
WordNet an electronic lexical Database, Page 285-303, MIT
Press.
[6] Kazem Taghva, Russell Beckley, Mohammad Sadeh, “A
Stemming Algorithm for the Farsi Language”. ITCC (1): 158-162, 2005.
ﺟﺪﻭﻝ ٦ﻧﺘﺎﻳﺞ ﺑﺪﺳﺖ ﺁﻣﺪﻩ ﺑﺮﺍﻱ ﺍﻟﮕﻮﺭﻳﺘﻢ ﻫﺎﻱ ﺭﻳﺸﻪ ﻳﺎﺑﻲ ﺩﺭ ﺯﺑﺎﻥ ﻓﺎﺭﺳﻲ
ﺩﺭﺻﺪ ﺩﺭﺳﺘﻲ
ﻧﺘﺎﻳﺞ ﻧﺎﺩﺭﺳﺖ
ﻧﺘﺎﻳﺞ ﺩﺭﺳﺖ
ﺗﻌﺪﺍﺩ ﮐﻞ ﮐﻠﻤﺎﺕ
ﺍﻟﮕﻮﺭﻳﺘﻢ ﻣﻮﺭﺩ ﺍﺳﺘﻔﺎﺩﻩ
٩۴٫١٧
١٣
٢١٠
٢٢٣
ﮐﺮاوﺗﺰ
٩٨٫٢٠
۴
٢١٩
٢٢٣
ﮐﺮاوﺗﺰ٢
© Copyright 2026 Paperzz