{\rtf1\ansi\ansicpg1252\uc1 \deff0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f4\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times;}
{\f16\fswiss\fcharset0\fprq2{\*\panose 020b0602030504020204}Lucida Sans Unicode;}{\f46\froman\fcharset238\fprq2 Times New Roman CE;}{\f47\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f49\froman\fcharset161\fprq2 Times New Roman Greek;}
{\f50\froman\fcharset162\fprq2 Times New Roman Tur;}{\f51\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f52\fswiss\fcharset238\fprq2 Arial CE;}{\f53\fswiss\fcharset204\fprq2 Arial Cyr;}{\f55\fswiss\fcharset161\fprq2 Arial Greek;}
{\f56\fswiss\fcharset162\fprq2 Arial Tur;}{\f57\fswiss\fcharset186\fprq2 Arial Baltic;}{\f58\fmodern\fcharset238\fprq1 Courier New CE;}{\f59\fmodern\fcharset204\fprq1 Courier New Cyr;}{\f61\fmodern\fcharset161\fprq1 Courier New Greek;}
{\f62\fmodern\fcharset162\fprq1 Courier New Tur;}{\f63\fmodern\fcharset186\fprq1 Courier New Baltic;}{\f142\fswiss\fcharset238\fprq2 Lucida Sans Unicode CE;}{\f143\fswiss\fcharset204\fprq2 Lucida Sans Unicode Cyr;}
{\f145\fswiss\fcharset161\fprq2 Lucida Sans Unicode Greek;}{\f146\fswiss\fcharset162\fprq2 Lucida Sans Unicode Tur;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;
\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}
{\stylesheet{\widctlpar\adjustright \fs22\lang2057\cgrid \snext0 Normal;}{\s1\fi-432\li432\sb240\sa60\keepn\widctlpar\jclisttab\tx432\ls2\adjustright \b\f1\fs28\lang2057\kerning28\cgrid \sbasedon0 \snext0 heading 1;}{
\s2\fi-576\li576\sb240\sa60\keepn\widctlpar\jclisttab\tx576\ls2\ilvl1\adjustright \b\i\f1\lang2057\cgrid \sbasedon0 \snext0 heading 2;}{\s3\qc\sb100\sa100\keepn\nowidctlpar\adjustright \lang2057 \sbasedon0 \snext0 heading 3;}{
\s4\qj\sb240\sa60\keepn\nowidctlpar\adjustright \b\fs22\lang2057 \sbasedon0 \snext0 heading 4;}{\*\cs10 \additive Default Paragraph Font;}{\s15\qc\sa200\nowidctlpar\outlinelevel0\adjustright \b\kerning28 \sbasedon0 \snext16 LREC author name;}{
\s16\qc\nowidctlpar\outlinelevel0\adjustright \fs20\kerning28 \sbasedon0 \snext17 LREC affiliation;}{\s17\qc\sb240\widctlpar\adjustright \b\fs20\cgrid \sbasedon0 \snext18 LREC heading Abstract;}{\s18\qj\sl-200\slmult0\widctlpar\adjustright \fs18\cgrid 
\sbasedon0 \snext18 LREC abstract text;}{\s19\qc\sa240\nowidctlpar\outlinelevel0\adjustright \b\fs28\kerning28 \sbasedon0 \snext15 LREC title;}{\s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid 
\sbasedon0 \snext21 LREC Heading 1;}{\s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid \sbasedon0 \snext21 LREC main body text;}{\s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls38\ilvl1\adjustright \b\fs22\cgrid 
\sbasedon0 \snext21 LREC Heading 2;}{\s23\qj\fi-680\li680\sb240\sl-220\slmult0\widctlpar\jclisttab\tx680\ls38\ilvl2\adjustright \b\fs20\cgrid \sbasedon0 \snext21 LREC Heading 3;}{\s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid 
\sbasedon0 \snext24 LREC caption;}{\s25\qj\fi-198\li198\sl-220\slmult0\widctlpar\adjustright \fs20\lang2057\cgrid \sbasedon0 \snext25 LREC biblio references;}{\s26\qj\widctlpar\adjustright \fs18\lang2057\cgrid \sbasedon0 \snext26 LREC footnote;}{
\s27\widctlpar\adjustright \fs22\cf1\lang1032\cgrid \sbasedon0 \snext27 Body Text;}{\s28\qj\sl360\slmult1\widctlpar\tx0\adjustright \fs22\cf1\lang1032\cgrid \sbasedon0 \snext28 Body Text 2;}{\s29\qj\sl360\slmult1\widctlpar\tx0\adjustright 
\cf1\lang1032\cgrid \sbasedon0 \snext29 Body Text 3;}{\s30\widctlpar\adjustright \fs18\lang2057\cgrid \sbasedon0 \snext30 footnote text;}{\*\cs31 \additive \super \sbasedon10 footnote reference;}{\*\cs32 \additive \ul\cf2 \sbasedon10 Hyperlink;}{
\s33\widctlpar\adjustright \f4\lang1044\cgrid \sbasedon0 \snext0 Date;}{\s34\sb120\sa120\widctlpar\adjustright \b\fs22\lang2057\cgrid \sbasedon0 \snext0 caption;}{\s35\widctlpar\adjustright \f2\fs20\lang2057\cgrid \sbasedon0 \snext35 Plain Text;}}
{\*\listtable{\list\listtemplateid-1309772410{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs24\cf0\nosupersub\striked0\fbias0 
\s22\fi-510\li510\jclisttab\tx510 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs22\cf0\nosupersub\striked0\fbias0 
\fi-567\li567\jclisttab\tx567 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}
\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs20\cf0\nosupersub\striked0\fbias0 \fi-1224\li1224\jclisttab\tx1224 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers
\'01\'03\'05\'07;}\fi-648\li1728\jclisttab\tx1800 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-792\li2232\jclisttab\tx2520 }{\listlevel
\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-936\li2736\jclisttab\tx2880 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1080\li3240\jclisttab\tx3600 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1224\li3744\jclisttab\tx3960 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1440\li4320\jclisttab\tx4680 }{\listname ;}\listid11078174}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid46223670}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid166986147}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid180320404}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid186020862}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid190070027}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid276107537}{\list\listtemplateid1052124966\listsimple{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'02\'00.;}{\levelnumbers\'01;}\fi-360\li360\jclisttab\tx360 }{\listname ;}\listid284580713}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid293291867}{\list\listtemplateid-1\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelold\levelspace0\levelindent283{\leveltext
\'01\u-4051 ?;}{\levelnumbers;}\f3\fbias0 \fi-283\li1723 }{\listname ;}\listid387657069}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}
\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid404231543}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 
\fi-340\li340\jclisttab\tx360 }{\listname ;}\listid418990668}{\list\listtemplateid68419585\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360
\jclisttab\tx360 }{\listname ;}\listid419528341}{\list\listtemplateid-489387394{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat4\levelspace0\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\fbias0 \fi-720\li720\jclisttab\tx720 }{\listlevel
\levelnfc0\leveljc0\levelfollow0\levelstartat2\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\fbias0 \fi-720\li720\jclisttab\tx720 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-720\li720\jclisttab\tx720 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 
\fi-720\li720\jclisttab\tx720 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-1080\li1080\jclisttab\tx1080 }{\listlevel\levelnfc0
\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1080\li1080\jclisttab\tx1080 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1440\li1440\jclisttab\tx1440 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1440\li1440\jclisttab\tx1440 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1800\li1800\jclisttab\tx1800 }{\listname ;}\listid624696602}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid757871045}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid764230464}{\list\listtemplateid-321330758{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\s1\fi-432\li432\jclisttab\tx432 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\s2\fi-576\li576\jclisttab\tx576 }
{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\s3\fi-720\li720\jclisttab\tx720 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\s4\fi-864\li864\jclisttab\tx864 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers
\'01\'03\'05\'07\'09;}\fi-1008\li1008\jclisttab\tx1008 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1152\li1152
\jclisttab\tx1152 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1296\li1296\jclisttab\tx1296 }{\listlevel\levelnfc0
\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1440\li1440\jclisttab\tx1440 }{\listlevel\levelnfc0\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1584\li1584\jclisttab\tx1584 }{\listname ;}\listid796603225}{\list\listtemplateid201916417
\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid923026187}{\list\listtemplateid201916417\listsimple
{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid929585878}{\list\listtemplateid-554291698\listsimple{\listlevel
\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid992030942}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23
\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid1086925978}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid1204439994}{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1225289755}{\list\listtemplateid68419585\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1281180326}{\list\listtemplateid-1\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelold\levelspace0
\levelindent283{\leveltext\'01\u-4051 ?;}{\levelnumbers;}\f3\fbias0 \fi-283\li1723 }{\listname ;}\listid1366980872}{\list\listtemplateid68550671\listsimple{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'02\'00.;}{\levelnumbers\'01;}\fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1394965037}{\list\listtemplateid432717248{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}
\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs24\cf0\nosupersub\striked0\fbias0 \s23\fi-360\li360\jclisttab\tx360 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}
\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs22\cf0\nosupersub\striked0\fbias0 \fi-567\li567\jclisttab\tx567 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers
\'01\'03\'05;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs20\cf0\nosupersub\striked0\fbias0 \fi-680\li680\jclisttab\tx680 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-648\li1728\jclisttab\tx1800 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}
\fi-792\li2232\jclisttab\tx2520 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-936\li2736\jclisttab\tx2880 }{\listlevel
\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1080\li3240\jclisttab\tx3600 }{\listlevel\levelnfc0\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1224\li3744\jclisttab\tx3960 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1440\li4320\jclisttab\tx4680 }{\listname ;}\listid1658529913}{\list\listtemplateid-320029390{\listlevel\levelnfc0\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs24\cf0\nosupersub\striked0\fbias0 \s20\fi-360\li360\jclisttab\tx360 }{\listlevel\levelnfc0\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs22\cf0\nosupersub\striked0\fbias0 \s22\fi-567\li567\jclisttab\tx567 }{\listlevel\levelnfc0\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs20\cf0\nosupersub\striked0\fbias0 \s23\fi-680\li680\jclisttab\tx680 }{\listlevel\levelnfc0
\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-648\li1728\jclisttab\tx1800 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-792\li2232\jclisttab\tx2520 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers
\'01\'03\'05\'07\'09\'0b;}\fi-936\li2736\jclisttab\tx2880 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1080\li3240
\jclisttab\tx3600 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1224\li3744\jclisttab\tx3960 }{\listlevel
\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1440\li4320\jclisttab\tx4680 }{\listname ;}\listid1663847801}
{\list\listtemplateid-1982584262\listsimple{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1711806532}
{\list\listtemplateid-1468880766\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat0\levelspace0\levelindent0{\leveltext\'01-;}{\levelnumbers;}\fbias0 \fi-360\li2520\jclisttab\tx2520 }{\listname ;}\listid1720594893}
{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid1766030429}
{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1766879857}
{\list\listtemplateid201916417\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid1885674446}
{\list\listtemplateid1492922596{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs24\cf0\nosupersub\striked0\fbias0 \fi-360\li360
\jclisttab\tx360 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs22\cf0\nosupersub\striked0\fbias0 \fi-567\li567
\jclisttab\tx567 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\b\i0\strike0\outl0\shad0\embo0\impr0\caps0\v0\f0\fs20\cf0\nosupersub\striked0\fbias0 
\fi-680\li680\jclisttab\tx680 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-648\li1728\jclisttab\tx1800 }{\listlevel\levelnfc0\leveljc0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-792\li2232\jclisttab\tx2520 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-936\li2736\jclisttab\tx2880 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1080\li3240\jclisttab\tx3600 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1224\li3744\jclisttab\tx3960 }{\listlevel\levelnfc0\leveljc0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1440\li4320\jclisttab\tx4680 }{\listname ;}\listid1906063721}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid1914923716}{\list\listtemplateid-554291698\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-340\li340\jclisttab\tx360 }{\listname ;}\listid2055303166}{\list\listtemplateid67698689\listsimple{\listlevel\levelnfc23\leveljc0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0 \fi-360\li360\jclisttab\tx360 }{\listname ;}\listid2127852066}}{\*\listoverridetable{\listoverride\listid1711806532\listoverridecount0\ls1}{\listoverride\listid796603225
\listoverridecount0\ls2}{\listoverride\listid1720594893\listoverridecount0\ls3}{\listoverride\listid1366980872\listoverridecount0\ls4}{\listoverride\listid387657069\listoverridecount0\ls5}{\listoverride\listid418990668\listoverridecount0\ls6}
{\listoverride\listid190070027\listoverridecount0\ls7}{\listoverride\listid293291867\listoverridecount0\ls8}{\listoverride\listid992030942\listoverridecount0\ls9}{\listoverride\listid2055303166\listoverridecount0\ls10}{\listoverride\listid1766030429
\listoverridecount0\ls11}{\listoverride\listid1086925978\listoverridecount0\ls12}{\listoverride\listid1204439994\listoverridecount0\ls13}{\listoverride\listid1914923716\listoverridecount0\ls14}{\listoverride\listid1225289755\listoverridecount0\ls15}
{\listoverride\listid276107537\listoverridecount0\ls16}{\listoverride\listid1885674446\listoverridecount0\ls17}{\listoverride\listid1766879857\listoverridecount0\ls18}{\listoverride\listid764230464\listoverridecount0\ls19}{\listoverride\listid923026187
\listoverridecount0\ls20}{\listoverride\listid757871045\listoverridecount0\ls21}{\listoverride\listid404231543\listoverridecount0\ls22}{\listoverride\listid186020862\listoverridecount0\ls23}{\listoverride\listid929585878\listoverridecount0\ls24}
{\listoverride\listid180320404\listoverridecount0\ls25}{\listoverride\listid166986147\listoverridecount0\ls26}{\listoverride\listid46223670\listoverridecount0\ls27}{\listoverride\listid624696602\listoverridecount0\ls28}{\listoverride\listid284580713
\listoverridecount0\ls29}{\listoverride\listid1658529913\listoverridecount0\ls30}{\listoverride\listid1906063721\listoverridecount0\ls31}{\listoverride\listid11078174\listoverridecount0\ls32}{\listoverride\listid1663847801\listoverridecount0\ls33}
{\listoverride\listid1281180326\listoverridecount0\ls34}{\listoverride\listid419528341\listoverridecount0\ls35}{\listoverride\listid2127852066\listoverridecount0\ls36}{\listoverride\listid1663847801\listoverridecount0\ls37}{\listoverride\listid1663847801
\listoverridecount0\ls38}{\listoverride\listid1394965037\listoverridecount0\ls39}}{\info{\title Evaluation of parsed corpora}{\author Diana Santos & Caroline Gasperin}{\operator Diana Santos}{\creatim\yr2002\mo4\dy4\hr11\min28}
{\revtim\yr2002\mo4\dy4\hr11\min28}{\printim\yr2002\mo3\dy22\hr11\min5}{\version2}{\edmins1}{\nofpages1}{\nofwords5584}{\nofchars31829}{\*\company SINTEF & PUCRS}{\nofcharsws39088}{\vern89}}\paperw11906\paperh16838\margl1077\margr1077\margt1701\margb1077 
\widowctrl\ftnbj\aenddoc\makebackup\formshade\viewkind4\viewscale100\pgbrdrhead\pgbrdrfoot \fet0{\*\template C:\\Program Files\\Microsoft Office\\Templates\\LREC2.dot}\sectd \linex0\headery1440\footery1440\colsx709\sectdefaultcl {\*\pnseclvl1
\pnucrm\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta )}}{\*\pnseclvl5
\pndec\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang
{\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}\pard\plain \s19\qc\sa240\nowidctlpar\outlinelevel0\adjustright \b\fs28\kerning28 {\lang2057 
Evaluation of parsed corpora: Experiments in user-transparent and user-visible evaluation
\par }\pard\plain \s15\qc\nowidctlpar\outlinelevel0\adjustright \b\kerning28 {\lang2057 Diana Santos*, Caroline Gasperin}{\f16\lang2057\super \'86}{\lang2057 
\par }\pard\plain \s16\qc\nowidctlpar\outlinelevel0\adjustright \fs20\kerning28 {\lang2057 
\par * SINTEF Tele og Data
\par Pb 124, Blindern, NO-0314 Oslo, Norway
\par Diana.Santos@sintef.no
\par }{\f16\lang2057\super \'86}{\lang2057 Faculdade de Inform\'e1tica, PPGCC, PUCRS
\par Av. Ipiranga, 6681, Pr\'e9dio 16, 90619-900 Porto Alegre, Brazil 
\par caroline@inf.pucrs.br
\par }\pard\plain \s17\qc\sb240\widctlpar\adjustright \b\fs20\cgrid {\lang2057 Abstract
\par }\pard\plain \s18\qj\sl-200\slmult0\widctlpar\adjustright \fs18\cgrid {\lang2057 In the present paper, we describe and discuss the evaluation of parsed corpora, namely the ones that are available on the Web for querying in the AC/DC project. The pape
r has two parts: the first one suggests a set of different evaluation parameters and measures that are much more illuminating than commonly used simple precision measures, while the second evaluates the parsed corpus for a particular task -- that of autom
atic thesaurus building. The two evaluations are thus complementary, in that, in Gaizauskas' (1998) terminology, the first is a typical user-transparent evaluation, while the second is user-visible.
\par 
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\cols2\sectdefaultcl {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 1.\tab}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar
\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 Introduction 
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 There is at present a great deal of activity as far
 as parser evaluation is concerned, witnessed among other things by the workshop "Towards improved evaluation measures for parsing systems" at the present conference.
\par We are concerned here with the closely related subject of }{\i\lang2057 parsed corpora evaluation}{\lang2057 , whic
h brings, however, a different perspective into the picture. In fact, although a parsed corpus can be seen as a frozen picture of a parsing system, it usually has a life of its own, and a set of users, and uses, which are different from those of the parse
r itself. In addition, many of the parsed corpora presented as such, or as treebanks, include human revision and therefore problems and capabilities beyond those provided by a parser itself.
\par Santos and Bick (2000) presented the AC/DC project}{\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{ See }{\field\flddirty{\*\fldinst { HYPERLINK http://cgi.portugues.mct.pt/acesso/ 
}{{\*\datafield 
00d0c9ea79f9bace118c8200aa004ba90b02000000170000002400000068007400740070003a002f002f006300670069002e0070006f0072007400750067007500650073002e006d00630074002e00700074002f00610063006500730073006f002f000000e0c9ea79f9bace118c8200aa004ba90b48000000680074007400
70003a002f002f006300670069002e0070006f0072007400750067007500650073002e006d00630074002e00700074002f00610063006500730073006f002f000000}}}{\fldrslt {\cs32\ul\cf2 http://cgi.portugues.mct.pt/acesso/}}}{.}}}{\lang2057 , a Web service
 giving access to Portuguese parsed corpora using the PALAVRAS parser }{\lang2057 (Bick, 2000),}{\lang2057  and mentioned the need to conduct user studies to evaluate its usefulness and the quality of the underlying annotation.
\par We believe there is too little work on the evaluation o
f language resources in themselves (as compared to programs, systems or tools), although it might be argued that the first kind should be easier to evaluate than the second. Santos and Rocha (2001) attempted to evaluate a large corpus as far as structure 
and tokenization were concerned. Here, we try to go a step further and look at syntactically annotated corpora.
\par {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 2.\tab}}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 Goal and outline of the paper
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 The primary motivation for the paper is the need to provide users of the AC/DC service with rigorous information about 
what is being supplied, and what the shortcomings are that are (vaguely) known to exist in the material.
\par }{\lang2057 Gaizauskas (1998) has suggested bringing the user into the evaluation of NLP applications. He distinguishes between }{\i\lang2057 user-transparent}{\lang2057  evaluations, that 
look in terms of input and output of a particular computational-linguistic task, which may not make sense for an external user, and }{\i\lang2057 user-visible}{\lang2057 
 evaluations where one is measuring success relative to a particular task a user understands and is involved with.
\par In the context of corpora resources, the typical user-transparent question concerns the quality of the actual tagging and parsing, while user-visible evaluation depends mainly on what a user is supposed to do with the parsed corpus, and how directly its 
quality matters for that task.
\par In this paper, we suggest a series of criteria for the first kind of evaluation and measure some of them; for the second kind, we investigate the task of automated thesaurus building (Grefenstette, 1994) following Gasperin's (2001) work for Portuguese.

\par }{\lang2057 We are most grateful to Eckhard Bick for having supplied his PALAVRAS, and would like to emphasise early on that what we are presenting here is }{\i\lang2057 not}{\lang2057 
 a parser evaluation. In fact, our parsed corpora have been created using several different versions of the parser (none reflecting its today\rquote 
s performance) and, besides, the final rendering of the parsed corpora amounted to differences in around 20% of the tokens, as explained in detail in Santos and Bick (2000), which means that the AC
/DC project in itself "added" many parsing options, and possibly many mistakes as well.
\par Still, the parsed corpora exist and are being actively used by an increasing user community. Therefore, they deserve to be evaluated in their own right and qualified so
 that they can be improved and their improvement measured, something which so far has not been possible to do in a systematic way.
\par We provide here a short description of the corpora used for the present paper:
\par ENPCANOT (v.2.3) is }{\lang2057 a corpus of translations of English fiction texts into Portuguese, a subset of the Portuguese part of the ENPC corpus (Johansson et al., 1999; Santos and Oksefjell, 1999) containing around 70,000 words.}{\lang2057 
 It was manually revised by the ENPC team and contains texts in the European and Brazilian variants of Portuguese.
\par EBRANOT (v.3.4) is a part of the Borba-Ramsey corpus, distributed by the ECI/MCI initiative, and contains exclusively Brazilian text in several genres: literary, newspaper, scientific articles and law, amounting to 700,000 words.
\par NATPANOT (v.2.6) is a corpus of 8 million words of newspaper text (1991-1994) in European Portuguese.
\par FOLHANOT is the first million of a newspaper text corpus in Brazilian Portuguese, currently in development by the AC/DC project. It is a proper subset of the SCANOT corpus, compiled by NILC.
\par {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.\tab}}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 Annotation quality
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
Although one could in principle be interested in all aspects having to do with an annotated corpus, such as: Is there sufficiently encompassing documentation? Is there a formal definition, in t
he form of e.g. a DTD? Does the corpus conform to it? Has the corpus been validated by a third party? Has it been evaluated? etc. etc., we will be here solely concerned with what is central to the parsing issue.
\par {\listtext\pard\plain\s22 \b\fs22\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.1.\tab}}\pard\plain \s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls38\ilvl1\adjustright \b\fs22\cgrid {\lang2057 What should a parser do? 
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 By informing other
s that a corpus is parsed, we implicitly state at least the four tenets: 1) The text units (tokens) have been recognized and assigned to their right category (lemmatization and PoS tagging); 2) MWE have been identified (tokenization); 3) Morphological inf
o
rmation has been made explicit (morphological analysis); and 4) Syntactic constituents and relations have been identified (couched, depending on the theoretical inclinations, as constituency, functional and/or dependency structure). Additionally, other ki
nds of information can also be present in parsed corpora, such as named entity classification, anaphoric dependencies or rhetorical structure, which we will disregard here since they are absent from the AC/DC corpora.
\par Not all these tasks are equally relevan
t or well defined, and not all the problems that are to be solved equally frequent. In addition, there are strong dependencies between these tasks, with the vexing property that each requires a different unit of measure. We will try to describe these prob
lems in detail with the help of the AC/DC corpora. But first we turn to the problem of assessing separately the different kinds of information.
\par {\listtext\pard\plain\s23 \b\fs20\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.1.1.\tab}}\pard\plain \s23\qj\fi-680\li680\sb240\sl-220\slmult0\widctlpar\jclisttab\tx680\ls38\ilvl2\adjustright \b\fs20\cgrid {\lang2057 
In which level is one particular phenomenon handled?
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 In many cases it is up to the parser developer in which linguistic level \endash  better, in which way \endash 
 a particular distinction made in language should be encoded in the output of the parser. This should discourage one from evaluating levels independently, especially when comparing different parsing approaches. 
\par One example is the choice between encoding a particular syntactic difference as PoS or as constituent function. In }{\i\lang2057 Tr\'eas quartos do hotel foram ocupados pela pol\'edcia}{\lang2057 
 (three quarters/rooms of the hotel were taken by the police) one can represent the difference by assigning the PoS noun to }{\i\lang2057 quartos}{\lang2057 
 in one interpretation and the PoS numeral in the other. Alternatively, one can have both parses tagging quartos as noun at the PoS level, but individuated by their function inside the NP }{\i\lang2057 Tr\'eas quartos do hotel}{\lang2057  \endash 
 having either }{\i\lang2057 quartos}{\lang2057  (rooms) or }{\i\lang2057 hotel}{\lang2057  as NP head.
\par Another encoding alternative is between PoS or constituent type: In }{\i\lang2057 Os pobres sa\'edram }{\lang2057 (the poor left), }{\i\lang2057 pobres}{\lang2057 
 may be assigned the PoS noun and assigned head of the NP, or the PoS adjective and still head of the NP, both conveying the same thing (though with different underlying theories).
\par The same liberty at making distinctions can be seen in the three sentences }{\i\lang2057 Ele est\'e1 de volta}{\lang2057  (He is }{\ul\lang2057 back}{\lang2057 ), }{\i\lang2057 De volta da m\'e3e, ele apressava-se}{\lang2057  (}{\ul\lang2057 Around}{
\lang2057  mother, he hurried) or }{\i\lang2057 Comprou o bilhete de volta}{\lang2057  (He bought the }{\ul\lang2057 return}{\lang2057  ticket), where a parser can give the same PoS, viz. preposition noun, to the three instances of }{\i\lang2057 de volta}
{\lang2057 , but separate them by function (e.g. by AJP, AVP and  PP), or actually perform three different tokenizations as well: \ldblquote de volta\rdblquote , \ldblquote de volta de\rdblquote , and \ldblquote de\rdblquote  \ldblquote volta\rdblquote . 

\par Examples could be multiplied at will \endash  what is relevant is the need to understand the parsing scheme in order to distinguish wrong parses from systematic ways of dealing with a particular phenomenon. 
\par {\listtext\pard\plain\s23 \b\fs20\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.1.2.\tab}}\pard\plain \s23\qj\fi-680\li680\sb240\sl-220\slmult0\widctlpar\jclisttab\tx680\ls38\ilvl2\adjustright \b\fs20\cgrid {\lang2057 Categorial ambiguity 
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
The first requirement or expectation when facing a parsed corpus is that words that are categorially ambiguous out of context are assigned their right part of speech. But measures such as percentage of right PoS assignment have long been shown t
o be inappropriate (Santos, 1999), because they do not take into account the difficulty of the problem, both from a macro and from a microperspective:
\par In fact, ca. 90% of the words in a text (66% of the types) are unambiguous (for example, most of those that belong to a closed set such as prepositions, conjunctions, personal pronouns, negative adverbs, etc.)}{\cs31\lang2057\super \chftn {\footnote 
\pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{ These numbers are based on old studies regarding }{\i major}{
 PoS, done for Portuguese (Medeiros et al., 1993; Santos, 1996). In Table 5 ahead, concerning a hundred PoS distinctions, the number of unambiguous forms is only slightly above 50%.}}}{\lang2057 
. In addition, if for all wordforms that belong much more frequently to one PoS than the other the more frequent label is assigned, overall one gets mo
re than 95% of PoS labels right. However, this is no measure of the quality of PoS tagging, given that, if such a procedure were followed, gross syntactic errors might occur, such as the sequence of two syntactically incompatible tags...
\par One should compute, for each potentially ambiguous form present in the corpus, what the difficulty and the information-theoretic gain is of deciding what is their PoS, in order to be able to measure the job done by the parsing procedure.}{
\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{
 It is true that one should also consider the (few) cases where the only PoS assigned is wrong (and which may come from guessing about unknown words, or even from wrong dictionary entries). However, this should not, in our view, be b
rought to the same count as all the unambiguous words whose PoS was right by simple dictionary lookup.}}}{\lang2057  For each pair of <wordform,
 PoS> precision and recall could then be measured (see Hindle and Rooth (1993) for the need to have different PR-measures for each choice).
\par Table 1 presents some of these figures in the small ENPCANOT corpus, for wordforms ambiguous between verb and noun readings. The data column presents correct noun readings \endash  wrong noun readings \endash  wrong verb readings \endash 
 correct verb readings. The PR column presents noun precision, noun recall, verb precision and verb recall.
\par 
\par }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-887\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx235\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx2068\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {wordform \cell Data \cell PR \cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb
\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-887\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx235\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2068\pard \widctlpar\intbl\adjustright {espera \cell 
10 0 0 2\cell 1.0 1.0 1.0 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc
\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-887\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx235\clvertalc\clbrdrt
\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2068\pard \widctlpar\intbl\adjustright {casa \cell 95 0 1 1 \cell 1.0 .989 1.0 0.5\cell }\pard \widctlpar\intbl\adjustright {\row }\pard 
\widctlpar\intbl\adjustright {ser\cell 7 2 0 147\cell .77 1.0 1.0 .986\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {volta\cell 36 0 0 1\cell 1.0 1.0 1.0 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\pard 
\widctlpar\intbl\adjustright {sentido\cell 7 0 4 4\cell 1.0 .636 .5 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {ouvido\cell 1 0 0 6\cell 1.0 1.0 1.0 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\pard 
\widctlpar\intbl\adjustright {jantar \cell 10 1 2 5\cell .909 .833 .714 .833\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {comida\cell 10 0 1 1\cell 1.0 1.0 .5 .5\cell }\pard \widctlpar\intbl\adjustright {\row }\pard 
\widctlpar\intbl\adjustright {gosto\cell 7 0 0 6\cell 1.0 1.0 1.0 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh
\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-887\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx235\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2068\pard \widctlpar\intbl\adjustright {vinda \cell 1 1 0 2 \cell .5 .5 1.0 1.0\cell }\pard 
\widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 1: Evaluating noun/verb disambiguation
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
\par It is at once obvious that no averaging of these numbers will do, since for different forms (or contexts) the parser will do better for \ldblquote nounness\rdblquote  or \ldblquote verbness\rdblquote . T
he table above just shows that we have inspected 194 rightly analysed nouns, four verbs incorrectly labelled as nouns, eight nouns incorrectly classified as verbs, and 175 correctly identified verbs. Noun precision (.979=194/198) will not be a function of
 the individual noun precisions, nor will the other values: verb precision .956, noun recall .960, verb recall .977.
\par Things get even more difficult when realizing that there are more complex disambiguating tasks also measurable in noun or verb precision, na
mely ambiguity with other parts of speech. Table 2 illustrates similar calculations for other PoS pairs or trios (only precision/recall regarding the two first PoS is presented, though taking into consideration all analyses).
\par  
\par }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx713\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx2204\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {wordform \cell Data \cell PR \cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb
\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx713\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2204\pard \widctlpar\intbl\adjustright {desses*
(gram/V) \cell 8 2 0 2\cell .8 1.0 1.0 .5\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 
\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx713\clvertalc
\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2204\pard \widctlpar\intbl\adjustright {sobre(gram/V/N) \cell 105 0 3 3\cell 1.0 .97 .5 1.0 \cell }\pard \widctlpar\intbl\adjustright {\row 
}\pard \widctlpar\intbl\adjustright {suas(gram/V)\cell 84 0 2 2\cell 1.0 .98 .5 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {alto(ADJ/ADV/N)\cell 13 1 3 6\cell .93 .93 .67 .86\cell }\pard 
\widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {claro(ADJ/ADV)\cell 24 21 1 4\cell .53 .96 .8 .16\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {quartos(N/ADJ)\cell 4 0 0 1\cell 1.0 0.0 0.0 1.0
\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {creme(N/ADJ) \cell 3 0 0 2\cell 1.0 0.0 0.0 1.0\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {presentes*(N/ADJ)\cell 3 2 0 9\cell 
0.6 1.0 1.0 .82\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {tarde(V/ADV/N)\cell 1 1 1 41\cell 0.5 0.5 .98 .98\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl
\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt
\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx713\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2204\pard 
\widctlpar\intbl\adjustright {fora(V/ADV) \cell 54 3 7 36\cell .95 .89 .84 .92 \cell }\pard \widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 2: Evaluating other PoS disambiguation tasks
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
\par It should in any case be noted that even a task as seemingly simple as deciding on a PoS is marred by the difficulty, alluded to before, of identifying the correct level where information is encoded \endash 
 and the converse, which level to assign an error if it is }{\i\lang2057 not}{\lang2057  conveyed. For example, consider the following phrases:
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li360\sl-220\slmult0\widctlpar\jclisttab\tx360{\*\pn \pnlvlblt\ilvl0\ls34\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls34\adjustright {
\i\lang2057 \'e0 espera}{\lang2057  (waiting) constitutes an adverbial phrase (though the word }{\i\lang2057 espera}{\lang2057  is related to the noun }{\i\lang2057 espera}{\lang2057 , waiting)
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li360\sl-220\slmult0\widctlpar\jclisttab\tx360{\*\pn \pnlvlblt\ilvl0\ls34\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls34\adjustright {
\i\lang2057 ao largo}{\lang2057  (at a distance) also works as an adverb (the word }{\i\lang2057 largo}{\lang2057  is not related to the noun }{\i\lang2057 largo}{\lang2057 , square), and 
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li360\sl-220\slmult0\widctlpar\jclisttab\tx360{\*\pn \pnlvlblt\ilvl0\ls34\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls34\adjustright {
\i\lang2057 a seguir}{\lang2057  (next) is only metaphorically related to the verb }{\i\lang2057 seguir}{\lang2057  (follow), being used in much wider contexts than an infinitive phrase, namely as a complex adjective or adverb
\par }\pard \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright {\lang2057 So, should one consider the simple assignment of respectively noun, noun and verb to }{\i\lang2057 espera}{\lang2057 , }{\i\lang2057 largo}{\lang2057  and }{\i\lang2057 seguir}{
\lang2057  a right PoS assignment? If the distinction were encoded in other parts of the analysis, maybe yes. If not \endash 
 where to measure it? The easiest way would be to remove these cases simply from PoS accounting, and expect them to be rewarded (or punished) at the 
right level. But let us note the lack of a golden rule for measuring and encoding these matters: there is no universal or near universal consensus on what the text units should be (words or multiword expressions). So, if one does not want to incur in a ju
d
gement of the underlying grammatical theory -- then we would be actually comparing two different parsing approaches and not a parsed corpus in itself -- there are only two ways left. The first is using the extensional limits provided by the parser and con
s
ider the output right when no other alternative is possible. The second is to use, besides right and wrong, a third category in our precision and recall computations, to mean that the relevant distinction is or should be encoded at a different level (and 
then reward it at that level).
\par One of the places where this is more obviously reflected is in tokenization. See Santos and Bick (2000) for an illustration of the amount and kind of differences.
\par {\listtext\pard\plain\s23 \b\fs20\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.1.3.\tab}}\pard\plain \s23\qj\fi-680\li680\sb240\sl-220\slmult0\widctlpar\jclisttab\tx680\ls38\ilvl2\adjustright \b\fs20\cgrid {\lang2057 
Lemmatization and morphological analysis
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 In many cases, lemmatization is trivial after PoS assignment,}{\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {
\cs31\super \chftn }{ Assuming that the underlying morphological analyser is reliable, which is obviously a simplification, especially in the case of unknown words. For a pr
oportion of these in Portuguese text see Reis (1993); for a study of the performance of PALAVRAS in this respect see Bick (1998).}}}{\lang2057 
 and therefore should not get more credit for the parser, but not always, because of intracategorial ambiguity. This is especially common for verb forms in Portuguese, but also possible in nouns, as illustrated in Table 3: 
\par 
\par }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx1493\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {wordform \cell possible lemmas
\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx1493\pard \widctlpar\intbl\adjustright {fora(V) 
\cell ser ir\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt
\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx1493\pard 
\widctlpar\intbl\adjustright {vendo(V)\cell vender ver vendar\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {vira(V)\cell ver virar\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {
revista(V)\cell rever revistar\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {costas(N)\cell costa costas\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {gra\'e7as(N)\cell gra\'e7
a gra\'e7as\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {vimos(V)\cell ver vir\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {amara(V)\cell amar amarar\cell }\pard 
\widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {assente(V)\cell assentir assentar\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr
\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb
\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx1493\pard \widctlpar\intbl\adjustright {lido(V)\cell ler lidar\cell }\pard \widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {
Table 3: Lemma ambiguity
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
\par The disambiguation of morphological features, when they are not defined by lemma and PoS, is yet another task on which to measure the performance of a parser (and/or the quality of annotation of a parsed corpus). Clear exa
mples are the pervasive ambiguity (for all but the most irregular verbs) between 
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls35\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls35\adjustright {
\lang2057 future of subjunctive and infinitive forms; 
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls35\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls35\adjustright {
\lang2057 first and third person of imperfeito (in both indicative and subjunctive moods);
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls35\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls35\adjustright {
\lang2057 perfeito and pluperfect tense in the third person plural;
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls35\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls35\adjustright {
\lang2057 perfeito and present in the first person plural}{\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{
 Due to different spelling conventions, this applies only for verbs ending in }{\i er}{ or }{\i ir}{ for European Portuguese, but for almos
t all verbs in Brazilian Portuguese. As far as we know, this was not taken into consideration in the automatic analysis, resulting in a much larger number of initially ambiguous forms -- and actually incorrect portmanteau tags -- in the European Portuguese 
texts.}}}{\lang2057 
\par }\pard \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright {\lang2057 This also holds for gender of nouns such as }{\i\lang2057 capital}{\lang2057 , }{\i\lang2057 moral}{\lang2057 , }{\i\lang2057 presidente}{\lang2057  and those ending in }{\i\lang2057 
ista}{\lang2057 , as well as for gender of a large class of invariant adjectives. A less considerable task is number d
isambiguation for a few nouns and adjectives. Finally, one further non trivial task of a parser is to assign gender (and number) to a proper noun (something not necessarily obvious even for a human being, see Afonso et al. (2002a) for discussion).
\par It is ar
guable whether gender and number of (non-lexically determined) pronouns should be considered as a morphological disambiguation task. We will not consider it here, although all pronoun instances are marked in the corpora as M/F (both genders possible).

\par In f
act, one important fact regarding morphological ambiguity in the present parsed corpora is that most of it is simply not resolved, which means that a large number of wordforms still carry portmanteau labels (15%, 11% or 10% of all the forms not classified
 
as invariant). Just to give a more precise idea of what this means in practice, let us look at the disambiguation of presente and perfeito in the first person plural in the EBRANOT corpus. In the 1745 cases marked present and/or perfeito in that person, t
he distribution is as displayed in Table 4.
\par 
\par }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1013\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx2520\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {Tense\cell }\pard \qc\widctlpar\intbl\adjustright {Total\cell Ambiguous\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt
\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279
\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1013\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2520\pard 
\widctlpar\intbl\adjustright {presente \cell }\pard \qc\widctlpar\intbl\adjustright {806\cell 52\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr
\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb
\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1013\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2520\pard \widctlpar\intbl\adjustright {perfeito\cell }\pard 
\qc\widctlpar\intbl\adjustright {168\cell 44\cell }\pard \widctlpar\intbl\adjustright {\row }\pard \widctlpar\intbl\adjustright {presente/perfeito\cell }\pard \qc\widctlpar\intbl\adjustright {771\cell 771\cell }\pard \widctlpar\intbl\adjustright {\row 
}\trowd \trqc\trleft-2026\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-279\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1013\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx2520\pard \widctlpar\intbl\adjustright {Total\cell }\pard \qc\widctlpar\intbl\adjustright {1,745\cell 867\cell }\pard \widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid 
{Table 4: Disambiguation of tense form
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
\par While almost 60% of the forms have only one label assigned, a quick ins
pection of both presente and perfeito cases shows that the vast majority of them were already unambiguous from the start (belonging to those verbs having distinct forms). So, in practice, the disambiguation task was only done for 96 forms out of 867 (11% of
 the cases). 
\par Following the same procedure to analyse this kind of disambiguation task as used for PoS: Of 44 forms analysed as perfeitos, 4 are wrong (should be presente) and 40 right, thus yielding a precision of perfeito recognition of .91. As for the 54
 forms classified as presente, 13 are wrong (3 of them featuring as well a wrong lemma, one of which due to a spelling error, so only 10 are actually perfeitos), 35 are right, and 6 are possible in the two interpretations (even consulting the largest poss
i
ble context). In order to simplify the present computations we stipulate, in this case, three wrong and three right (three perfeitos and three presentes). Thus we get .70 precision in identifying presente, and .90 coverage, while we have .75 coverage for 
perfeito identification.
\par {\listtext\pard\plain\s23 \b\fs20\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.1.4.\tab}}\pard\plain \s23\qj\fi-680\li680\sb240\sl-220\slmult0\widctlpar\jclisttab\tx680\ls38\ilvl2\adjustright \b\fs20\cgrid {\lang2057 Syntax proper
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
To talk of a parsed corpus instead of a tagged one, larger elements than words (or basic units) have to be identified, and (some of) their functions have to be revealed. This is the more complex part of the parsing wor
k, and it is also the one which requires more complicated assessment procedures, even if one is simply evaluating }{\i\lang2057 one}{\lang2057 
 parsed corpus and not competing schemes of annotation (as concerns Black et al. (1991), Lin (1995) or Carroll et al. (1998)).
\par As far as we
 know, there is no fixed number of syntactic distinctions that one can use as a measure, and syntax, as opposed to morphology, is still a sparsely exploited area. There is no other way, it seems, at least for the time being, than to conform to the theory 
o
beyed by the parser and, inside its limits, test what is right and wrong. One has to list the possible analyses contemplated (forget those that were not) and, in light of the alternatives, decide whether the result is the best possible. For the AC/DC corp
o
ra, the underlying theory is dependency based, so there is no direct way to define constituents, and there is quite a large number of cases where attachment is left unspecified. Besides, and as was the case for morphological information, there is a consid
erable number of alternative function tags that have not been disambiguated.
\par But still, for each verb which admitted of an object one could compute precision and recall of object detection; for each verb which admitted an object, one could compute the PR fi
gures; for each ditransitive verb of the form NP PP one could check them, and so on. Conversely, for all sentences one could check appropriate main verb detection, as well as (apparent) right argument structure.
\par Again, one has to be careful about what is the domain of possible/wrong categories, even when function labels are assigned to every word. (All syntactic information available in the AC/DC parsed corpora is through function labels.) 
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\colsx340\sectdefaultcl \pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par 
\par }\trowd \trqc\trleft-5102\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-3297\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-1866\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw10 \cltxlrtb \cellx-165\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1819\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx4371\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {Corpus\cell }\pard \qc\widctlpar\intbl\adjustright {Size\cell PoS ambiguity\cell Lemma ambiguity\cell Morphological ambiguity\cell }\pard 
\widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-5102\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-3297\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-1866\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-165\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1819\clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4371\pard \widctlpar\intbl\adjustright {ENPCANOT\cell }\pard \qc\widctlpar\intbl\adjustright {72,431
\par 12,886\cell 29,531
\par 730\cell 3,264
\par 41\cell 13,063
\par 905\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-5102\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl
\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-3297\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-1866\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-165\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1819\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4371\pard \widctlpar\intbl\adjustright {EBRANOT \cell }\pard \qc\widctlpar\intbl\adjustright {722,715
\par 60,118\cell 348,576
\par 4,419\cell 39,974
\par 123\cell 164,760
\par 6,654\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trleft-5102\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \clvertalc\clbrdrt\brdrs\brdrw10 
\clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-3297\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-1866\clvertalt\clbrdrt\brdrs\brdrw10 
\clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx-165\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb \cellx1819\clvertalc\clbrdrt\brdrs\brdrw10 \clbrdrl
\brdrs\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4371\pard \widctlpar\intbl\adjustright {NATPANOT \cell }\pard \qc\widctlpar\intbl\adjustright {6,295,653
\par 167,206\cell 3,223,063
\par 11,534\cell 448,916
\par 420\cell 1,570,102
\par 18,395\cell }\pard \widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 5: Some extensional measures of disambiguation need for three different corpora
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\cols2\colsx340\sectdefaultcl \pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 
In fact, PoS may uniquely determine function, as is the case with articles, always assigned the function label >N. Also, simple PoS sequences such as preposition (article adjective*) noun result in the noun necessarily getting
 the label P< and all intervening articles and adjectives the label >N.}{\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{
 Except when followed by a non-finite verb, where the noun is parsed as subject of the following infinitive or gerundive clause.}}}{\lang2057 
 This, incidentally, constitutes respectively 49%, 48% and 52% of all words classified as nouns in the corpora we are dealing with.
\par {\listtext\pard\plain\s22 \b\fs22\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.2.\tab}}\pard\plain \s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls38\ilvl1\adjustright \b\fs22\cgrid {\lang2057 
Relevant characteristics of a parsed corpus
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 After de
tailing the problems and before suggesting measures, we would like to note that a corpus, no matter how large, has a fixed vocabulary, so that quality features for each word can be exhaustively computed, as well as the difficulty involved in parsing it (p
rior to parsing).
\par So, for each wordform occurring in the corpus one can know its span (the set of different possible analyses).}{\cs31\lang2057\super \chftn {\footnote \pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{
 One could as well have a frequency estimate of the relative probability of each PoS, by itself or as an n-gram, etc.}}}{\lang2057 
 It is therefore possible to give a first measure of the parsing difficulty of a corpus by presenting statistics like the percentage of ambiguous wordforms. It is important to stress, if one is }{\i\lang2057 comparing}{\lang2057 
 (and not only evaluating) corpora, that different corpora may offer different challenges to syntactic analysis.
\par We can use, for this estimation, both internal and externa
l criteria. Internal criteria are what the corpus in itself reveals, having the number of forms assigned different analyses as one measure of the disambiguation difficulty present in the corpus. This is, obviously, a measure by default: All ambiguous form
s that have been disambiguated and have been found to occur in only one way are counted as unambiguous... but note that possible error is not computed either (forms with one analysis in the corpus pair with unambiguous forms).
\par External criteria would use other sources of probing, like morphological analysers and lemmatizers. Ideally, the ones used by the parser itself. 
\par Table 5 gives, for three different corpora, the following figures, obtained by internal criteria: sheer size in words, number of catego
rially ambiguous word forms, number of intracategorially ambiguous word forms as far as different lemmas are concerned, and number of intracategorially ambiguous word forms as far as morphology is concerned.}{\cs31\lang2057\super \chftn {\footnote 
\pard\plain \s30\widctlpar\adjustright \fs18\lang2057\cgrid {\cs31\super \chftn }{ This means that they were assigned the same PoS. Lemma attribution and morphological marking were assessed independently.}}}{\lang2057 
 In all cases we present the number of tokens an
d types. It should be emphasized that these numbers have to be read relative to the number of possible distinctions present in the parsed corpus, and are not meaningful as absolute measures. For example, many of the PoS differences refer to subcategorizat
ion, and many people would argue against calling them PoS ambiguity. Still, this is the way the corpora were encoded, so it is at least one possible way to look at the matter.
\par Note that, if one knew that all corpora had been parsed by the same (version of t
he) parser, one could increase the number of ambiguous forms by adding up all possible analyses across corpora. That is, unique occurrences in one corpus could be identified as ambiguous with the help of occurrences in other corpora. We have not done this
 here, though.
\par Also, note that we have not taken into consideration the word forms analysed as belonging to a proper noun (named entity), each of which individually carries a PROP tag. So, we deleted them prior to inspecting potential ambiguity, as well as 
merged capitalized and non-capitalized forms in the computations above.
\par {\listtext\pard\plain\s22 \b\fs22\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 3.3.\tab}}\pard\plain \s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls38\ilvl1\adjustright \b\fs22\cgrid {\lang2057 How to measure quality?
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 Ideally, one would pick all ambiguous forms and check them, in the way illustrated above \endash  but this procedure would be as costly as parsing the whole corpus ma
nually once again. So, the most obvious solution is to randomly select a subset of the ambiguous forms, and measure them, extrapolating as far as quality in the whole corpus is concerned.
\par We have thus randomly selected 100 cases (distinct types) of each kind of disambiguation, and analysed them. We have only taken into account non-capitalized words, in order not to add the additional question of recognizing proper names (named entities). 

\par }{
\par }\trowd \trqc\trgaph70\trleft265\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 
\clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1618\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2327\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr
\brdrs\brdrw15 \cltxlrtb \cellx3152\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4841\pard \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright {Corpus\cell PoS\cell Lemma \cell 
Morphology\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\trowd \trqc\trgaph70\trleft265\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv
\brdrs\brdrw15 \clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1618\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb 
\cellx2327\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx3152\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4841
\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {ENPCANOT\cell 11%
\par 3%\cell 12%
\par 23%\cell 3%, 25%
\par 1%\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {EBRANOT\cell 8%
\par 4%\cell 27%
\par 7%\cell 4%, 16%
\par 4%\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\trowd \trqc\trgaph70\trleft265\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 
\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1618\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2327
\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx3152\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4841
\pard\plain \s21\qj\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {NATPANOT\cell }\pard \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright {17%
\par 1%\cell 53%
\par 6%\cell 13%, 15%
\par 9%\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 6: Evaluation of 100 cases
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par }{\lang2057 The results appear in Table 6, which presents the percentage of analyses considered wrong and the percentage about which there were doubts as to how to classify them, respectivel
y. For morphology, the intermediate number concerns the forms which had only partly disambiguated information (considered as neither wrong nor doubtful). It should be noted that the lemma evaluation displayed in Table 6 very often reflects spelling errors
, foreign words, and wrong PoS assignment. This is especially true for NATPANOT, where 42% of the cases inspected (and considered wrong) were due to errors in the original corpus text.
\par {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 4.\tab}}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 
Automatic extraction of semantic relations from syntactic relations
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057\cgrid0 We c
oncentrate now on a specific task that uses parsed corpora as data for achieving a more complex goal. We apply a technique for automatic extraction of semantic relations from syntactic relations proposed in Gasperin (2001) and Gasperin et al. (2001), as a
n
 extended version of the technique proposed by Grefenstette (1994). This technique is based on the computation of word similarity through the syntactic contexts they share. (As syntactic context, we understand any word that establishes a syntactic relatio
n with a given word in the corpus.) 
\par We consider the following syntactic relations: an adjective as noun modifier, a noun as noun modifier (through a preposition), a noun as verb subject, a noun as verb direct object, and a noun as verb indirect object. The
 technique consists in extracting the syntactic contexts of each word from every occurrence of it in a parsed corpus; the words are then compared as to occurrence in syntactic contexts, and words with many common syntactic contexts are considered semantical
ly r
elated. To perform the comparison, the similarity measure used is a weighted version of the Jaccard measure, which assigns global and local weights to each syntactic context. We then extract lists of semantically related words for each word in the corpus,
 which are useful mainly for thesauri construction.
\par The parsed corpus is thus necessary to extract the syntactic relations used in the procedure described above. We wanted to observe how dependent the whole procedure was on the correctness of the parsing in
formation (specifically, PoS tags and function tags). In other words, if one extracts "wrong" syntactic contexts, how much is this reflected in the generation of noisy lists of semantically related words? 
\par So, we present, on the one hand, measures of the robustness of the extraction of each syntactic relation used, and then some experiments about its influence on the semantically related words obtained as the result.
\par {\listtext\pard\plain\s22 \b\fs22\lang2057 \hich\af0\dbch\af0\loch\f0 4.1.\tab}}\pard\plain \s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls33\ilvl1\adjustright \b\fs22\cgrid {\lang2057\cgrid0 Measuring the }{\lang2057 
extraction}{\lang2057\cgrid0  procedure
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {To measure the correctness of the syntactic contexts ext
racted from the corpus, it was necessary to compare them manually with the original expressions in the corpus, aiming to discover parsing problems. So, we adopted the following procedure:
\par {\pntext\pard\plain\s21 \fs20\cgrid \hich\af0\dbch\af0\loch\f0 1.\tab}}\pard \s21\qj\fi-360\li689\sl-220\slmult0\widctlpar\jclisttab\tx689{\*\pn \pnlvlbody\ilvl0\ls39\pnrnot0\pndec\pnstart1\pnindent360\pnhang{\pntxta .}}\ls39\adjustright {
selecting a portion of the FOLHANOT corpus;
\par {\pntext\pard\plain\s21 \fs20\cgrid \hich\af0\dbch\af0\loch\f0 2.\tab}}\pard \s21\qj\fi-360\li689\sl-220\slmult0\widctlpar\jclisttab\tx689{\*\pn \pnlvlbody\ilvl0\ls39\pnrnot0\pndec\pnstart1\pnindent360\pnhang{\pntxta .}}\ls39\adjustright {
selecting the nouns of this portion;
\par {\pntext\pard\plain\s21 \fs20\cgrid \hich\af0\dbch\af0\loch\f0 3.\tab}}\pard \s21\qj\fi-360\li689\sl-220\slmult0\widctlpar\jclisttab\tx689{\*\pn \pnlvlbody\ilvl0\ls39\pnrnot0\pndec\pnstart1\pnindent360\pnhang{\pntxta .}}\ls39\adjustright {
extracting all the syntactic contexts of these nouns;
\par {\pntext\pard\plain\s21 \fs20\cgrid \hich\af0\dbch\af0\loch\f0 4.\tab}}\pard \s21\qj\fi-360\li689\sl-220\slmult0\widctlpar\jclisttab\tx689{\*\pn \pnlvlbody\ilvl0\ls39\pnrnot0\pndec\pnstart1\pnindent360\pnhang{\pntxta .}}\ls39\adjustright {
comparing manually the extracted contexts with the original expressions in the corpus;
\par {\pntext\pard\plain\s21 \fs20\cgrid \hich\af0\dbch\af0\loch\f0 5.\tab}}\pard \s21\qj\fi-360\li689\sl-220\slmult0\widctlpar\jclisttab\tx689{\*\pn \pnlvlbody\ilvl0\ls39\pnrnot0\pndec\pnstart1\pnindent360\pnhang{\pntxta .}}\ls39\adjustright {
classifying the parsing performance.
\par }\pard \s21\qj\sl-220\slmult0\widctlpar\adjustright {The portion extracted from the FOLHANOT corpus contains around 5,000 words, of which around 1,000 are nouns. The syntactic contexts of these nouns were extracted; some examples are shown in Table 7.
\par 
\par }\trowd \trqc\trgaph70\trleft-70\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 
\clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1545\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2410\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr
\brdrdb\brdrw10 \cltxlrtb \cellx4599\pard \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright {Sentence\cell Nouns\cell Contexts\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\fs20\cf2 \row }\trowd 
\trqc\trgaph70\trrh536\trleft-70\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmgf\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb
\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1545\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2410\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb
\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4599\pard\plain \s35\qc\widctlpar\intbl\adjustright \f2\fs20\lang2057\cgrid {\i\f0 ... inicia a colheita da maior safra de sua hist\'f3ria ...
\par }{\f0 (... begins the crop of the largest production of its history ... )\cell }\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {\i colheita\cell }{<direct object, }{\i iniciar}{>
\par <modifier, }{\i de}{, }{\i safra}{>\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\fs20\cf2 \row }\trowd \trqc\trgaph70\trrh535\trleft-70\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr
\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmrg\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1545\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 
\clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2410\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4599\pard\plain \s35\qc\widctlpar\intbl\adjustright 
\f2\fs20\lang2057\cgrid {\f0 \cell }\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {\i safra\cell }{<adjective, }{\i grande}{>
\par <modifies, }{\i de}{, }{\i colheita}{>
\par <modifier, }{\i de}{, }{\i hist\'f3ria}{>\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\cf2 \row }\trowd \trqc\trgaph70\trrh535\trleft-70\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr
\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmrg\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx1545\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 
\clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx2410\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4599\pard\plain \s35\qc\widctlpar\intbl\adjustright 
\f2\fs20\lang2057\cgrid {\f0 \cell }\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {\i hist\'f3ria\cell }{<modifies, }{\i de}{, }{\i safra}{>\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\cf2 \row 
}\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 7: Examples of syntactic contexts
\par }\pard\plain \s21\qj\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\cf2 
\par }\pard \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright {We classified each sy
ntactic context extracted as: (C) correctly parsed, (E) incorrectly parsed, and (FE) not extracted due to a parsing error. Table 8 shows the percentages of the contexts according to these classes.
\par 
\par }\trowd \trqc\trgaph70\trleft-70\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 
\clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx709\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2268\pard \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright {Class\cell Percentage (%)\cell 
}\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\trowd \trqc\trgaph70\trleft-70\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 
\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx709\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2268
\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {C\cell 89.96\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {E\cell 7.82
\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\trowd \trqc\trgaph70\trleft-70\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 
\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx709\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx2268
\pard\plain \s21\qc\sl-220\slmult0\widctlpar\intbl\adjustright \fs20\cgrid {FE\cell 2.20\cell }\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\ul \row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Tabl
e 8: Contexts according to parsing performance
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par Some erroneous contexts were more frequent than the others. The E and FE contexts were distinguished according to specific points.}{\cf2  }{We can identify regular errors in the parsing information. Table 9 shows the 
most frequent parsing errors (or, in some cases, features) that generated the erroneous contexts, their percentage of occurrence and some examples.
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\colsx340\sectdefaultcl \pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par }\trowd \trqc\trleft-5102\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 
\clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2676\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-1400\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 
\clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4502\pard\plain \qc\widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\fs20 Errors\cell Occurrence%\cell Examples\cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\trowd \trqc\trleft-5102\trbrdrt
\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb 
\cellx-2676\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-1400\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb 
\cellx4502\pard \widctlpar\intbl\adjustright {\fs20 Proper nouns as common nouns\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 17.28\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote Barreiras\rdblquote  (organization name) was treate
d as the common noun meaning barrier or barricade; \ldblquote Folha\rdblquote  (newspaper name) was treated as the common noun meaning leaf\cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 
Prepositional attachment errors\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 14.81\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote expans\'e3o de soja na fronteira\rdblquote  (soy expansion on the boundary): \ldblquote fronteira
\rdblquote  is attached to \ldblquote soy\rdblquote  but should be attached to \ldblquote expans\'e3o\rdblquote \cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 verb \ldblquote haver\rdblquote  (in the form 
\ldblquote h\'e1\rdblquote ) as preposition\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 2.46\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote instaladas no local h\'e1 anos\rdblquote  (installed in the place for years)\cell }\pard 
\widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 preposition \ldblquote a\rdblquote  as determiner and vice-versa\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 1.23\cell }\pard \widctlpar\intbl\adjustright {\fs20 
\ldblquote se destina a implanta\'e7\'e3o\rdblquote  (it is destined for the implantation) \cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 prepositional phrase as adverbial phrase and vice-versa\cell 
}\pard \qc\widctlpar\intbl\adjustright {\fs20 7.04\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote disputar o campeonato na Holanda\rdblquote  (dispute the championship in the Netherlands): \ldblquote na Holanda\rdblquote 
 should be an adverbial phrase\cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 incorrect subject, direct object or indirect object tags \cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 29.62\cell }\pard 
\widctlpar\intbl\adjustright {\fs20 \ldblquote impediu o plantio de feij\'e3o\rdblquote  (prevented the planting of beans): \ldblquote de feij\'e3o\rdblquote  should be a prepositional phrase instead of an indirect verb object\cell }\pard 
\widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 adjective as verb\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 11.11\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote ano passado\rdblquote  (last year): 
\ldblquote passado\rdblquote  should be tagged as adjective instead of a verb form of to pass; \ldblquote pesquisas confi\'e1veis\rdblquote  (reliable research): \ldblquote confi\'e1veis\rdblquote  should be an adjective, not the verb \ldblquote 
to rely on\rdblquote \cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard \widctlpar\intbl\adjustright {\fs20 adjective as noun and vice-versa\cell }\pard \qc\widctlpar\intbl\adjustright {\fs20 7.40\cell }\pard \widctlpar\intbl\adjustright {\fs20 
\ldblquote quinta\rdblquote : referring to \ldblquote quinta-feira\rdblquote  (Thursday) instead of the ordinal number \ldblquote quinto\rdblquote  (fifth); \ldblquote alta de pre\'e7o\rdblquote  (price increase): \ldblquote alta\rdblquote  referring to \ldblquote 
the increase\rdblquote  instead of the adjective \ldblquote tall\rdblquote \cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\trowd \trqc\trleft-5102\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 
\trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2676\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 
\clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-1400\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx4502\pard \widctlpar\intbl\adjustright {\fs20 verb as noun and vice-versa\cell }\pard 
\qc\widctlpar\intbl\adjustright {\fs20 6.17\cell }\pard \widctlpar\intbl\adjustright {\fs20 \ldblquote corrida\rdblquote  (run): running event instead of the running action\cell }\pard \widctlpar\intbl\adjustright {\fs20 \row }\pard\plain 
\s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 9: Most frequent parsing errors
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\cols2\colsx340\sectdefaultcl \pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
It should be noted that, while from the point of view of the user (the extractor of syntactic contexts), they are considered errors, often the problems reported in Table 9 concern actual linguistic decisions made in the parsing process. For example, the c
lassification of }{\i h\'e1}{
 as a preposition was an actual choice of the parser developer. The same happens with the PoS marking of past participles as verbs, no matter whether they are adjectivally used or not. Finally, even properties of the CG formalism, namel
y the underspecification of attachment, can be felt as problems and give rise to errors. This shows clearly, in our view, the different assessment types when one is involved in user-visible and not user-transparent evaluation.
\par After investigating the syntactic contexts, we used them to extract the semantic relations among the nouns.}{\lang2057 
\par {\listtext\pard\plain\s22 \b\fs22\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 4.2.\tab}}\pard\plain \s22\qj\fi-567\li567\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx567\ls33\ilvl1\adjustright \b\fs22\cgrid {\lang2057 Extracting semantic relations
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {To verify the influence of the erroneous syntactic contexts extracted from the corpus, we did two experiments: (1) we generated the lists of seman
tically related words to each noun using all the extracted contexts, and (2) we did the same using only the C and FE syntactic contexts. There is no systematic measure to evaluate the homogeneity of the generated lists, so they were compared subjective
ly. 
\par Table 10 presents the lists of semantically related words to some of the nouns in the corpus for both experiments.
\par To have a good homogeneity level, the used portion of the corpus should be larger. But in this paper we focus on the differences between 
the lists generated in each experiment, while expecting to report the results of a larger-scale experiment further in Gasperin et al. (in preparation).
\par We can observe that the lists corresponding to experiment 2 are more homogeneous than the lists produced by experiment 1. They are smaller and less noisy. The position of the words in the list indicates more or less similarity with the word in focus.

\par 
\par }\pard\plain \widctlpar\adjustright \fs22\lang2057\cgrid {\sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\colsx340\sectdefaultcl \trowd \trqc\trleft-5102\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr
\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvertalc\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-3835\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb
\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2559\clvertalt\clbrdrt\brdrdb\brdrw10 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx3497\pard\plain \widctlpar\intbl\adjustright \fs22\lang2057\cgrid {\cell 
}\pard \qc\widctlpar\intbl\adjustright {Experiment\cell }\pard \qj\li68\widctlpar\intbl\adjustright {Semantically related words\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trrh124\trleft-5102\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl
\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmgf\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-3835\clvertalt
\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2559\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx3497\pard 
\qc\widctlpar\intbl\adjustright {expans\'e3o (expansion)\cell 1\cell }\pard\plain \s26\qj\li68\widctlpar\intbl\adjustright \fs18\lang2057\cgrid {\fs20 grosso exemplo lavoura monocultura t ha colheita\cell }\pard\plain \widctlpar\intbl\adjustright 
\fs22\lang2057\cgrid {\row }\trowd \trqc\trrh124\trleft-5102\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmrg\clvertalc\clbrdrt\brdrs\brdrw15 
\clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-3835\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2559\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl
\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx3497\pard \qc\widctlpar\intbl\adjustright {\cell 2\cell }\pard \li68\widctlpar\intbl\adjustright {\fs20 lavoura monocultura t ha colheita\cell }\pard 
\widctlpar\intbl\adjustright {\row }\trowd \trqc\trrh124\trleft-5102\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 \trbrdrv\brdrs\brdrw15 \clvmgf\clvertalc\clbrdrt
\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-3835\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-2559\clvertalc\clbrdrt
\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrs\brdrw15 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx3497\pard \qc\widctlpar\intbl\adjustright {ha (hectare)\cell 1\cell }\pard \qj\li68\widctlpar\intbl\adjustright {\fs20 
milho palanque monocultura quilo nelore t grosso\cell }\pard \widctlpar\intbl\adjustright {\row }\trowd \trqc\trrh124\trleft-5102\trkeep\trbrdrt\brdrdb\brdrw10 \trbrdrl\brdrdb\brdrw10 \trbrdrb\brdrdb\brdrw10 \trbrdrr\brdrdb\brdrw10 \trbrdrh\brdrs\brdrw15 
\trbrdrv\brdrs\brdrw15 \clvmrg\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrdb\brdrw10 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrs\brdrw15 \cltxlrtb \cellx-3835\clvertalt\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr
\brdrs\brdrw15 \cltxlrtb \cellx-2559\clvertalc\clbrdrt\brdrs\brdrw15 \clbrdrl\brdrs\brdrw15 \clbrdrb\brdrdb\brdrw10 \clbrdrr\brdrdb\brdrw10 \cltxlrtb \cellx3497\pard \qc\widctlpar\intbl\adjustright {\cell 2\cell }\pard \li68\widctlpar\intbl\adjustright {
\fs20 quilo t km\-2 tonelada expans\'e3o\cell }\pard \widctlpar\intbl\adjustright {\row }\pard\plain \s24\qc\sb240\widctlpar\adjustright \fs20\lang2057\cgrid {Table 10: Semantically related words in the two experiments
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {
\par \sect }\sectd \margtsxn1418\sbknone\linex0\headery1440\footery1440\cols2\colsx340\sectdefaultcl {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 5.\tab}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar
\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 Conclusions
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {\lang2057 Despite the obvious usefulness of ha
ving parsed corpora available on the Web for interrogation, or as raw data for further NLP processing, the linguistic information carried by AC/DC corpora is still far from reliable in many cases. This is one of the reasons the Floresta Sint}{\lang2070 
\'e1}{\lang2057 (c)tica project was launched (Afonso et al., 2002a, 2002b), so that human revision could create more reliable resources.
\par For the majority of the readers of the present paper, though, who are not interested in Portuguese NLP in itself, we suggest the following general conclusions:
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls36\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls36\adjustright {
\lang2057 one has to measure carefully what is the }{\i\lang2057 difficulty}{\lang2057  of a particular task, before trying to evaluate the result of performing that task
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls36\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls36\adjustright {
\lang2057 there are implementable ways of measuring such an a priori difficulty, given a parsed corpus
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls36\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls36\adjustright {
\lang2057 many apparently straightforward tasks, such as assigning objects or identifying tense or PoS, turn out to be trickier than expected
\par {\pntext\pard\plain\s21 \f3\fs20\lang2057\cgrid \loch\af3\dbch\af0\hich\f3 \'b7\tab}}\pard \s21\qj\fi-360\li644\sl-220\slmult0\widctlpar\jclisttab\tx644{\*\pn \pnlvlblt\ilvl0\ls36\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'b7}}\ls36\adjustright {
\lang2057 different applications and users may be interested in different properties and aspects of a parsed corpus, so one should evaluate }{\i\lang2057 relative}{\lang2057  to a given need.
\par {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 6.\tab}}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 Acknowledgements
\par }\pard\plain \s21\qj\fi284\sl-220\slmult0\widctlpar\adjustright \fs20\cgrid {We are most grateful to Vera Strube de L\'fa
cia for her supervision of the second author in her dissertation on extracting semantic relations from syntactic contexts, without which this paper could not have been written.
\par {\listtext\pard\plain\s20 \b\lang2057\cgrid \hich\af0\dbch\af0\loch\f0 7.\tab}}\pard\plain \s20\qc\fi-360\li360\sb240\sa60\sl-220\slmult0\widctlpar\jclisttab\tx360\ls38\adjustright \b\cgrid {\lang2057 References
\par }\pard\plain \s25\qj\fi-198\li198\sl-220\slmult0\widctlpar\adjustright \fs20\lang2057\cgrid {Afonso, Susana, }{Eckhard Bick, Renato Haber and Diana Santos. (2002a). Floresta sint\'e1(c)tica: um treebank para o portugu\'eas. In }{\i 
Actas do XVII Encontro da Associa\'e7\'e3o Portuguesa de Lingu\'edstica}{. Lisboa: APL.
\par }{Afonso, Susana, }{Eckhard Bick, Renato Haber and Diana Santos. (2002b). Floresta sint\'e1(c)tica: a treebank for Portuguese. In }{\i Proceedings of LREC2002 }{(this volume). 
\par Black, E., S. Abney, D. Flickinger, C. Gdaniek, R. Grishman, P. Harrison, D. Hindle, R. Ingria, F. Jelinek, J. Klavans, M. Liberman, M. Marcus, 
S. Roukos, B. Santorini and T. Strzalkowski. (1991). A procedure for quantitatively comparing the syntactic coverage of English grammars. In }{\i Proceedings of the February 1991 DARPA Speech and Natural Language Workshop}{ (pp. 306--311).
\par Bick, Eckhard. (1998). Structural lexical heuristics in the automatic analysis of Portuguese. In Bente Maegaard (Ed.), }{\i Proceedings of the 11th Nordic Conference on Computational Linguistics, Nodalida \lquote 98 }{(pp. 44--56). Copenhagen.}{
\par Bick, Eckhard. (2000). }{\i The Parsing System "Palavras": Automatic Grammatical Analysis of Portuguese in a Constraint Grammar Framework}{. Aarhus: Aarhus University Press.
\par Carroll, John, Ted Briscoe and Antonio Sanfilippo. (1998). Parser evaluation: a Survey and a New Proposal. In Antonio Rubio, Natividad Gallardo, Rosa Castro and Antonio Tejada (Eds.), }{\i 
Proceedings of The First International Conference on Language Resources and Evaluation}{ (Vol 1, pp. 447--454). Granada: ELRA.
\par Gaizauskas, Robert. (1998). Evaluation in language and speech technology. }{\i Computer Speech and Language, }{12(4), 249--62.
\par Gasperin, Caroline. (2001). }{\i Extra\'e7\'e3o autom\'e1tica de rela\'e7\'f5es sem\'e2nticas a partir de rela\'e7\'f5es sint\'e1ticas}{ [Automatic extraction of semantic relations from syntactic relations]. MSc thesis, Porto Alegre, Brazil: PPGCC-PUCRS.

\par }{Gasperin, Caroline, Pablo Gamallo, }{\cgrid0 Alexandre Agustini, Gabriel Lopes}{ and Vera de Lima. (2001). Using Syntactic Contexts for Measuring Word Similarity. In Alessandro Lenci, Simonetta Montemagni and Vito Pirrelli (Eds.), }{\i 
Proceedings of the workshop "The Acquisition and Representation of Word Meaning", ESSLLI'01}{. Helsinki.
\par }{Gasperin, Caroline, }{\lang1044 Diana Santos and Vera Strube de Lima. (In preparation). Semantic relatedness among words: what is required from syntax?}{
\par Grefenstette, Gregory. (1994). }{\i Explorations in automatic thesaurus discovery}{. Kluwer Academic Publishers.
\par }{Hindle, Donald and Mats Rooth. (1993). Structural Ambiguity and Lexical Relations.}{\i  Computational Linguistics}{,}{\i  }{19(1), 103--120.
\par Johansson, Stig, Jarle Ebeling and Signe Oksefjell. (1999). English-Norwegian Parallel Corpus: Manual. Univ. of Oslo: Department of British and American Studies, }{\field\flddirty{\*\fldinst { HYPERLINK http://www.hf.uio.no/iba/prosjekt/ENPCmanual.html }{
{\*\datafield 
00d0c9ea79f9bace118c8200aa004ba90b02000000170000003200000068007400740070003a002f002f007700770077002e00680066002e00750069006f002e006e006f002f006900620061002f00700072006f0073006a0065006b0074002f0045004e00500043006d0061006e00750061006c002e00680074006d006c00
0000e0c9ea79f9bace118c8200aa004ba90b6400000068007400740070003a002f002f007700770077002e00680066002e00750069006f002e006e006f002f006900620061002f00700072006f0073006a0065006b0074002f0045004e00500043006d0061006e00750061006c002e00680074006d006c000000}}
}{\fldrslt {\cs32\ul\cf2 http://www.hf.uio.no/iba/prosjekt/ENPCmanual.html}}}{
\par Lin, Dekang. (1995). A dependency-based method for evaluating broad-coverage parsers. }{\i\cgrid0 Proceedings of }{\i IJCAI'95}{\cgrid0  }{(pp. 1420--1425). }{\cgrid0 San Mateo, Calif: Morgan Kaufmann Publishers}{.
\par }{Medeiros, Jos\'e9 Carlos, Rui Marques and Diana Santos. (1993). Portugu\'eas Quantitativo. In}{\i  Actas do 1.o Encontro de Processamento de L\'edngua Portuguesa (Escrita e Falada), EPLP'93}{ (pp. 33--38). Lisboa.
\par Reis, Regina. (1993). Dicion\'e1rios de l\'edngua corrente: algumas considera\'e7\'f5es. In}{\i  Actas do 1.o Encontro de Processamento de L\'edngua Portuguesa (Escrita e Falada), EPLP'93}{ (pp. 141--146). Lisboa.
\par Santos, Diana. (1996). Portugu\'eas Computacional. In In\'eas Duarte and Isabel Leiria (Eds.), }{\i Actas do Congresso Internacional sobre o portugu\'eas}{ (Volume III, pp. 167--184). Lisboa: Edi\'e7\'f5es Colibri / APL.}{
\par Santos, Diana. (1999). }{Toward Language-specific Applications. }{\i Machine Translation,}{ 14(2), 83--112.}{
\par Santos, Diana and Eckhard Bick. (2000). Providing Internet access to Portuguese corpora: the AC/DC project. In M. Gavrilidou, G. Carayannis, S. Markantonatou, S. Piperidis and G. Stainhaouer (Eds.), }{\i Proceedings of t
he Second International Conference on Language Resources and Evaluation, LREC2000}{ (pp. 205--210). Athens: ELRA.
\par }{Santos, Diana and Signe Oksefjell. (1999). Using a Parallel Corpus to Validate Independent Claims. }{\i Languages in contrast}{, 2(1), 117--132.}{
\par }{Santos, Diana and Paulo Rocha. (2001). Evaluating CETEMP\'fablico, a free resource for Portuguese. In }{\i Proceedings of the 39}{\i\super th}{\i  Annual Meeting of the Association for Computational Linguistics}{ (pp. 442--449). ACL.
\par }}