Update index.html
Browse files- index.html +206 -18
index.html
CHANGED
|
@@ -1,19 +1,207 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</html>
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="vi">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<title>Vietnamese NLP: POS Tagging Benchmarks</title>
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 7 |
+
<style>
|
| 8 |
+
body { font-family: 'Segoe UI', Arial, sans-serif; margin: 0; background: #f6faff; color: #222; }
|
| 9 |
+
.container { max-width: 980px; margin: 40px auto; padding: 20px 28px; background: #fff; border-radius: 16px; box-shadow: 0 2px 12px #0001;}
|
| 10 |
+
h1 { color: #154e9e; font-size: 2.2rem; margin-bottom: 0.25em;}
|
| 11 |
+
h2 { color: #198754; border-left: 5px solid #b3d1ff; padding-left: 10px;}
|
| 12 |
+
h3 { color: #212529; margin-top: 2em;}
|
| 13 |
+
table { width: 100%; border-collapse: collapse; margin-top: 16px; margin-bottom: 20px; }
|
| 14 |
+
th, td { padding: 8px 12px; text-align: left; border-bottom: 1px solid #eee; }
|
| 15 |
+
th { background: #eaf1fb; font-weight: bold; }
|
| 16 |
+
tr:hover { background: #f5faff;}
|
| 17 |
+
a { color: #2766cc; text-decoration: none; }
|
| 18 |
+
a:hover { text-decoration: underline; }
|
| 19 |
+
.note { color: #444; background: #f3f8ff; border-left: 4px solid #85b9ff; padding: 7px 18px; margin: 15px 0;}
|
| 20 |
+
.icon { font-size: 1.1em; margin-right: 6px; }
|
| 21 |
+
.section { margin-bottom: 2.2em; }
|
| 22 |
+
.papers-list, .tools-list { margin: 0 0 1.5em 0; padding: 0; list-style: none;}
|
| 23 |
+
.papers-list li, .tools-list li { margin: 0.3em 0;}
|
| 24 |
+
.tools-list code { background: #e0e6ed; border-radius: 4px; padding: 1px 4px; }
|
| 25 |
+
@media (max-width: 700px) {
|
| 26 |
+
.container { padding: 8px;}
|
| 27 |
+
table, th, td { font-size: 15px;}
|
| 28 |
+
}
|
| 29 |
+
</style>
|
| 30 |
+
</head>
|
| 31 |
+
<body>
|
| 32 |
+
<div class="container">
|
| 33 |
+
<h1>📚 Vietnamese NLP — POS Tagging Benchmarks &amp; Resources</h1>
|
| 34 |
+
<div class="section">
|
| 35 |
+
<h2>1. VLSP 2013 POS Tagging</h2>
|
| 36 |
+
<div class="note">
|
| 37 |
+
<span class="icon">📊</span>
|
| 38 |
+
<b>Dataset:</b> 27,000+ sentences for training, 870 dev, 2,120 test (from VLSP 2013 Shared Task)
|
| 39 |
+
</div>
|
| 40 |
+
<table>
|
| 41 |
+
<tr>
|
| 42 |
+
<th>Model</th>
|
| 43 |
+
<th>Accuracy</th>
|
| 44 |
+
<th>Method / Reference</th>
|
| 45 |
+
<th>Code</th>
|
| 46 |
+
</tr>
|
| 47 |
+
<tr>
|
| 48 |
+
<td>PhoBERT-large</td>
|
| 49 |
+
<td>96.8</td>
|
| 50 |
+
<td><a href="https://arxiv.org/abs/2003.00744">Nguyen et al. ArXiv'20</a></td>
|
| 51 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
| 52 |
+
</tr>
|
| 53 |
+
<tr>
|
| 54 |
+
<td>vELECTRA</td>
|
| 55 |
+
<td>96.77</td>
|
| 56 |
+
<td><a href="https://arxiv.org/abs/2006.15994">Bui et al. ArXiv'20</a></td>
|
| 57 |
+
<td><a href="https://github.com/fpt-corp/viBERT">Official</a></td>
|
| 58 |
+
</tr>
|
| 59 |
+
<tr>
|
| 60 |
+
<td>PhoBERT-base</td>
|
| 61 |
+
<td>96.7</td>
|
| 62 |
+
<td><a href="https://arxiv.org/abs/2003.00744">Nguyen et al. ArXiv'20</a></td>
|
| 63 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
| 64 |
+
</tr>
|
| 65 |
+
<tr>
|
| 66 |
+
<td>VnMarMoT</td>
|
| 67 |
+
<td>95.88</td>
|
| 68 |
+
<td><a href="http://aclweb.org/anthology/N18-5012">Nguyen et al. NAACL'18</a></td>
|
| 69 |
+
<td><a href="https://github.com/vncorenlp/VnCoreNLP">Official</a></td>
|
| 70 |
+
</tr>
|
| 71 |
+
<tr>
|
| 72 |
+
<td>BiLSTM-CRFs + CNN-char</td>
|
| 73 |
+
<td>95.40</td>
|
| 74 |
+
<td><a href="http://aclweb.org/anthology/N18-5012">Ma et al. ACL'16</a></td>
|
| 75 |
+
<td><a href="https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf/">Link</a></td>
|
| 76 |
+
</tr>
|
| 77 |
+
<tr>
|
| 78 |
+
<td>BiLSTM-CRF + LSTM-char</td>
|
| 79 |
+
<td>95.31</td>
|
| 80 |
+
<td><a href="http://www.aclweb.org/anthology/N16-1030">Lample et al. NAACL'16</a></td>
|
| 81 |
+
<td><a href="https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf/">Link</a></td>
|
| 82 |
+
</tr>
|
| 83 |
+
<tr>
|
| 84 |
+
<td>BiLSTM-CRF</td>
|
| 85 |
+
<td>95.31</td>
|
| 86 |
+
<td><a href="https://arxiv.org/abs/1508.01991">Huang et al. ArXiv'15</a></td>
|
| 87 |
+
<td><a href="https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf/">Link</a></td>
|
| 88 |
+
</tr>
|
| 89 |
+
<tr>
|
| 90 |
+
<td>RDRPOSTagger</td>
|
| 91 |
+
<td>95.11</td>
|
| 92 |
+
<td><a href="https://www.researchgate.net/publication/279916333_RDRPOSTagger_A_Ripple_Down_Rules-based_Part-Of-Speech_Tagger">Nguyen et al. EACL'14</a></td>
|
| 93 |
+
<td><a href="https://github.com/datquocnguyen/rdrpostagger">Official</a></td>
|
| 94 |
+
</tr>
|
| 95 |
+
<tr>
|
| 96 |
+
<td>JointWPD</td>
|
| 97 |
+
<td>94.03</td>
|
| 98 |
+
<td><a href="https://arxiv.org/pdf/1812.11459.pdf">Nguyen et al. '18</a></td>
|
| 99 |
+
<td></td>
|
| 100 |
+
</tr>
|
| 101 |
+
</table>
|
| 102 |
+
</div>
|
| 103 |
+
|
| 104 |
+
<div class="section">
|
| 105 |
+
<h2>2. VietTreeBank</h2>
|
| 106 |
+
<div class="note">
|
| 107 |
+
<span class="icon">📄</span>
|
| 108 |
+
<b>Paper:</b> <a href="https://hal.inria.fr/inria-00421103v2/document">VietTreeBank Paper</a> <br>
|
| 109 |
+
<b>Dataset:</b> train: 7,268 | dev: 1,038 | test: 2,077 sentences
|
| 110 |
+
</div>
|
| 111 |
+
<table>
|
| 112 |
+
<tr>
|
| 113 |
+
<th>Model</th>
|
| 114 |
+
<th>Accuracy</th>
|
| 115 |
+
<th>Method</th>
|
| 116 |
+
<th>Code</th>
|
| 117 |
+
<th>Note</th>
|
| 118 |
+
</tr>
|
| 119 |
+
<tr>
|
| 120 |
+
<td>BiLSTM-CRFs</td>
|
| 121 |
+
<td>93.52</td>
|
| 122 |
+
<td><a href="https://arxiv.org/pdf/1811.03754.pdf">Nguyen et al. '18</a></td>
|
| 123 |
+
<td><a href="https://github.com/duongna21/VNsequencelabeling">Official</a></td>
|
| 124 |
+
<td>10-fold CV</td>
|
| 125 |
+
</tr>
|
| 126 |
+
<tr>
|
| 127 |
+
<td>VNTagger</td>
|
| 128 |
+
<td>93.40</td>
|
| 129 |
+
<td><a href="https://hal.inria.fr/inria-00526139/document">Le et al. TALN'10</a></td>
|
| 130 |
+
<td><a href="http://mim.hus.vnu.edu.vn/dsl/tools/tagger">Official</a></td>
|
| 131 |
+
<td>10-fold CV</td>
|
| 132 |
+
</tr>
|
| 133 |
+
<tr>
|
| 134 |
+
<td>RDRPOSTagger</td>
|
| 135 |
+
<td>91.96</td>
|
| 136 |
+
<td><a href="http://aclweb.org/anthology/I17-3010">Pham et al. IJCNLP'17</a></td>
|
| 137 |
+
<td><a href="https://github.com/datquocnguyen/RDRPOSTagger">Official</a></td>
|
| 138 |
+
<td>5-fold CV</td>
|
| 139 |
+
</tr>
|
| 140 |
+
<tr>
|
| 141 |
+
<td>NNVLP</td>
|
| 142 |
+
<td>91.92</td>
|
| 143 |
+
<td><a href="http://aclweb.org/anthology/I17-3010">Pham et al. IJCNLP'17</a></td>
|
| 144 |
+
<td><a href="https://github.com/pth1993/NNVLP">Official</a></td>
|
| 145 |
+
<td>5-fold CV</td>
|
| 146 |
+
</tr>
|
| 147 |
+
<tr>
|
| 148 |
+
<td>vTools</td>
|
| 149 |
+
<td>90.73</td>
|
| 150 |
+
<td><a href="https://drive.google.com/file/d/1V06YfENrguQk2SRJFbpwWzapxpgPPaPS/view?usp=sharing">Tran et al. VLSP'13</a></td>
|
| 151 |
+
<td><a href="https://github.com/lupanh/vTools">Official</a></td>
|
| 152 |
+
<td></td>
|
| 153 |
+
</tr>
|
| 154 |
+
<tr>
|
| 155 |
+
<td>Vitk</td>
|
| 156 |
+
<td>88.41</td>
|
| 157 |
+
<td></td>
|
| 158 |
+
<td><a href="https://github.com/phuonglh/vn.vitk">Official</a></td>
|
| 159 |
+
<td></td>
|
| 160 |
+
</tr>
|
| 161 |
+
</table>
|
| 162 |
+
</div>
|
| 163 |
+
|
| 164 |
+
<div class="section">
|
| 165 |
+
<h2>3. Social Media POS Tagging</h2>
|
| 166 |
+
<ul class="papers-list">
|
| 167 |
+
<li>📄 <a href="https://www.researchgate.net/publication/309176280_Vietnamese_POS_Tagging_for_Social_Media_Text">Vietnamese POS Tagging for Social Media Text - Ngo et al. 2016</a></li>
|
| 168 |
+
<li>📄 <a href="https://www.researchgate.net/publication/335361630_A_POS_Tagging_Model_for_Vietnamese_Social_Media_Text_Using_BiLSTM-CRF_with_Rich_Features">A POS Tagging Model for Vietnamese Social Media Text Using BiLSTM-CRF with Rich Features - Ngo et al. 2019</a></li>
|
| 169 |
+
<li>📄 <a href="https://www.researchgate.net/publication/321940724_An_Empirical_Study_on_POS_Tagging_for_Vietnamese_Social_Media_Text">An Empirical Study on POS Tagging for Vietnamese Social Media Text - Ngo et al. 2017</a></li>
|
| 170 |
+
</ul>
|
| 171 |
+
</div>
|
| 172 |
+
|
| 173 |
+
<div class="section">
|
| 174 |
+
<h2>4. Miscellaneous Papers &amp; Datasets</h2>
|
| 175 |
+
<ul class="papers-list">
|
| 176 |
+
<li>📄 <a href="https://drive.google.com/file/d/1V6zFx7p-tLV6ZRiyLhVvbjI12PKyQnmF/view?usp=sharing">Nguyen et al. NICS'18 — Building Vietnamese Linguistic Resources for Social Network Text Analysis</a></li>
|
| 177 |
+
<li>📄 <a href="https://arxiv.org/pdf/1711.04951.pdf">Nguyen et al. ALTA'17</a></li>
|
| 178 |
+
<li>📄 <a href="https://arxiv.org/pdf/1412.4021.pdf">Nguyen et al. 2015</a></li>
|
| 179 |
+
<li>📄 <a href="http://www.aclweb.org/anthology/E14-2005">Nguyen et al. 2014</a></li>
|
| 180 |
+
<li>📄 <a href="https://link.springer.com/chapter/10.1007/978-3-642-19400-9_15">Nguyen et al. 2011</a></li>
|
| 181 |
+
<li>📄 <a href="http://ieeexplore.ieee.org/document/6063458/?reload=true">Nguyen et al. 2011</a></li>
|
| 182 |
+
<li>📄 <a href="http://www.aclweb.org/anthology/I11-1035">Nguyen et al. 2010</a></li>
|
| 183 |
+
<li>📄 <a href="https://www.researchgate.net/publication/309176280_Vietnamese_POS_Tagging_for_Social_Media_Text">Ngo et al. 2016</a></li>
|
| 184 |
+
<li>📄 <a href="http://www.jaist.ac.jp/~bao/VLSP-text/ICTrda08/ICT08-VLSP-SP83.pdf">Phan et al. 2008</a></li>
|
| 185 |
+
<li>📄 <a href="http://www.vnulib.edu.vn:8000/dspace/bitstream/123456789/1801/1/sedev0206-02.pdf">Nguyen et al. 2006</a></li>
|
| 186 |
+
<li>📄 <a href="http://www.vietlex.com/xu-li-ngon-ngu/50-A_Case_Study_in_POS_Tagging_of_Vietnamese_Texts">Nguyen et al. 2003</a></li>
|
| 187 |
+
</ul>
|
| 188 |
+
</div>
|
| 189 |
+
|
| 190 |
+
<div class="section">
|
| 191 |
+
<h2>5. Tools, Demos &amp; Open Source Code</h2>
|
| 192 |
+
<ul class="tools-list">
|
| 193 |
+
<li>🔧 <a href="http://doc.openfpt.vn/#vietnamese-accentizer">OpenFPT: Vietnamese Accentizer</a></li>
|
| 194 |
+
<li>🔧 <a href="https://github.com/vncorenlp/VnCoreNLP">vncorenlp/VnCoreNLP</a> <code>java</code></li>
|
| 195 |
+
<li>🔧 <a href="https://github.com/pth1993/NNVLP">pth1993/NNVLP</a> <code>python,bash</code></li>
|
| 196 |
+
<li>🔧 <a href="https://pypi.python.org/pypi/pyvi">pyvi</a> <code>python</code></li>
|
| 197 |
+
<li>🔧 <a href="https://github.com/phuonglh/vn.vitk">Vitk</a> <code>java</code></li>
|
| 198 |
+
<li>🔧 <a href="https://github.com/kanjirz50/viet-morphological-analysis-crf">viet-morphological-analysis-crf</a> <code>python</code> (<a href="http://160.16.58.116/vietnamese/morph_crf">demo</a>)</li>
|
| 199 |
+
<li>🔧 <a href="https://github.com/lupanh/vTools">lupanh/vTools</a> <code>python</code></li>
|
| 200 |
+
<li>🔧 <a href="https://github.com/truongdo/vita">truongdo/vita</a> <code>c++</code></li>
|
| 201 |
+
<li>🔧 <a href="http://rdrpostagger.sourceforge.net/">RDRPOSTagger</a> <code>python</code></li>
|
| 202 |
+
<li>🔧 <a href="http://vlsp.hpda.vn:8080/demo/?page=resources">vnTagger</a> <code>java</code></li>
|
| 203 |
+
</ul>
|
| 204 |
+
</div>
|
| 205 |
+
</div>
|
| 206 |
+
</body>
|
| 207 |
</html>
|