% bibfile.bib -- BibTeX bibliography database.
% arXiv preprint 1801.06146 (cs.CL). "and others" stands in for the
% remaining authors and is rendered as "et al." by the style.
@misc{howard2018universal,
  author        = {Jeremy Howard and others},
  title         = {Universal Language Model Fine-tuning for Text Classification},
  year          = {2018},
  eprint        = {1801.06146},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint 1706.03762 (cs.CL); author list truncated with "and others".
@misc{vaswani2017attention,
  author        = {Ashish Vaswani and others},
  title         = {Attention Is All You Need},
  year          = {2017},
  eprint        = {1706.03762},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint 1905.05583 (cs.CL).
% The acronym is braced so that styles which sentence-case titles keep
% it in capitals instead of printing "bert".
@misc{sun2019finetune,
  author        = {Sun, Chi and others},
  title         = {How to Fine-Tune {BERT} for Text Classification?},
  year          = {2019},
  eprint        = {1905.05583},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% The entry had no booktitle, which the inproceedings type requires, so it
% is typed as a technical report instead. The acronym in the title is braced
% to survive sentence-casing styles.
% NOTE(review): institution was supplied during review, not taken from the
% original entry -- confirm against the source document.
@techreport{Gray2017GPUKF,
  author      = {Gray, Scott and others},
  title       = {{GPU} Kernels for Block-Sparse Weights},
  institution = {OpenAI},
  year        = {2017}
}
% arXiv preprint 1906.08237 (cs.CL).
% The mixed-case model name is braced as a whole word so sentence-casing
% styles do not flatten it to "Xlnet".
@misc{yang2019xlnet,
  author        = {Yang, Zhilin and others},
  title         = {{XLNet}: Generalized Autoregressive Pretraining for Language Understanding},
  year          = {2019},
  eprint        = {1906.08237},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint 1810.04805 (cs.CL).
% The acronym is braced so sentence-casing styles keep it upper-case.
@misc{devlin2018bert,
  author        = {Devlin, Jacob and others},
  title         = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  year          = {2018},
  eprint        = {1810.04805},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% The entry had no journal, which the article type requires, so it is typed
% as a technical report instead.
% NOTE(review): institution was supplied during review, not taken from the
% original entry -- confirm against the source document.
@techreport{radford2019language,
  author      = {Radford, Alec and others},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = {2019}
}
% arXiv preprint 1909.08053 (cs.CL).
% The system name is braced as a whole so sentence-casing styles do not
% lower-case the "LM" suffix.
@misc{shoeybi2019megatronlm,
  author        = {Shoeybi, Mohammad and others},
  title         = {{Megatron-LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  year          = {2019},
  eprint        = {1909.08053},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Journal article: volume 9, number 8, pages 1735--1780.
% Export-site bookkeeping fields (added-at, biburl, interhash, intrahash,
% timestamp) are dropped; they carry no bibliographic data. The title and
% journal name are stored in title case so that styles can downcase them if
% they wish (the reverse is not possible).
% NOTE(review): doi was added during review -- confirm it resolves to this
% article.
@article{hochreiter1997long,
  author    = {Hochreiter, Sepp and others},
  title     = {Long Short-Term Memory},
  journal   = {Neural Computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  year      = {1997},
  publisher = {MIT Press},
  doi       = {10.1162/neco.1997.9.8.1735},
  keywords  = {lstm rnn}
}
% arXiv preprint 1804.07461 (cs.CL).
% The benchmark acronym is braced so sentence-casing styles keep it
% upper-case.
@misc{wang2018glue,
  author        = {Wang, Alex and others},
  title         = {{GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},
  year          = {2018},
  eprint        = {1804.07461},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% arXiv preprint 1802.05365 (cs.CL); author list truncated with "and others".
@misc{peters2018deep,
  author        = {Matthew E. Peters and others},
  title         = {Deep contextualized word representations},
  year          = {2018},
  eprint        = {1802.05365},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Conference paper, pages 5149--5152.
% The two language names are proper nouns and are braced so that
% sentence-casing styles do not lower-case them.
@inproceedings{wordPiece,
  author    = {Schuster, Mike and others},
  title     = {{Japanese} and {Korean} Voice Search},
  booktitle = {International Conference on Acoustics, Speech and Signal Processing},
  year      = {2012},
  pages     = {5149--5152}
}
% Chapter in an edited proceedings volume, pages 3111--3119; the url field
% points at the paper PDF.
@incollection{mikolov2013,
  author    = {Mikolov, Tomas and others},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26},
  editor    = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  year      = {2013},
  pages     = {3111--3119},
  url       = {http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf}
}
% Conference paper, pages 1532--1543, with both doi and url recorded.
% The method name is braced as one whole word: bracing only the first
% letter left the rest of the word unprotected and lost the capital V.
% Field values use brace delimiters; month stays a bare macro so the style
% can expand it.
@inproceedings{penningtonglove,
  author    = {Pennington, Jeffrey and others},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = oct,
  year      = {2014},
  address   = {Doha, Qatar},
  publisher = {Association for Computational Linguistics},
  pages     = {1532--1543},
  doi       = {10.3115/v1/D14-1162},
  url       = {https://www.aclweb.org/anthology/D14-1162}
}
% The entry had no booktitle, which the inproceedings type requires, so it
% is typed as a technical report instead.
% NOTE(review): institution was supplied during review, not taken from the
% original entry -- confirm against the source document.
@techreport{Radford2018ImprovingLU,
  author      = {Radford, Alec and others},
  title       = {Improving Language Understanding by Generative Pre-Training},
  institution = {OpenAI},
  year        = {2018}
}
% Conference paper; the proceedings title is given in booktitle.
@inproceedings{mikolov2018advances,
  author    = {Mikolov, Tomas and others},
  title     = {Advances in Pre-Training Distributed Word Representations},
  booktitle = {Proceedings of the International Conference on Language Resources and Evaluation (LREC 2018)},
  year      = {2018}
}