Authors
Börkur Sigurbjörnsson, Jaap Kamps, and Maarten de Rijke.
book
Accessing Multilingual Information Repositories: 6th Workshop of the Cross-Language Evaluation Forum, CLEF 2005. Lecture Notes in Computer Science. 2006. [Springer] [ACM DL]
abstract
EuroGOV is a multilingual web corpus that was created to serve as the document collection for WebCLEF, the CLEF 2005 web retrieval task. EuroGOV is a collection of web pages crawled from the European Union portal, European Union member state governmental web sites, and Russian governmental web sites. The corpus contains over 3 million documents written in more than 20 different European languages. In this paper we provide a detailed description of the EuroGOV collection.
bibtex
% EuroGOV corpus description paper (WebCLEF, CLEF 2005 workshop proceedings).
% NOTE(review): year is kept at 2005 as exported; the citation line above the
% abstract gives 2006 for the published volume -- confirm which one your
% bibliography style should show.
@inproceedings{10.1007/11878773_90,
  author    = {Sigurbj{\"o}rnsson, B{\"o}rkur and Kamps, Jaap and de Rijke, Maarten},
  title     = {{EuroGOV}: engineering a multilingual web corpus},
  year      = {2005},
  isbn      = {354045697X},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  url       = {https://doi.org/10.1007/11878773_90},
  doi       = {10.1007/11878773_90},
  booktitle = {Proceedings of the 6th International Conference on Cross-Language Evaluation Forum: Accessing Multilingual Information Repositories},
  pages     = {825--836},
  numpages  = {12},
  location  = {Vienna, Austria},
  series    = {CLEF'05}
}