Authors
Börkur Sigurbjörnsson, Jaap Kamps, and Maarten de Rijke.
book
Proceedings of the Thirteenth Conference on Information and Knowledge Management (CIKM 2004). Pages: 371-380. 2004. [acm]
abstract
Document-centric XML collections contain text-rich documents, marked up with XML tags that add lightweight semantics to the text. Querying such collections calls for a hybrid query language: the text-rich nature of the documents suggests a content-oriented (IR) approach, while the mark-up allows users to add structural constraints to their IR queries. Hybrid queries tend to be more expressive, which should lead—in principle—to better retrieval performance. In practice, the processing of these hybrid queries within an IR system turns out to be far from trivial, because a delicate balance between structural and content information needs to be sought. We propose an approach to processing such hybrid content-and-structure queries that decomposes a query into multiple content-only queries whose results are then combined in ways determined by the structural constraints of the original query. We evaluate our methods using the INEX 2003 test-suite, and show (1) that effective ways of processing content-oriented XPath queries are non-trivial, (2) that there are differences in the effectiveness for different topic types, but (3) that with appropriate processing methods retrieval effectiveness can improve.
bibtex
% Citation record for the CIKM '04 paper "Processing content-oriented XPath queries".
% Accents use the BibTeX special-character form ({\"o}) so names sort and label correctly;
% case-sensitive words are braced so sentence-casing styles keep them intact;
% the page range uses the ASCII double hyphen that styles typeset as an en-dash.
@inproceedings{10.1145/1031171.1031247,
  author    = {Sigurbj{\"o}rnsson, B{\"o}rkur and Kamps, Jaap and de Rijke, Maarten},
  title     = {Processing content-oriented {XPath} queries},
  year      = {2004},
  isbn      = {1581138741},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/1031171.1031247},
  doi       = {10.1145/1031171.1031247},
  booktitle = {Proceedings of the Thirteenth {ACM} International Conference on Information and Knowledge Management},
  pages     = {371--380},
  numpages  = {10},
  keywords  = {XML retrieval, XPath, content and structure},
  location  = {Washington, D.C., USA},
  series    = {CIKM '04}
}