references.bib

@article{Bonifati2019,
  author    = {Bonifati, Angela and Martens, Wim and Timm, Thomas},
  title     = {An analytical study of large {SPARQL} query logs},
  journal   = {The VLDB Journal},
  volume    = {29},
  number    = {2--3},
  pages     = {655--679},
  year      = {2019},
  month     = aug,
  publisher = {Springer Science and Business Media LLC},
  issn      = {0949-877X},
  doi       = {10.1007/s00778-019-00558-9},
  url       = {https://doi.org/10.1007/s00778-019-00558-9},
}

@inproceedings{dia2018fast,
  author       = {Dia, Amadou Fall and Aoul, Zakia Kazi and Boly, Aliou and M{\'e}tais, Elisabeth},
  title        = {Fast {SPARQL} join processing between distributed streams and stored {RDF} graphs using {Bloom} filters},
  booktitle    = {2018 12th International Conference on Research Challenges in Information Science (RCIS)},
  pages        = {1--12},
  year         = {2018},
  organization = {IEEE},
}


@misc{shapetreesShapeTrees,
	author = {Prud'hommeaux, Eric and Bingham, Justin},
	title = {{Shape} {Trees} {Specification} --- shapetrees.org},
	howpublished = {\url{https://shapetrees.org/TR/specification/}},
	note = {[Accessed 06-12-2024]},
}

@misc{w3SPARQLQuery,
	author = {{W3C}},
	title = {{SPARQL} 1.1 {Query} {Language} --- w3.org},
	howpublished = {\url{https://www.w3.org/TR/sparql11-query/}},
	year = {2013},
	note = {[Accessed 04-12-2024]},
}

@inproceedings{tam2024opportunitiesshapebasedoptimizationlink,
  title     = {Opportunities for Shape-based Optimization of Link Traversal Queries},
  author    = {Tam, Bryan-Elliott and Taelman, Ruben and Colpaert, Pieter and Verborgh, Ruben},
  booktitle = {Proceedings of the 16th Alberto Mendelzon International Workshop on Foundations of Data Management},
  month     = sep,
  year      = {2024},
  url       = {https://arxiv.org/pdf/2407.00998v2}
}

@inproceedings{verstraete2022solid,
  title     = {Solid: Enabler of decentralized, digital platforms ecosystems},
  author    = {Verstraete, Melanie and Verbrugge, Sofie and Colle, Didier},
  booktitle = {ITS},
  pages     = {1--19},
  year      = {2022},
  groups    = {Commodification internet},
  file      = {:article/Solid Enabler of decentralized, digital platforms ecosystems:}
}

@article{taelman2024towards,
  author    = {Taelman, Ruben},
  title     = {Towards Applications on the Decentralized Web using Hypermedia-driven Query Engines},
  journal   = {ACM SIGWEB Newsletter},
  volume    = {2024},
  number    = {Summer},
  pages     = {1--9},
  year      = {2024},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@inproceedings{hose2012towards,
  author    = {Hose, Katja and Schenkel, Ralf},
  title     = {Towards benefit-based {RDF} source selection for {SPARQL} queries},
  booktitle = {Proceedings of the 4th International Workshop on Semantic Web Information Management},
  pages     = {1--8},
  year      = {2012},
}

@inproceedings{Harth2010,
  author    = {Harth, Andreas and Hose, Katja and Karnstedt, Marcel and Polleres, Axel and Sattler, Kai-Uwe and Umbrich, J{\"u}rgen},
  title     = {Data summaries for on-demand queries over linked data},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web},
  series    = {WWW '10},
  pages     = {411--420},
  year      = {2010},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Raleigh, North Carolina, USA},
  isbn      = {9781605587998},
  doi       = {10.1145/1772690.1772733},
  url       = {https://doi.org/10.1145/1772690.1772733},
  numpages  = {10},
  keywords  = {index structures, linked data, rdf querying},
  abstract  = {Typical approaches for querying structured Web Data collect (crawl) and pre-process (index) large amounts of data in a central data repository before allowing for query answering. However, this time-consuming pre-processing phase however leverages the benefits of Linked Data -- where structured data is accessible live and up-to-date at distributed Web resources that may change constantly -- only to a limited degree, as query results can never be current. An ideal query answering system for Linked Data should return current answers in a reasonable amount of time, even on corpora as large as the Web. Query processors evaluating queries directly on the live sources require knowledge of the contents of data sources. In this paper, we develop and evaluate an approximate index structure summarising graph-structured content of sources adhering to Linked Data principles, provide an algorithm for answering conjunctive queries over Linked Data on theWeb exploiting the source summary, and evaluate the system using synthetically generated queries. The experimental results show that our lightweight index structure enables complete and up-to-date query results over Linked Data, while keeping the overhead for querying low and providing a satisfying source ranking at no additional cost.},
}


@InProceedings{aranda2013,
  author    = {Buil-Aranda, Carlos and Hogan, Aidan and Umbrich, J{\"u}rgen and Vandenbussche, Pierre-Yves},
  title     = {{SPARQL} Web-Querying Infrastructure: Ready for Action?},
  booktitle = {Proceedings 12th ISWC},
  series    = {ISWC '13},
  pages     = {277--293},
  year      = {2013},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  isbn      = {9783642413377},
  doi       = {10.1007/978-3-642-41338-4_18},
  url       = {https://doi.org/10.1007/978-3-642-41338-4_18},
  numpages  = {17},
  file      = {:articles/SPARQL Web-Querying Infrastructure.pdf:PDF},
  groups    = {rdf statistic, statistic},
}


@misc{LabraGayo2023,
  author       = {Labra-Gayo, Jose Emilio and Waagmeester, Andra and Iglesias-Pr{\'e}stamo, {\'A}ngel and Yamamoto, Yasunori and Katayama, Toshiaki and Liener, Thomas and Unni, Deepak and Bolleman, Jerven and Aoki-Kinoshita, Kiyoko Flora and Yokochi, Masashi and Queralt-Rosinach, N{\'u}ria and Mori, Hiroshi and Fern{\'a}ndez-{\'A}lvarez, Daniel and Labarga, Alberto and Ben{\'{\i}}tez-Andrades, Jos{\'e} Alberto and Hoehndorf, Robert and Prud'hommeaux, Eric and Nanjo, Claude and Thalhath, Nishad},
  title        = {{RDF} Data integration using {Shape} {Expressions}},
  howpublished = {BioHackrXiv preprint},
  year         = {2023},
  month        = jul,
  publisher    = {Center for Open Science},
  doi          = {10.37044/osf.io/md73k},
  url          = {https://doi.org/10.37044/osf.io/md73k},
}

@article{fernandez2023extracting,
  author  = {Fern{\'a}ndez-{\'A}lvarez, Daniel and Yamamoto, Yasunori and Labra-Gayo, Jose Emilio and Waagmeester, Andra},
  title   = {Extracting shapes from large {RDF} data collections},
  year    = {2023},
  comment = {NOTE(review): this @article has no journal field, which BibTeX requires; add the venue, or change the entry type if this is a preprint or report.},
}


@InProceedings{9663788,
  author    = {Mechant, Peter and De Wolf, Ralf and Van Compernolle, Mathias and Joris, Glen and Evens, Tom and De Marez, Lieven},
  title     = {Saving the web by decentralizing data networks? A socio-technical reflection on the promise of decentralization and personal data stores},
  booktitle = {2021 14th CMI International Conference},
  year      = {2021},
  pages     = {1--6},
  doi       = {10.1109/CMI53512.2021.9663788},
  file      = {:article/Saving the web by decentralizing data networks?
A socio-technical reflection on the promise of
decentralization and personal data stores.pdf:PDF},
  groups    = {Commodification internet},
}

@inproceedings{tam_iswc_traversalsensortree_2024,
  author    = {Tam, Bryan-Elliott and Taelman, Ruben and Rojas Mel{\'e}ndez, Juli{\'a}n and Colpaert, Pieter},
  title     = {Optimizing Traversal Queries of Sensor Data Using a Rule-Based Reachability Approach},
  booktitle = {Proceedings of the 23rd International Semantic Web Conference: Posters and Demos},
  year      = {2024},
  month     = nov,
  url       = {https://arxiv.org/pdf/2408.17157},
}

% Duplicate definition of citation key "9663788" removed here (it also carried a misspelled "doi." field); the entry is already defined earlier in this file.

@Article{Terranova2000FreeLP,
  author     = {Terranova, Tiziana},
  title      = {Free Labor: Producing Culture for the Digital Economy},
  journal    = {Social Text},
  volume     = {18},
  pages      = {33--58},
  year       = {2000},
  file       = {:article/Free Labor Producing Culture for the Digital Economy.pdf:PDF},
  groups     = {Commodification internet},
  priority   = {prio1},
  ranking    = {rank5},
  readstatus = {read},
}


@InCollection{Curran2016ch1,
  author    = {Curran, James},
  title     = {The internet of dreams: Reinterpreting the internet},
  booktitle = {Misunderstanding the Internet},
  chapter   = {1},
  publisher = {Routledge},
  year      = {2016},
  groups    = {Commodification internet},
}

@article{Sevignani2013,
  title      = {The commodification of privacy on the Internet},
  author     = {S. Sevignani},
  journal    = {Science and Public Policy},
  publisher  = {Oxford University Press ({OUP})},
  volume     = {40},
  number     = {6},
  pages      = {733--739},
  month      = nov,
  year       = {2013},
  doi        = {10.1093/scipol/sct082},
  url        = {https://doi.org/10.1093/scipol/sct082},
  groups     = {Commodification internet},
  priority   = {prio1},
  ranking    = {rank5},
  readstatus = {read},
  file       = {:article/The commodification of privacy on the internet.pdf:PDF}
}

@article{Karim2020,
  author    = {Karim, Farah and Vidal, Maria-Esther and Auer, S{\"o}ren},
  title     = {Compacting frequent star patterns in {RDF} graphs},
  journal   = {Journal of Intelligent Information Systems},
  volume    = {55},
  number    = {3},
  pages     = {561--585},
  year      = {2020},
  month     = apr,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1573-7675},
  doi       = {10.1007/s10844-020-00595-9},
  url       = {https://doi.org/10.1007/s10844-020-00595-9},
}

% Duplicate definition of citation key "9663788" removed here; the entry is already defined earlier in this file.

@article{hartigLDQL,
  author   = {Hartig, Olaf and P{\'e}rez, Jorge},
  title    = {{LDQL}: A query language for the {Web} of {Linked} {Data}},
  journal  = {Journal of Web Semantics},
  volume   = {41},
  pages    = {9--29},
  year     = {2016},
  issn     = {1570-8268},
  doi      = {10.1016/j.websem.2016.10.001},
  url      = {https://www.sciencedirect.com/science/article/pii/S1570826816300476},
  keywords = {Linked Data, Query language, Foundations, SPARQL, Queries},
  abstract = {The Web of Linked Data is composed of tons of RDF documents interlinked to each other forming a huge repository of distributed semantic data. Effectively querying this distributed data source is an important open problem in the Semantic Web area. In this paper, we propose LDQL, a declarative language to query Linked Data on the Web. One of the novelties of LDQL is that it expresses separately (i) patterns that describe the expected query result, and (ii) Web navigation paths that select the data sources to be used for computing the result. We present a formal syntax and semantics, prove equivalence rules, and study the expressiveness of the language. In particular, we show that LDQL is strictly more expressive than all the query formalisms that have been proposed previously for Linked Data on the Web. We also study some computability issues regarding LDQL. We first prove that when considering the Web of Linked Data as a fully accessible graph, the evaluation problem for LDQL can be solved in polynomial time. Nevertheless, when the limited data access capabilities of Web clients are considered, the scenario changes drastically; there are LDQL queries for which a complete execution is not possible in practice. We formally study this issue and provide a sufficient syntactic condition to avoid this problem; queries satisfying this condition are ensured to have a procedure to be effectively evaluated over the Web of Linked Data.},
}

@InProceedings{Fujimoto2022,
  author    = {Fujimoto,  Haruna and Suzuki,  Nobutaka},
  title     = {A Simple Algorithm for Checking Pattern Query Containment under Shape Expression Schema},
  booktitle = {Proceedings of the 18th International Conference on Web Information Systems and Technologies},
  publisher = {SCITEPRESS - Science and Technology Publications},
  year      = {2022},
  doi       = {10.5220/0011536800003318},
  url       = {http://dx.doi.org/10.5220/0011536800003318},
}

@Article{Spasi2023,
  author    = {Spasi{\'{c}}, Mirko and Vujo{\v{s}}evi{\'{c}} Jani{\v{c}}i{\'{c}}, Milena},
  journal   = {Journal of Web Semantics},
  title     = {Solving the {SPARQL} query containment problem with {SpeCS}},
  year      = {2023},
  month     = apr,
  pages     = {100770},
  volume    = {76},
  comment   = {I have been told to be careful about the claim of that paper because it has been rejected by another journal because there was a problem in the approach.},
  doi       = {10.1016/j.websem.2022.100770},
  file      = {:papers/Solving the SPARQL query containment problem with SpeCS.pdf:PDF},
  groups    = {query containment bag semantic, query containment with RDF},
  priority  = {prio1},
  publisher = {Elsevier {BV}},
  url       = {https://doi.org/10.1016/j.websem.2022.100770},
}

@InBook{afariQCE,
  author     = {Afrati, Foto and Chirkova, Rada},
  chapter    = {2},
  pages      = {21--59},
  publisher  = {Springer},
  address    = {Cham},
  title      = {Query Containment and Equivalence},
  year       = {2019},
  booktitle  = {Answering Queries Using Views, Second Edition},
  doi        = {10.1007/978-3-031-01871-8},
  file       = {:papers/Query Containment and Equivalence.pdf:PDF;:papers/Query Containment and Equivalence/p23.jpg:JPG image},
  groups     = {query containment},
  ranking    = {rank5},
  readstatus = {read},
}

@InBook{afariAQD,
  author    = {Afrati, Foto and Chirkova, Rada},
  chapter   = {5},
  pages     = {125--142},
  publisher = {Springer},
  address   = {Cham},
  title     = {Answering Queries in Presence of Dependencies},
  year      = {2019},
  booktitle = {Answering Queries Using Views, Second Edition},
  doi       = {10.1007/978-3-031-01871-8},
  file      = {:papers/Answering Queries in Presence of Dependencies.pdf:PDF},
  groups    = {query containment},
  priority  = {prio3},
}

@Article{10.1145/3472391,
  author     = {Abo Khamis, Mahmoud and Kolaitis, Phokion G. and Ngo, Hung Q. and Suciu, Dan},
  journal    = {ACM Trans. Database Syst.},
  title      = {Bag Query Containment and Information Theory},
  year       = {2021},
  issn       = {0362-5915},
  month      = sep,
  number     = {3},
  volume     = {46},
  abstract   = {The query containment problem is a fundamental algorithmic problem in data management. While this problem is well understood under set semantics, it is by far less understood under bag semantics. In particular, it is a long-standing open question whether or not the conjunctive query containment problem under bag semantics is decidable. We unveil tight connections between information theory and the conjunctive query containment under bag semantics. These connections are established using information inequalities, which are considered to be the laws of information theory. Our first main result asserts that deciding the validity of a generalization of information inequalities is many-one equivalent to the restricted case of conjunctive query containment in which the containing query is acyclic; thus, either both these problems are decidable or both are undecidable. Our second main result identifies a new decidable case of the conjunctive query containment problem under bag semantics. Specifically, we give an exponential-time algorithm for conjunctive query containment under bag semantics, provided the containing query is chordal and admits a simple junction tree.},
  address    = {New York, NY, USA},
  articleno  = {12},
  doi        = {10.1145/3472391},
  file       = {:papers/Bag Query Containment and Information Theory.pdf:PDF},
  groups     = {reading, query containment bag semantic},
  issue_date = {September 2021},
  keywords   = {information theory, entropy, Query containment, bag semantics},
  numpages   = {39},
  priority   = {prio1},
  publisher  = {Association for Computing Machinery},
  url        = {https://doi.org/10.1145/3472391},
}

@Article{Chekol2018,
  author    = {Melisachew Wudage Chekol and J{\'{e}}r{\^{o}}me Euzenat and Pierre Genev{\`{e}}s and Nabil Laya{\"{\i}}da},
  journal   = {Journal on Data Semantics},
  title     = {{SPARQL} Query Containment Under Schema},
  year      = {2018},
  month     = may,
  number    = {3},
  pages     = {133--154},
  volume    = {7},
  doi       = {10.1007/s13740-018-0087-1},
  file      = {:papers/SPARQL Query Containment under Schema.pdf:PDF},
  groups    = {query containment bag semantic, query containment with RDF, query-containment with schema},
  priority  = {prio1},
  publisher = {Springer Science and Business Media {LLC}},
  url       = {https://doi.org/10.1007/s13740-018-0087-1},
}

@Misc{2112.11796,
  author        = {Delva, Thomas and Dimou, Anastasia and Jakubowski, Maxime and Van den Bussche, Jan},
  title         = {Shape Fragments},
  year          = {2021},
  archiveprefix = {arXiv},
  comment       = {It's a bit complicated to understand but I think the idea of converting the shape into a query or converting the shape directly into a canonical database (not in the paper) might be a very good approach. But the query are complicated and the software proposed is a naive implementation.},
  eprint        = {2112.11796},
  file          = {:papers/Shape Fragments.pdf:PDF},
  groups        = {shape},
  ranking       = {rank4},
  readstatus    = {skimmed},
}

@misc{koutisLecture,
  title  = {Lecture 2: Query Containment},
  author = {Paris Koutris},
  groups = {query containment},
  file   = {:papers/Koutris_query_containment.pdf:PDF}
}

@InBook{Chirkova2009,
  author    = {Chirkova, Rada},
  editor    = {Liu, Ling and {\"O}zsu, M. Tamer},
  pages     = {2249--2253},
  publisher = {Springer US},
  title     = {Query Containment},
  year      = {2009},
  address   = {Boston, MA},
  isbn      = {978-0-387-39940-9},
  booktitle = {Encyclopedia of Database Systems},
  doi       = {10.1007/978-0-387-39940-9_1269},
  file      = {:papers/Query Containment.pdf:PDF},
  groups    = {query containment},
  priority  = {prio1},
  url       = {https://doi.org/10.1007/978-0-387-39940-9_1269},
}

@InProceedings{10.1145/275487.275511,
  author    = {Kolaitis, Phokion G. and Vardi, Moshe Y.},
  booktitle = {Proceedings of the Seventeenth ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems},
  title     = {Conjunctive-Query Containment and Constraint Satisfaction},
  year      = {1998},
  address   = {New York, NY, USA},
  pages     = {205--213},
  publisher = {Association for Computing Machinery},
  series    = {PODS '98},
  doi       = {10.1145/275487.275511},
  file      = {:papers/Conjunctive query containment and constraint satisfaction.pdf:PDF},
  groups    = {query containment},
  isbn      = {0897919963},
  location  = {Seattle, Washington, USA},
  numpages  = {9},
  priority  = {prio1},
  url       = {https://doi.org/10.1145/275487.275511},
}

@inproceedings{Ullman1997,
  title     = {Information integration using logical views},
  author    = {Ullman, Jeffrey D.},
  editor    = {Afrati, Foto and Kolaitis, Phokion},
  booktitle = {Database Theory --- ICDT '97},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {19--40},
  year      = {1997},
  isbn      = {978-3-540-49682-3},
  abstract  = {A number of ideas concerning information-integration tools can be thought of as constructing answers to queries using views that represent the capabilities of information sources. We review the formal basis of these techniques, which are closely related to containment algorithms for conjunctive queries and/or Datalog programs. Then we compare the approaches taken by AT{\&}T Labs' ``Information Manifold'' and the Stanford ``Tsimmis'' project in these terms.},
  groups    = {query containment},
  priority  = {prio2},
  file      = {:papers/Information integration using logical views.pdf:PDF}
}

@InProceedings{Boneva2017,
  author        = {Boneva, Iovka and Labra Gayo, Jose E. and Prud'hommeaux, Eric G.},
  booktitle     = {The Semantic Web -- ISWC 2017: 16th International Semantic Web Conference, Vienna, Austria, October 21--25, 2017, Proceedings, Part I},
  title         = {Semantics and Validation of Shapes Schemas for {RDF}},
  year          = {2017},
  address       = {Berlin, Heidelberg},
  pages         = {104--120},
  publisher     = {Springer-Verlag},
  abstract      = {We present a formal semantics and proof of soundness for shapes schemas, an expressive schema language for RDF graphs that is the foundation of Shape Expressions Language 2.0. It can be used to describe the vocabulary and the structure of an RDF graph, and to constrain the admissible properties and values for nodes in that graph. The language defines a typing mechanism called shapes against which nodes of the graph can be checked. It includes an algebraic grouping operator, a choice operator and cardinality constraints for the number of allowed occurrences of a property. Shapes can be combined using Boolean operators, and can use possibly recursive references to other shapes.We describe the syntax of the language and define its semantics. The semantics is proven to be well-defined for schemas that satisfy a reasonable syntactic restriction, namely stratified use of negation and recursion. We present two algorithms for the validation of an RDF graph against a shapes schema. The first algorithm is a direct implementation of the semantics, whereas the second is a non-trivial improvement. We also briefly give implementation guidelines.},
  comment-id357 = {Cet article a une bonne formulation des formes, je pense qu'il peut être un bon point de départ pour ma propre formulation, il y a aussi une bonne gestion des négations. Par contre le problème de transformer les formes dans une formulation compatible avec le problème de containement reste un problème.},
  doi           = {10.1007/978-3-319-68288-4_7},
  file          = {:papers/Semantics and Validation of Shapes Schemas for RDF.pdf:PDF},
  groups        = {shape},
  isbn          = {978-3-319-68287-7},
  location      = {Vienna, Austria},
  numpages      = {17},
  ranking       = {rank5},
  readstatus    = {read},
  url           = {https://doi.org/10.1007/978-3-319-68288-4_7},
}

@Misc{afrati2020complexity,
  author        = {Afrati, Foto N. and Damigos, Matthew},
  title         = {On the complexity of query containment and computing certain answers in the presence of {ACs}},
  year          = {2020},
  archiveprefix = {arXiv},
  comment-id357 = {Help to explain the cannonical database for query with arithmetic comparator},
  eprint        = {2008.10986},
  file          = {:papers/On the Complexity of Query Containment and Computing Certain Answers in the Presence of ACs.pdf:PDF},
  groups        = {query containment},
  primaryclass  = {cs.DB},
  ranking       = {rank4},
  readstatus    = {skimmed},
}

@Misc{Leinberger2020DecidingSS,
  author        = {Leinberger, Martin and Seifer, Philipp and Rienstra, Tjitze and L{\"a}mmel, Ralf and Staab, Steffen},
  title         = {Deciding {SHACL} Shape Containment through Description Logics Reasoning (Extended Version)},
  year          = {2020},
  archiveprefix = {arXiv},
  eprint        = {2008.13603},
  file          = {:papers/Deciding SHACL Shape Containment through Description Logics Reasoning Extended Version.pdf:PDF},
  groups        = {shape containment},
  priority      = {prio1},
  url           = {https://api.semanticscholar.org/CorpusID:221377301},
}

@Misc{Pareti2020SHACLSA,
  author        = {Pareti, Paolo and Konstantinidis, G. and Mogavero, Fabio and Norman, Timothy J.},
  title         = {{SHACL} Satisfiability and Containment (Extended Paper)},
  year          = {2020},
  archiveprefix = {arXiv},
  eprint        = {2009.09806},
  file          = {:papers/SHACL Satisfiability and Containment Extended Paper.pdf:PDF},
  groups        = {shape containment},
  priority      = {prio1},
  url           = {https://api.semanticscholar.org/CorpusID:221818992},
}

@InProceedings{Staworko2018ContainmentOS,
  author    = {Staworko, Slawomir and Wieczorek, Piotr},
  booktitle = {Proceedings of the 38th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems},
  title     = {Containment of Shape Expression Schemas for {RDF}},
  year      = {2019},
  comment   = {NOTE(review): year changed from 2018 to 2019 to match the 38th PODS proceedings named in booktitle (the 33rd edition is dated 2014 elsewhere in this file); confirm against the publisher record.},
  file      = {:papers/Containment of Shape Expression Schemas for RDF.pdf:PDF},
  groups    = {shape containment, reading},
  priority  = {prio1},
  url       = {https://api.semanticscholar.org/CorpusID:4337350},
}

@article{Afrati2010,
  title      = {Query containment under bag and bag-set semantics},
  author     = {Foto N. Afrati and Matthew Damigos and Manolis Gergatsoulis},
  journal    = {Information Processing Letters},
  publisher  = {Elsevier {BV}},
  volume     = {110},
  number     = {10},
  pages      = {360--369},
  month      = apr,
  year       = {2010},
  doi        = {10.1016/j.ipl.2010.02.017},
  url        = {https://doi.org/10.1016/j.ipl.2010.02.017},
  comment    = {Very good paper to solve the QC problem with bag semantic-set describe well their approach and the complexity. Will be useful, if I need to implement something or to describe theoritical points.},
  groups     = {query containment bag semantic},
  priority   = {prio1},
  ranking    = {rank5},
  readstatus = {read},
  file       = {:papers/Query containment under bag and bag set semantics.pdf:PDF}
}

@misc{kopparty2010homomorphism,
  title         = {The Homomorphism Domination Exponent},
  author        = {Swastik Kopparty and Benjamin Rossman},
  year          = {2010},
  eprint        = {1004.2485},
  archiveprefix = {arXiv},
  primaryclass  = {math.CO},
  groups        = {query containment bag semantic},
  file          = {:papers/The Homomorphism Domination Exponent.pdf:PDF}
}

@Article{Saleem2018,
  author    = {Saleem, Muhammad and Hasnain, Ali and Ngonga Ngomo, Axel-Cyrille},
  journal   = {Journal of Web Semantics},
  title     = {{LargeRDFBench}: A billion triples benchmark for {SPARQL} endpoint federation},
  year      = {2018},
  month     = jan,
  pages     = {85--125},
  volume    = {48},
  doi       = {10.1016/j.websem.2017.12.005},
  file      = {:papers/LargeRDFBench A billion triples benchmark for SPARQL endpoint.pdf:PDF},
  groups    = {federated queries},
  publisher = {Elsevier {BV}},
  url       = {https://doi.org/10.1016/j.websem.2017.12.005},
}

@InProceedings{10.1145/153850.153856,
  author    = {Chaudhuri, Surajit and Vardi, Moshe Y.},
  booktitle = {Proceedings of the Twelfth ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems},
  title     = {Optimization of Real Conjunctive Queries},
  year      = {1993},
  address   = {New York, NY, USA},
  pages     = {59--70},
  publisher = {Association for Computing Machinery},
  series    = {PODS '93},
  doi       = {10.1145/153850.153856},
  file      = {:papers/Optimization of real conjunctive queries.pdf:PDF},
  groups    = {query containment bag semantic},
  isbn      = {0897915933},
  location  = {Washington, D.C., USA},
  numpages  = {12},
  url       = {https://doi.org/10.1145/153850.153856},
}

@online{spapeExpressionConvert,
  title    = {SPARQL Queries to Validate Shape Expressions (informative)},
  author   = {W3C},
  year     = {2013},
  url      = {https://www.w3.org/2013/ShEx/toSPARQL.html},
  urldate  = {2023-09-11},
  groups   = {shape},
  priority = {prio1},
  file     = {:papers/SPARQL Queries to Validate Shape Expressions.pdf:PDF}
}

@InProceedings{Abbas2017,
  author     = {Abbas, Abdullah and Genev{\`e}s, Pierre and Roisin, C{\'e}cile and Laya{\"i}da, Nabil},
  booktitle  = {Advances in Databases and Information Systems},
  title      = {{SPARQL} Query Containment with {ShEx} Constraints},
  year       = {2017},
  address    = {Cham},
  editor     = {Kirikova, M{\={a}}r{\={\i}}te and N{\o}rv{\aa}g, Kjetil and Papadopoulos, George A.},
  pages      = {343--356},
  publisher  = {Springer International Publishing},
  abstract   = {ShEx (Shape Expressions) is a language for expressing constraints on RDF graphs. We consider the problem of SPARQL query containment in the presence of ShEx constraints. We first propose a sound and complete procedure for the problem of containment with ShEx, considering several SPARQL fragments. Particularly our procedure considers OPTIONAL query patterns, that turns out to be an important fragment to be studied with schemas. We then show the complexity bounds of our problem with respect to the fragments considered. To the best of our knowledge, this is the first work addressing SPARQL query containment in the presence of ShEx constraints.},
  comment    = {what they do is that they use the shape to transform the queries that have filters to delete them when the query will for certain don't respect the containt of the data. Given this transformation some queries can become equivalent. Also if the query is only a BGP then we can simply validate it against the shape if we delete the cardinatity of the operators.},
  file       = {:papers/SPARQL Query Containment with ShEx Constraints.pdf:PDF},
  groups     = {query containment with RDF, query containment bag semantic, query-containment with schema},
  isbn       = {978-3-319-66917-5},
  ranking    = {rank5},
  readstatus = {read},
}

@InProceedings{Kostylev2015,
  author    = {Kostylev, Egor V. and Reutter, Juan L. and Romero, Miguel and Vrgo{\v{c}}, Domagoj},
  booktitle = {The Semantic Web -- ISWC 2015},
  title     = {{SPARQL} with Property Paths},
  year      = {2015},
  address   = {Cham},
  editor    = {Arenas, Marcelo and Corcho, Oscar and Simperl, Elena and Strohmaier, Markus and d'Aquin, Mathieu and Srinivas, Kavitha and Groth, Paul and Dumontier, Michel and Heflin, Jeff and Thirunarayan, Krishnaprasad and Staab, Steffen},
  pages     = {3--18},
  publisher = {Springer International Publishing},
  abstract  = {The original SPARQL proposal was often criticized for its inability to navigate through the structure of RDF documents. For this reason property paths were introduced in SPARQL 1.1, but up to date there are no theoretical studies examining how their addition to the language affects main computational tasks such as query evaluation, query containment, and query subsumption. In this paper we tackle all of these problems and show that although the addition of property paths has no impact on query evaluation, they do make the containment and subsumption problems substantially more difficult.},
  file      = {:references/SPARQL with Property Paths.pdf:PDF},
  groups    = {query containment with RDF, query-containment with schema},
  isbn      = {978-3-319-25007-6},
}

@InProceedings{10.1145/2594538.2594542,
  author    = {Pichler, Reinhard and Skritek, Sebastian},
  booktitle = {Proceedings of the 33rd ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
  title     = {Containment and Equivalence of Well-Designed {SPARQL}},
  year      = {2014},
  address   = {New York, NY, USA},
  pages     = {39--50},
  publisher = {Association for Computing Machinery},
  series    = {PODS '14},
  abstract  = {Query containment and query equivalence constitute important computational problems in the context of static query analysis and optimization. While these problems have been intensively studied for fragments of relational calculus, almost no works exist for the semantic web query language SPARQL. In this paper, we carry out a comprehensive complexity analysis of containment and equivalence for several fragments of SPARQL: we start with the fundamental fragment of well-designed SPARQL restricted to the AND and OPTIONAL operator. We then study basic extensions in the form of the UNION operator and/or projection. The results obtained range from NP-completeness to undecidability.},
  doi       = {10.1145/2594538.2594542},
  file      = {:papers/Containment and Equivalence of Well Designed SPARQL.pdf:PDF},
  groups    = {query containment with RDF},
  isbn      = {9781450323758},
  keywords  = {query containment, RDF, semantic web, SPARQL},
  location  = {Snowbird, Utah, USA},
  numpages  = {12},
  url       = {https://doi.org/10.1145/2594538.2594542},
}

@Article{10.1145/2500130,
  author     = {Letelier, Andr{\'e}s and P{\'e}rez, Jorge and Pichler, Reinhard and Skritek, Sebastian},
  journal    = {ACM Trans. Database Syst.},
  title      = {Static Analysis and Optimization of {Semantic Web} Queries},
  year       = {2013},
  issn       = {0362-5915},
  month      = dec,
  number     = {4},
  volume     = {38},
  abstract   = {Static analysis is a fundamental task in query optimization. In this article we study static analysis and optimization techniques for SPARQL, which is the standard language for querying Semantic Web data. Of particular interest for us is the optionality feature in SPARQL. It is crucial in Semantic Web data management, where data sources are inherently incomplete and the user is usually interested in partial answers to queries. This feature is one of the most complicated constructors in SPARQL and also the one that makes this language depart from classical query languages such as relational conjunctive queries. We focus on the class of well-designed SPARQL queries, which has been proposed in the literature as a fragment of the language with good properties regarding query evaluation. We first propose a tree representation for SPARQL queries, called pattern trees, which captures the class of well-designed SPARQL graph patterns. Among other results, we propose several rules that can be used to transform pattern trees into a simple normal form, and study equivalence and containment. We also study the evaluation and enumeration problems for this class of queries.},
  address    = {New York, NY, USA},
  articleno  = {25},
  doi        = {10.1145/2500130},
  file       = {:papers/Static Analysis and Optimization of Semantic Web Queries.pdf:PDF},
  groups     = {query containment with RDF},
  issue_date = {November 2013},
  keywords   = {query containment, Semantic Web, RDF, SPARQL, Optimization},
  numpages   = {45},
  publisher  = {Association for Computing Machinery},
  url        = {https://doi.org/10.1145/2500130},
}

@InProceedings{10.1145/3366423.3380177,
  author     = {Azzam, Amr and Fern{\'a}ndez, Javier D. and Acosta, Maribel and Beno, Martin and Polleres, Axel},
  booktitle  = {Proceedings of The Web Conference 2020},
  title      = {{SMART-KG}: Hybrid Shipping for {SPARQL} Querying on the {Web}},
  year       = {2020},
  address    = {New York, NY, USA},
  pages      = {984--994},
  publisher  = {Association for Computing Machinery},
  series     = {WWW '20},
  abstract   = {While Linked Data (LD) provides standards for publishing (RDF) and (SPARQL) querying Knowledge Graphs (KGs) on the Web, serving, accessing and processing such open, decentralized KGs is often practically impossible, as query timeouts on publicly available SPARQL endpoints show. Alternative solutions such as Triple Pattern Fragments (TPF) attempt to tackle the problem of availability by pushing query processing workload to the client side, but suffer from unnecessary transfer of irrelevant data on complex queries with large intermediate results. In this paper we present smart-KG, a novel approach to share the load between servers and clients, while significantly reducing data transfer volume, by combining TPF with shipping compressed KG partitions. Our evaluations show that smart-KG outperforms state-of-the-art client-side solutions and increases server-side availability towards more cost-effective and balanced hosting of open and decentralized KGs.},
  comment    = {When I will be doing join optimization maybe the paper will be more useful, but for the moment I don't think it helped  a lot internal partition.},
  doi        = {10.1145/3366423.3380177},
  file       = {:papers/SMART KG Hybrid Shipping for SPARQL Querying on the Web.pdf:PDF},
  groups     = {charateristic set},
  isbn       = {9781450370233},
  location   = {Taipei, Taiwan},
  numpages   = {11},
  ranking    = {rank3},
  readstatus = {skimmed},
  url        = {https://doi.org/10.1145/3366423.3380177},
}

@InProceedings{Neumann2011CharacteristicSA,
  author     = {Neumann, Thomas and Moerkotte, Guido},
  booktitle  = {2011 IEEE 27th International Conference on Data Engineering},
  title      = {Characteristic sets: Accurate cardinality estimation for {RDF} queries with multiple joins},
  year       = {2011},
  pages      = {984--994},
  comment    = {Je pense que cela ressemble beaucoup a shape, donc une approche de convertir des shape en une approximation de caracteristique set serait bien.},
  file       = {:papers/Characteristic sets Accurate cardinality estimation for RDF queries with multiple joins.pdf:PDF},
  groups     = {charateristic set},
  ranking    = {rank5},
  readstatus = {skimmed},
  url        = {https://api.semanticscholar.org/CorpusID:2208604},
}

@InProceedings{Meimaris2017ExtendedCS,
  author    = {Meimaris, Marios and Papastefanatos, George and Mamoulis, Nikos and Anagnostopoulos, Ioannis},
  booktitle = {2017 IEEE 33rd International Conference on Data Engineering (ICDE)},
  title     = {Extended Characteristic Sets: Graph Indexing for {SPARQL} Query Optimization},
  year      = {2017},
  pages     = {497--508},
  file      = {:papers/Extended Characteristic Sets Graph Indexing for SPARQL Query Optimization.pdf:PDF},
  groups    = {charateristic set},
  url       = {https://api.semanticscholar.org/CorpusID:3535607},
}

@Misc{Meimaris2018HierarchicalCS,
  author        = {Meimaris, Marios and Papastefanatos, George},
  title         = {Hierarchical Characteristic Set Merging for Optimizing {SPARQL} Queries in Heterogeneous {RDF}},
  year          = {2018},
  archiveprefix = {arXiv},
  eprint        = {1809.02345},
  file          = {:papers/Hierarchical Characteristic Set Merging for Optimizing SPARQL Queries in Heterogeneous RDF.pdf:PDF},
  groups        = {charateristic set},
  url           = {https://api.semanticscholar.org/CorpusID:52176979},
}

@InProceedings{staworko_et_al:LIPIcs:2015:4985,
  author    = {Staworko, Slawek and Boneva, Iovka and Labra Gayo, Jose E. and Hym, Samuel and Prud'hommeaux, Eric G. and Solbrig, Harold},
  booktitle = {18th International Conference on Database Theory (ICDT 2015)},
  title     = {Complexity and Expressiveness of {ShEx} for {RDF}},
  year      = {2015},
  address   = {Dagstuhl, Germany},
  editor    = {Arenas, Marcelo and Ugarte, Mart{\'i}n},
  pages     = {195--211},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {31},
  annote    = {Keywords: RDF, Schema, Graph topology, Validation, Complexity, Expressiveness},
  doi       = {10.4230/LIPIcs.ICDT.2015.195},
  groups    = {shape},
  isbn      = {978-3-939897-79-8},
  issn      = {1868-8969},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2015/4985},
  urn       = {urn:nbn:de:0030-drops-49856},
}

@Misc{zervakis2019efficient,
  author        = {Zervakis, Lefteris and Setty, Vinay and Tryfonopoulos, Christos and Hose, Katja},
  title         = {Efficient Continuous Multi-Query Processing over Graph Streams},
  year          = {2019},
  eprint        = {1902.05134},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DS},
  groups        = {query containment bag semantic},
  file          = {:papers/Efﬁcient Continuous Multi-Query Processing over Graph Streams:},
}

@InProceedings{Hartig2012,
  author     = {Hartig, Olaf and Freytag, Johann-Christoph},
  booktitle  = {Conference on Hypertext and Social Media},
  title      = {Foundations of Traversal Based Query Execution over {Linked Data}},
  year       = {2012},
  address    = {New York, NY, USA},
  pages      = {43--52},
  publisher  = {ACM},
  series     = {HT '12},
  abstract   = {Query execution over the Web of Linked Data has attracted much attention recently. A particularly interesting approach is link traversal based query execution which proposes to integrate the traversal of data links into the creation of query results. Hence -in contrast to traditional query execution paradigms- this does not assume a fixed set of relevant data sources beforehand; instead, the traversal process discovers data and data sources on the fly and, thus, enables applications to tap the full potential of the Web. While several authors have studied possibilities to implement the idea of link traversal based query execution and to optimize query execution in this context, no work exists that discusses theoretical foundations of the approach in general. Our paper fills this gap. We introduce a well-defined semantics for queries that may be executed using a link traversal based approach. Based on this semantics we formally analyze properties of such queries. In particular, we study the computability of queries as well as the implications of querying a potentially infinite Web of Linked Data. Our results show that query computation in general is not guaranteed to terminate and that for any given query it is undecidable whether the execution terminates. Furthermore, we define an abstract execution model that captures the integration of link traversal into the query execution process. Based on this model we prove the soundness and completeness of link traversal based query execution and analyze an existing implementation approach.},
  doi        = {10.1145/2309996.2310005},
  file       = {:references/Foundations of Traversal Based Query Execution.pdf:PDF},
  groups     = {Link traversal, Link Traversal Query Processing},
  isbn       = {9781450313353},
  keywords   = {link traversal based query execution, computability, query semantics, web of data, linked data},
  location   = {Milwaukee, Wisconsin, USA},
  numpages   = {10},
  ranking    = {rank5},
  readstatus = {read},
  url        = {https://doi.org/10.1145/2309996.2310005},
}

@InProceedings{10.1145/2661829.2661876,
  author    = {Wu, Buwen and Zhou, Yongluan and Yuan, Pingpeng and Jin, Hai and Liu, Ling},
  booktitle = {Proceedings of the 23rd ACM International Conference on Conference on Information and Knowledge Management},
  title     = {{SemStore}: A Semantic-Preserving Distributed {RDF} Triple Store},
  year      = {2014},
  address   = {New York, NY, USA},
  pages     = {509--518},
  publisher = {Association for Computing Machinery},
  series    = {CIKM '14},
  abstract  = {The flexibility of the RDF data model has attracted an increasing number of organizations to store their data in an RDF format. With the rapid growth of RDF datasets, we envision that it is inevitable to deploy a cluster of computing nodes to process large-scale RDF data in order to deliver desirable query performance. In this paper, we address the challenging problems of data partitioning and query optimization in a scale-out RDF engine. We identify that existing approaches only focus on using fine-grained structural information for data partitioning, and hence fail to localize many types of complex queries. We then propose a radically different approach, where a coarse-grained structure, namely Rooted Sub-Graph (RSG), is used as the partition unit. By doing so, we can capture structural information at a much greater scale and hence are able to localize many complex queries. We also propose a k-means partitioning algorithm for allocating the RSGs onto the computing nodes as well as a query optimization strategy to minimize the inter-node communication during query processing. An extensive experimental study using benchmark datasets and real dataset shows that our engine, SemStore, outperforms existing systems by orders of magnitudes in terms of query response time.},
  doi       = {10.1145/2661829.2661876},
  file      = {:papers/SemStore A Semantic Preserving Distributed RDF Triple Store.pdf:PDF},
  groups    = {query decomposition},
  isbn      = {9781450325981},
  keywords  = {sparql, rdf, query processing, partitioning},
  location  = {Shanghai, China},
  numpages  = {10},
  url       = {https://doi.org/10.1145/2661829.2661876},
}

@Article{10.14778/2977797.2977806,
  author     = {Sch{\"a}tzle, Alexander and Przyjaciel-Zablocki, Martin and Skilevic, Simon and Lausen, Georg},
  journal    = {Proc. VLDB Endow.},
  title      = {{S2RDF}: {RDF} Querying with {SPARQL} on {Spark}},
  year       = {2016},
  issn       = {2150-8097},
  month      = jun,
  number     = {10},
  pages      = {804--815},
  volume     = {9},
  abstract   = {RDF has become very popular for semantic data publishing due to its flexible and universal graph-like data model. Thus, the ever-increasing size of RDF data collections raises the need for scalable distributed approaches. We endorse the usage of existing infrastructures for Big Data processing like Hadoop for this purpose. Yet, SPARQL query performance is a major challenge as Hadoop is not intentionally designed for RDF processing. Existing approaches often favor certain query pattern shapes while performance drops significantly for other shapes. In this paper, we introduce a novel relational partitioning schema for RDF data called ExtVP that uses a semi-join based preprocessing, akin to the concept of Join Indices in relational databases, to efficiently minimize query input size regardless of its pattern shape and diameter. Our prototype system S2RDF is built on top of Spark and uses SQL to execute SPARQL queries over ExtVP. We demonstrate its superior performance in comparison to state of the art SPARQL-on-Hadoop approaches.},
  doi        = {10.14778/2977797.2977806},
  file       = {:papers/RDF Querying with SPARQL on Spark.pdf:PDF},
  groups     = {query decomposition},
  issue_date = {June 2016},
  numpages   = {12},
  publisher  = {VLDB Endowment},
  url        = {https://doi.org/10.14778/2977797.2977806},
}

@InProceedings{10.1145/2588555.2594535,
  author    = {Papailiou, Nikolaos and Tsoumakos, Dimitrios and Konstantinou, Ioannis and Karras, Panagiotis and Koziris, Nectarios},
  booktitle = {Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data},
  title     = {{H2RDF+}: An Efficient Data Management System for Big {RDF} Graphs},
  year      = {2014},
  address   = {New York, NY, USA},
  pages     = {909--912},
  publisher = {Association for Computing Machinery},
  series    = {SIGMOD '14},
  abstract  = {The proliferation of data in RDF format has resulted in the emergence of a plethora of specialized management systems. While the ability to adapt to the complexity of a SPARQL query -- given their inherent diversity -- is crucial, current approaches do not scale well when faced with substantially complex, non-selective joins, resulting in exponential growth of execution times. In this demonstration we present H2RDF+, an RDF store that efficiently performs distributed Merge and Sort-Merge joins using a multiple-index scheme over HBase indexes. Through a greedy planner that incorporates our cost-model, it adaptively commands for either single or multi-machine query execution based on join complexity. In this paper, we present its key scientific contributions and allow participants to interact with an H2RDF+ deployment over a Cloud infrastructure. Using a web-based GUI we allow users to load different datasets (both real and synthetic), apply any query (custom or predefined) and monitor its execution. By allowing real-time inspection of cluster status, response times and committed resources the audience will evaluate the validity of H2RDF+'s claims and perform direct comparisons to two other state-of-the-art RDF stores.},
  doi       = {10.1145/2588555.2594535},
  file      = {:papers/An Efﬁcient Data Management System for Big.pdf:PDF},
  groups    = {query decomposition},
  isbn      = {9781450323765},
  keywords  = {rdf, joins, nosql, sparql, hbase, mapreduce, hadoop},
  location  = {Snowbird, Utah, USA},
  numpages  = {4},
  url       = {https://doi.org/10.1145/2588555.2594535},
}

@Comment{Removed a repeated definition of the SemStore entry (key 10.1145/2661829.2661876), which is already defined earlier in this file; BibTeX reports "Repeated entry" and ignores the later copy. The removed copy differed only in keyword order and in its file link: ":papers/A Semantic Preserving Distributed RDF Triple Store.pdf:PDF".}

@InProceedings{Montoya2017,
  author     = {Montoya, Gabriela and Skaf-Molli, Hala and Hose, Katja},
  booktitle  = {The Semantic Web -- ISWC 2017},
  title      = {The {Odyssey} Approach for Optimizing Federated {SPARQL} Queries},
  year       = {2017},
  address    = {Cham},
  editor     = {d'Amato, Claudia and Fernandez, Miriam and Tamma, Valentina and Lecue, Freddy and Cudr{\'e}-Mauroux, Philippe and Sequeda, Juan and Lange, Christoph and Heflin, Jeff},
  pages      = {471--489},
  publisher  = {Springer International Publishing},
  abstract   = {Answering queries over a federation of SPARQL endpoints requires combining data from more than one data source. Optimizing queries in such scenarios is particularly challenging not only because of (i) the large variety of possible query execution plans that correctly answer the query but also because (ii) there is only limited access to statistics about schema and instance data of remote sources. To overcome these challenges, most federated query engines rely on heuristics to reduce the space of possible query execution plans or on dynamic programming strategies to produce optimal plans. Nevertheless, these plans may still exhibit a high number of intermediate results or high execution times because of heuristics and inaccurate cost estimations. In this paper, we present Odyssey, an approach that uses statistics that allow for a more accurate cost estimation for federated queries and therefore enables Odyssey to produce better query execution plans. Our experimental results show that Odyssey produces query execution plans that are better in terms of data transfer and execution time than state-of-the-art optimizers. Our experiments using the FedBench benchmark show execution time gains of at least 25 times on average.},
  comment    = {The paper is very interesting and can be a great base for the federated query approach even for the LTQP one I think I can get some good insight and compare the particularity of this problem with the approach of that paper.},
  file       = {:papers/The Odyssey Approach for Optimizing Federated.pdf:PDF},
  groups     = {charateristic set, federated queries},
  isbn       = {978-3-319-68288-4},
  ranking    = {rank5},
  readstatus = {read},
}

@InProceedings{Schwarte2011,
  author    = {Schwarte, Andreas and Haase, Peter and Hose, Katja and Schenkel, Ralf and Schmidt, Michael},
  booktitle = {The Semantic Web -- ISWC 2011},
  title     = {{FedX}: Optimization Techniques for Federated Query Processing on {Linked Data}},
  year      = {2011},
  address   = {Berlin, Heidelberg},
  editor    = {Aroyo, Lora and Welty, Chris and Alani, Harith and Taylor, Jamie and Bernstein, Abraham and Kagal, Lalana and Noy, Natasha and Blomqvist, Eva},
  pages     = {601--616},
  publisher = {Springer Berlin Heidelberg},
  abstract  = {Motivated by the ongoing success of Linked Data and the growing amount of semantic data sources available on the Web, new challenges to query processing are emerging. Especially in distributed settings that require joining data provided by multiple sources, sophisticated optimization techniques are necessary for efficient query processing. We propose novel join processing and grouping techniques to minimize the number of remote requests, and develop an effective solution for source selection in the absence of preprocessed metadata. We present FedX, a practical framework that enables efficient SPARQL query processing on heterogeneous, virtually integrated Linked Data sources. In experiments, we demonstrate the practicability and efficiency of our framework on a set of real-world queries and data sources from the Linked Open Data cloud. With FedX we achieve a significant improvement in query performance over state-of-the-art federated query engines.},
  file      = {:papers/Optimization Techniques for Federated Query Processing on Linked Data.pdf:PDF},
  groups    = {federated queries},
  isbn      = {978-3-642-25073-6},
}

@InProceedings{Du2012,
  author    = {Du, Fang and Chen, Yueguo and Du, Xiaoyong},
  booktitle = {Database Systems for Advanced Applications},
  title     = {Partitioned Indexes for Entity Search over {RDF} Knowledge Bases},
  year      = {2012},
  address   = {Berlin, Heidelberg},
  editor    = {Lee, Sang-goo and Peng, Zhiyong and Zhou, Xiaofang and Moon, Yang-Sae and Unland, Rainer and Yoo, Jaesoo},
  pages     = {141--155},
  publisher = {Springer Berlin Heidelberg},
  abstract  = {The rapid growth of RDF data in RDF knowledge bases calls for efficient query processing techniques. This paper focuses on the star-style SPARQL join queries, which is very common when users want to search information of entities from RDF knowledge bases. We observe that the computational cost of such queries mainly comes from loading a large portion of predicate-ahead indexes. We therefore propose to partition the whole RDF knowledge bases based on the schema of individual entities, so that only entities of similar schemas are allocated into the same cluster. Such a partitioning strategy generates a pruning mechanism that effectively isolate the correlations of partitions and the queries. Consequently, queries are only conducted over a small number of partitions with small predicate-ahead indexes. Experiments over a large real-life RDF data set show the significant performance improvements achieved by our partitioned indexing techniques.},
  file      = {:papers/Partitioned Indexes for Entity Search.pdf:PDF},
  groups    = {charateristic set},
  isbn      = {978-3-642-29038-1},
}

@InProceedings{Taelman2023,
  author    = {Taelman, Ruben and Verborgh, Ruben},
  booktitle = {The Semantic Web -- ISWC 2023},
  title     = {Link Traversal Query Processing Over Decentralized Environments with Structural Assumptions},
  year      = {2023},
  address   = {Cham},
  editor    = {Payne, Terry R. and Presutti, Valentina and Qi, Guilin and Poveda-Villal{\'o}n, Mar{\'i}a and Stoilos, Giorgos and Hollink, Laura and Kaoudi, Zoi and Cheng, Gong and Li, Juanzi},
  pages     = {3--22},
  publisher = {Springer Nature Switzerland},
  abstract  = {To counter societal and economic problems caused by data silos on the Web, efforts such as Solid strive to reclaim private data by storing it in permissioned documents over a large number of personal vaults across the Web. Building applications on top of such a decentralized Knowledge Graph involves significant technical challenges: centralized aggregation prior to query processing is impossible for legal reasons, and current federated querying techniques cannot handle this large scale of distribution at the expected performance. We propose an extension to Link Traversal Query Processing (LTQP) that incorporates structural properties within decentralized environments to tackle their unprecedented scale. In this article, we analyze the structural properties of the Solid decentralization ecosystem that are relevant for query execution, we introduce novel LTQP algorithms leveraging these structural properties, and evaluate their effectiveness. Our experiments indicate that these new algorithms obtain correct results in the order of seconds, which existing algorithms cannot achieve. This work reveals that a traversal-based querying method using structural assumptions can be effective for large-scale decentralization, but that advances are needed in the area of query planning for LTQP to handle more complex queries. These insights open the door to query-driven decentralized applications, in which declarative queries shield developers from the inherent complexity of a decentralized landscape.},
  file      = {:references/Link Traversal Query Processing over Decentralized Environments with Structural Assumptions.pdf:PDF},
  groups    = {Link traversal, Link Traversal Query Processing},
  isbn      = {978-3-031-47240-4},
}

@Misc{hartig2016walking,
  author        = {Hartig, Olaf and {\"O}zsu, M. Tamer},
  title         = {Walking without a Map: Optimizing Response Times of Traversal-Based {Linked Data} Queries (Extended Version)},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1607.01046},
  file          = {:references/Walking without a Map.pdf:PDF},
  groups        = {Link Traversal Query Processing},
  primaryclass  = {cs.DB},
  priority      = {prio1},
}

@InCollection{Hartig2014,
  author     = {Hartig, Olaf},
  booktitle  = {Emerging Directions in Database Systems and Applications},
  publisher  = {Chapman and Hall/{CRC}},
  title      = {{Linked Data} Query Processing Based on Link Traversal},
  year       = {2014},
  month      = may,
  pages      = {263--283},
  doi        = {10.1201/b16859-15},
  file       = {:papers/Linked Data Query Processing Based on Link Traversal.pdf:PDF},
  groups     = {Link Traversal Query Processing, query plan LTQP, query planning},
  ranking    = {rank3},
  readstatus = {skimmed},
  url        = {https://doi.org/10.1201/b16859-15},
}

@InProceedings{Ladwig2010,
  author    = {Ladwig, G{\"u}nter and Tran, Thanh},
  booktitle = {The Semantic Web -- ISWC 2010},
  title     = {{Linked Data} Query Processing Strategies},
  year      = {2010},
  address   = {Berlin, Heidelberg},
  editor    = {Patel-Schneider, Peter F. and Pan, Yue and Hitzler, Pascal and Mika, Peter and Zhang, Lei and Pan, Jeff Z. and Horrocks, Ian and Glimm, Birte},
  pages     = {453--469},
  publisher = {Springer Berlin Heidelberg},
  abstract  = {Recently, processing of queries on linked data has gained attention. We identify and systematically discuss three main strategies: a bottom-up strategy that discovers new sources during query processing by following links between sources, a top-down strategy that relies on complete knowledge about the sources to select and process relevant sources, and a mixed strategy that assumes some incomplete knowledge and discovers new sources at run-time. To exploit knowledge discovered at run-time, we propose an additional step, explicitly scheduled during query processing, called correct source ranking. Additionally, we propose the adoption of stream-based query processing to deal with the unpredictable nature of data access in the distributed Linked Data environment. In experiments, we show that our implementation of the mixed strategy leads to early reporting of results and thus, more responsive query processing, while not requiring complete knowledge.},
  file      = {:papers/Linked Data Query Processing Strategies.pdf:PDF},
  groups    = {Link Traversal Query Processing, query plan LTQP, query planning},
  isbn      = {978-3-642-17746-0},
  priority  = {prio1},
}

@InProceedings{Ladwig2011,
  author     = {Ladwig, G{\"u}nter and Tran, Thanh},
  booktitle  = {The Semantic Web: Research and Applications},
  title      = {{SIHJoin}: Querying Remote and Local {Linked Data}},
  year       = {2011},
  address    = {Berlin, Heidelberg},
  editor     = {Antoniou, Grigoris and Grobelnik, Marko and Simperl, Elena and Parsia, Bijan and Plexousakis, Dimitris and De Leenheer, Pieter and Pan, Jeff},
  pages      = {139--153},
  publisher  = {Springer Berlin Heidelberg},
  abstract   = {The amount of Linked Data is increasing steadily. Optimized top-down Linked Data query processing based on complete knowledge about all sources, bottom-up processing based on run-time discovery of sources as well as a mixed strategy that combines them have been proposed. A particular problem with Linked Data processing is that the heterogeneity of the sources and access options lead to varying input latency, rendering the application of blocking join operators infeasible. Previous work partially address this by proposing a non-blocking iterator-based operator and another one based on symmetric-hash join. Here, we propose detailed cost models for these two operators to systematically compare them, and to allow for query optimization. Further, we propose a novel operator called the Symmetric Index Hash Join to address one open problem of Linked Data query processing: to query not only remote, but also local Linked Data. We perform experiments on real-world datasets to compare our approach against the iterator-based baseline, and create a synthetic dataset to more systematically analyze the impacts of the individual components captured by the proposed cost models.},
  comment    = {Article très intéressant qui présente un bon modèle de cout pour des join ainsi que leur implmentation d'un nouvelle algorithm de join dont son coût ne dépend pas du nombre de source ce qui en LTQP est vraiment très bon. Par contre pour le calcul de sélectivité, je ne suis pas sur comment cela ce fait.},
  file       = {:papers/SIHJoin Querying Remote and Local Linked.pdf:PDF},
  groups     = {Link Traversal Query Processing, query plan LTQP, query planning},
  isbn       = {978-3-642-21034-1},
  ranking    = {rank5},
  readstatus = {read},
}

@InProceedings{Hartig2011,
  author    = {Hartig, Olaf},
  title     = {Zero-Knowledge Query Planning for an Iterator Implementation of Link Traversal Based Query Execution},
  booktitle = {The Semantic Web: Research and Applications},
  editor    = {Antoniou, Grigoris and Grobelnik, Marko and Simperl, Elena and Parsia, Bijan and Plexousakis, Dimitris and De Leenheer, Pieter and Pan, Jeff},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {2011},
  pages     = {154--169},
  isbn      = {978-3-642-21034-1},
  abstract  = {Link traversal based query execution is a new query execution paradigm for the Web of Data. This approach allows the execution engine to discover potentially relevant data during the query execution and, thus, enables users to tap the full potential of the Web. In earlier work we propose to implement the idea of link traversal based query execution using a synchronous pipeline of iterators. While this idea allows for an easy and efficient implementation, it introduces restrictions that cause less comprehensive result sets. In this paper we address this limitation. We analyze the restrictions and discuss how the evaluation order of a query may affect result set size and query execution costs. To identify a suitable order, we propose a heuristic for our scenario where no a-priory information about relevant data sources is present. We evaluate this heuristic by executing real-world queries over the Web of Data.},
  file      = {:papers/zero knowledge query planning.pdf:PDF},
  groups    = {query plan LTQP, query planning},
  priority  = {prio1},
}

@InProceedings{kashif2021,
  author        = {Rabbani, Kashif and Lissandrini, Matteo and Hose, Katja},
  booktitle     = {Proceedings of the 24th International Conference on Extending Database Technology ({EDBT} 2021)},
  title         = {Optimizing {SPARQL} Queries using Shape Statistics},
  year          = {2021},
  comment-id357 = {It's a really good paper that describe, and show how to use shape with added statistic plus global void statistic for query planning. I think it can easily be the based for my adaptative approach, I also think that there are some differences with this approach due to the adaptativity and the scoping of the data source.},
  doi           = {10.5441/002/EDBT.2021.59},
  file          = {:papers/Optimizing SPARQL queries using shape statistics.pdf:PDF},
  groups        = {Query plan with shape, query planning, idea to try},
  keywords      = {Database Technology},
  language      = {en},
  publisher     = {OpenProceedings.org},
  ranking       = {rank5},
  readstatus    = {read},
  url           = {https://openproceedings.org/2021/conf/edbt/p202.pdf},
}

@InProceedings{Haller2023AQA,
  author     = {Haller, David},
  booktitle  = {PhD@VLDB},
  title      = {A Query-Driven Approach for {SHACL} Type Inference},
  year       = {2023},
  file       = {:papers/A Query Driven Approach for SHACL Type Inference.pdf:PDF},
  groups     = {Query plan with shape, query planning},
  ranking    = {rank1},
  readstatus = {skimmed},
  url        = {https://api.semanticscholar.org/CorpusID:261102495},
}

@InProceedings{Abbas2018,
  author     = {Abbas, Abdullah and Genev{\`e}s, Pierre and Roisin, C{\'e}cile and Laya{\"i}da, Nabil},
  booktitle  = {Web Engineering},
  title      = {Selectivity Estimation for {SPARQL} Triple Patterns with Shape Expressions},
  year       = {2018},
  address    = {Cham},
  editor     = {Mikkonen, Tommi and Klamma, Ralf and Hern{\'a}ndez, Juan},
  pages      = {195--209},
  publisher  = {Springer International Publishing},
  abstract   = {We optimize the evaluation of conjunctive SPARQL queries, on big RDF graphs, by taking advantage of ShEx schema constraints. Our optimization is based on computing ranks for query triple patterns, which indicates their order of execution. We first define a set of well-formed ShEx schemas, that possess interesting characteristics for SPARQL query optimization. We then define our optimization method by exploiting information extracted from a ShEx schema. The experimentations performed shows the advantages of applying our optimization on the top of an existing state-of-the-art query evaluation system.},
  comment    = {Je pense que c'est vraiment bon et très lié à notre problème.},
  file       = {:papers/Selectivity Estimation for SPARQL Triple Patterns with Shape Expressions.pdf:PDF},
  groups     = {Query plan with shape, query planning, reading, idea to try},
  isbn       = {978-3-319-91662-0},
  priority   = {prio1},
  ranking    = {rank5},
  readstatus = {read},
}

@Article{10.1007/s00778-017-0480-7,
  author     = {Leis, Viktor and Radke, Bernhard and Gubichev, Andrey and Mirchev, Atanas and Boncz, Peter and Kemper, Alfons and Neumann, Thomas},
  journal    = {The VLDB Journal},
  title      = {Query Optimization through the Looking Glass, and What We Found Running the Join Order Benchmark},
  year       = {2018},
  issn       = {1066-8888},
  month      = oct,
  number     = {5},
  pages      = {643--668},
  volume     = {27},
  abstract   = {Finding a good join order is crucial for query performance. In this paper, we introduce the Join Order Benchmark that works on real-life data riddled with correlations and introduces 113 complex join queries. We experimentally revisit the main components in the classic query optimizer architecture using a complex, real-world data set and realistic multi-join queries. For this purpose, we describe cardinality-estimate injection and extraction techniques that allow us to compare the cardinality estimators of multiple industrial SQL implementations on equal footing, and to characterize the value of having perfect cardinality estimates. Our investigation shows that all industrial-strength cardinality estimators routinely produce large errors: though cardinality estimation using table samples solves the problem for single-table queries, there are still no techniques in industrial systems that can deal accurately with join-crossing correlated query predicates. We further show that while estimates are essential for finding a good join order, query performance is unsatisfactory if the query engine relies too heavily on these estimates. Using another set of experiments that measure the impact of the cost model, we find that it has much less influence on query performance than the cardinality estimates. We investigate plan enumeration techniques comparing exhaustive dynamic programming with heuristic algorithms and find that exhaustive enumeration improves performance despite the suboptimal cardinality estimates. Finally, we extend our investigation from main-memory only, to also include disk-based query processing. Here, we find that though accurate cardinality estimation should be the first priority, other aspects such as modeling random versus sequential I/O are also important to predict query runtime.},
  address    = {Berlin, Heidelberg},
  doi        = {10.1007/s00778-017-0480-7},
  file       = {:papers/Query optimization through the looking glass.pdf:PDF},
  groups     = {query planning},
  issue_date = {October 2018},
  keywords   = {Join ordering, Cardinality estimation, Cost models, Query optimization},
  numpages   = {26},
  publisher  = {Springer-Verlag},
  url        = {https://doi.org/10.1007/s00778-017-0480-7},
}

@Article{Deshpande2007,
  author    = {Deshpande, Amol and Ives, Zachary and Raman, Vijayshankar},
  title     = {Adaptive Query Processing},
  journal   = {Foundations and Trends{\textregistered} in Databases},
  publisher = {Now Publishers},
  year      = {2007},
  volume    = {1},
  number    = {1},
  pages     = {1--140},
  doi       = {10.1561/1900000001},
  url       = {https://doi.org/10.1561/1900000001},
  file      = {:papers/Amol Deshpande, Zachary Ives, Vijayshankar Raman - Adaptive Query Processing (Foundations and Trends in Databases) (2007).pdf:PDF},
  groups    = {query planning},
}

@InProceedings{Gubichev2014ExploitingTQ,
  author    = {Gubichev, Andrey and Neumann, Thomas},
  booktitle = {International Conference on Extending Database Technology},
  title     = {Exploiting the query structure for efficient join ordering in {SPARQL} queries},
  year      = {2014},
  file      = {:papers/Exploiting the query structure for efﬁcient join ordering in SPARQL queries.pdf:PDF},
  groups    = {query planning},
  url       = {https://api.semanticscholar.org/CorpusID:2256871},
}

@InProceedings{10.5555/2877789.2877794,
  author    = {Hagedorn, Stefan and Hose, Katja and Sattler, Kai-Uwe and Umbrich, J{\"u}rgen},
  booktitle = {Proceedings of the 5th International Conference on Consuming Linked Data - Volume 1264},
  title     = {Resource Planning for {SPARQL} Query Execution on Data Sharing Platforms},
  year      = {2014},
  address   = {Aachen, Germany},
  pages     = {49--60},
  publisher = {CEUR-WS.org},
  series    = {COLD'14},
  abstract  = {To increase performance, data sharing platforms often make use of clusters of nodes where certain tasks can be executed in parallel. Resource planning and especially deciding how many processors should be chosen to exploit parallel processing is complex in such a setup as increasing the number of processors does not always improve runtime due to communication overhead. Instead, there is usually an optimum number of processors for which using more or fewer processors leads to less efficient runtimes. In this paper, we present a cost model based on widely used statistics (VoiD) and show how to compute the optimum number of processors that should be used to evaluate a particular SPARQL query over a particular configuration and RDF dataset. Our first experiments show the general applicability of our approach but also how shortcomings in the used statistics limit the potential of optimization.},
  file      = {:papers/Resource Planning for SPARQL Query Execution.pdf:PDF},
  groups    = {query planning},
  location  = {Riva del Garda, Italy},
  numpages  = {12},
}

@Misc{verborgh2020guided,
  author        = {Verborgh, Ruben and Taelman, Ruben},
  title         = {Guided Link-Traversal-Based Query Processing},
  year          = {2020},
  eprint        = {2005.02239},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DB},
  groups        = {Link Traversal Query Processing},
}

@InProceedings{10.1145/2463676.2465231,
  author    = {Hartig, Olaf},
  booktitle = {Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data},
  title     = {{SQUIN}: A Traversal Based Query Execution System for the Web of Linked Data},
  year      = {2013},
  address   = {New York, NY, USA},
  pages     = {1081--1084},
  publisher = {Association for Computing Machinery},
  series    = {SIGMOD '13},
  abstract  = {The World Wide Web (WWW) currently evolves into a Web of Linked Data where content providers publish and link their data as they have done with hypertext for the last 20 years. We understand this emerging dataspace as a huge, distributed database which is -at best- partially known to query execution systems. To tap the full potential of the Web, such a system must be able to answer a query using data from initially unknown data sources. For this purpose, traditional query execution paradigms are unsuitable because those assume a fixed set of potentially relevant data sources beforehand. We demonstrate the query execution system SQUIN which implements a novel query execution approach. The main idea is to integrate the traversal of data links into the result construction process. This approach allows the execution engine to discover potentially relevant data during the query execution. In our demonstration, attendees can query the Web of Linked Data using SQUIN and, thus, learn about the new query execution approach. Furthermore, attendees can experience the suitability of the approach for Web applications by using a simple, Linked Data based mash-up implemented on top of SQUIN.},
  doi       = {10.1145/2463676.2465231},
  file      = {:papers/A Traversal Based Query Execution System.pdf:PDF},
  groups    = {Link Traversal Query Processing, query plan LTQP},
  isbn      = {9781450320375},
  keywords  = {web of data, link traversal based query execution, linked data},
  location  = {New York, New York, USA},
  numpages  = {4},
  priority  = {prio1},
  url       = {https://doi.org/10.1145/2463676.2465231},
}

@InProceedings{Miranker2012,
  author     = {Miranker, Daniel and Depena, Rodolfo and Jung, Hyunjoon and Sequeda, Juan},
  title      = {Diamond: A {SPARQL} Query Engine, for Linked Data Based on the {Rete} Match},
  year       = {2012},
  month      = aug,
  comment    = {Don't think it is really useful for me},
  file       = {:papers/A SPARQL Query Engine for Linked Data.pdf:PDF},
  groups     = {Link Traversal Query Processing, query plan LTQP},
  ranking    = {rank2},
  readstatus = {skimmed},
}

@InProceedings{Wang2012,
  author    = {Wang, Xin and Tiropanis, Thanassis and Davis, Hugh C.},
  booktitle = {The Semantic Web},
  title     = {Evaluating Graph Traversal Algorithms for Distributed {SPARQL} Query Optimization},
  year      = {2012},
  address   = {Berlin, Heidelberg},
  editor    = {Pan, Jeff Z. and Chen, Huajun and Kim, Hong-Gee and Li, Juanzi and Wu, Zhe and Horrocks, Ian and Mizoguchi, Riichiro and Wu, Zhaohui},
  pages     = {210--225},
  publisher = {Springer Berlin Heidelberg},
  abstract  = {Distributed SPARQL queries enable users to retrieve information by exploiting the increasing amount of linked data being published. However, industrial-strength distributed SPARQL query processing is still at its early stage for efficiently answering queries. Previous research shows that it is possible to apply methods from graph theory to optimize the performance of distributed SPARQL. In this paper we describe a framework that can simulate arbitrary RDF data networks to evaluate different approaches of distributed SPARQL query processing. Using this framework we further explore the graph traversal algorithms for distributed SPARQL optimization. We present an implementation of a Minimum-Spanning-Tree-based (MST-based) algorithm for distributed SPARQL processing, the performance of which is compared to other approaches using this evaluation framework. The contribution of this paper is to show that a MST-based approach seems to perform much better than other non graph-traversal-based approaches, and to provide an evaluation framework for evaluating distributed SPARQL processing.},
  file      = {:papers/Evaluating Graph Traversal Algorithms for Distributed SPARQL Query Optimization.pdf:PDF},
  groups    = {Link Traversal Query Processing},
  isbn      = {978-3-642-29923-0},
  priority  = {prio1},
}

@Misc{Sklavos2022EstimatingTC,
  author        = {Sklavos, Anton and Fafalios, Pavlos and Tzitzikas, Yannis},
  title         = {Estimating the Cost of Executing Link Traversal based {SPARQL} Queries},
  year          = {2022},
  archiveprefix = {arXiv},
  eprint        = {2210.09100},
  file          = {:papers/Estimating the Cost of Executing Link Traversal based SPARQL Queries.pdf:PDF},
  groups        = {Link Traversal Query Processing},
  url           = {https://api.semanticscholar.org/CorpusID:252917692},
}

@InProceedings{Bogaerts2021LinkTW,
  author    = {Bogaerts, Bart and Ketsman, Bas and Zeboudj, Younes and Aamer, Heba and Taelman, Ruben and Verborgh, Ruben},
  title     = {Link Traversal with Distributed Subweb Specifications},
  booktitle = {RuleML+RR},
  year      = {2021},
  url       = {https://api.semanticscholar.org/CorpusID:244848944},
  file      = {:references/SubwebSpecifications.pdf:PDF},
  groups    = {Link Traversal Query Processing},
}

@InProceedings{Alahmari2011EvaluatingSU,
  author    = {Alahmari, Fahad},
  booktitle = {2011 Sixth International Conference on Digital Information Management},
  title     = {Evaluating {SPARQL} using query federation and link traversal},
  year      = {2011},
  pages     = {79--84},
  file      = {:papers/Evaluating SPARQL Using Query Federation and Link Traversal.pdf:PDF},
  groups    = {Link Traversal Query Processing},
  url       = {https://api.semanticscholar.org/CorpusID:17066695},
}

@InProceedings{Harth2016LinkTA,
  author = {Harth, Andreas},
  title  = {Link traversal and reasoning in dynamic linked data knowledge bases},
  year   = {2016},
  file   = {:papers/Link Traversal and Reasoning in Dynamic Linked Data Knowledge Bases.pdf:PDF},
  groups = {Link Traversal Query Processing},
  url    = {https://api.semanticscholar.org/CorpusID:56854840},
}

@InProceedings{Harth2010,
  author    = {Harth, Andreas and Hose, Katja and Karnstedt, Marcel and Polleres, Axel and Sattler, Kai-Uwe and Umbrich, J{\"u}rgen},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web},
  title     = {Data Summaries for On-Demand Queries over Linked Data},
  year      = {2010},
  address   = {New York, NY, USA},
  pages     = {411--420},
  publisher = {Association for Computing Machinery},
  series    = {WWW '10},
  abstract  = {Typical approaches for querying structured Web Data collect (crawl) and pre-process (index) large amounts of data in a central data repository before allowing for query answering. However, this time-consuming pre-processing phase however leverages the benefits of Linked Data -- where structured data is accessible live and up-to-date at distributed Web resources that may change constantly -- only to a limited degree, as query results can never be current. An ideal query answering system for Linked Data should return current answers in a reasonable amount of time, even on corpora as large as the Web. Query processors evaluating queries directly on the live sources require knowledge of the contents of data sources. In this paper, we develop and evaluate an approximate index structure summarising graph-structured content of sources adhering to Linked Data principles, provide an algorithm for answering conjunctive queries over Linked Data on the Web exploiting the source summary, and evaluate the system using synthetically generated queries. The experimental results show that our lightweight index structure enables complete and up-to-date query results over Linked Data, while keeping the overhead for querying low and providing a satisfying source ranking at no additional cost.},
  doi       = {10.1145/1772690.1772733},
  file      = {:papers/Data Summaries for On Demand Queries over Linked Data.pdf:PDF},
  groups    = {Link Traversal Query Processing, query plan LTQP},
  isbn      = {9781605587998},
  keywords  = {index structures, rdf querying, linked data},
  location  = {Raleigh, North Carolina, USA},
  numpages  = {10},
  url       = {https://doi.org/10.1145/1772690.1772733},
}

@InProceedings{10.1007/978-3-642-04930-9_19,
  author    = {Hartig, Olaf and Bizer, Christian and Freytag, Johann-Christoph},
  booktitle = {Proceedings of the 8th International Semantic Web Conference},
  title     = {Executing {SPARQL} Queries over the Web of Linked Data},
  year      = {2009},
  address   = {Berlin, Heidelberg},
  pages     = {293--309},
  publisher = {Springer-Verlag},
  series    = {ISWC '09},
  abstract  = {The Web of Linked Data forms a single, globally distributed dataspace. Due to the openness of this dataspace, it is not possible to know in advance all data sources that might be relevant for query answering. This openness poses a new challenge that is not addressed by traditional research on federated query processing. In this paper we present an approach to execute SPARQL queries over the Web of Linked Data. The main idea of our approach is to discover data that might be relevant for answering a query during the query execution itself. This discovery is driven by following RDF links between data sources based on URIs in the query and in partial results. The URIs are resolved over the HTTP protocol into RDF data which is continuously added to the queried dataset. This paper describes concepts and algorithms to implement our approach using an iterator-based pipeline. We introduce a formalization of the pipelining approach and show that classical iterators may cause blocking due to the latency of HTTP requests. To avoid blocking, we propose an extension of the iterator paradigm. The evaluation of our approach shows its strengths as well as the still existing challenges.},
  doi       = {10.1007/978-3-642-04930-9_19},
  file      = {:papers/Executing SPARQL Queries over the Web of Linked Data.pdf:PDF},
  groups    = {Link Traversal Query Processing},
  isbn      = {9783642049293},
  location  = {Chantilly, VA},
  numpages  = {17},
  url       = {https://doi.org/10.1007/978-3-642-04930-9_19},
}

@InProceedings{Thapa2023,
  author        = {Thapa, Ratan Bahadur and Giese, Martin},
  booktitle     = {The Semantic Web -- ISWC 2023},
  title         = {Optimizing {SPARQL} Queries with {SHACL}},
  year          = {2023},
  address       = {Cham},
  editor        = {Payne, Terry R. and Presutti, Valentina and Qi, Guilin and Poveda-Villal{\'o}n, Mar{\'i}a and Stoilos, Giorgos and Hollink, Laura and Kaoudi, Zoi and Cheng, Gong and Li, Juanzi},
  pages         = {41--60},
  publisher     = {Springer Nature Switzerland},
  abstract      = {We propose a set of optimizations that can be applied to a given SPARQL query, and that guarantee that the optimized query has the same answers under bag semantics as the original query, provided that the queried RDF graph validates certain SHACL constraints. Our optimizations exploit the relationship between graph patterns in the SPARQL queries and the SHACL constraints that describe those patterns in the RDF graph. We prove the correctness of these optimizations and show how they can be propagated to larger queries while preserving answers. Further, we prove the confluence of rewritings that employ these optimizations, guaranteeing convergence to the same optimized query regardless of the rewriting order.},
  comment-id357 = {Formulate multiple optimization using shape could be useful for once motivate shapes, second as an inspiration for our own formulations},
  file          = {:papers/Optimizing SPARQL queries with SHACL.pdf:PDF},
  groups        = {query containment with RDF},
  isbn          = {978-3-031-47240-4},
  priority      = {prio1},
}

@InProceedings{Asprino2023,
  author    = {Asprino, Luigi and Ceriani, Miguel},
  booktitle = {The Semantic Web -- ISWC 2023},
  title     = {How is Your Knowledge Graph Used: Content-Centric Analysis of {SPARQL} Query Logs},
  year      = {2023},
  address   = {Cham},
  editor    = {Payne, Terry R. and Presutti, Valentina and Qi, Guilin and Poveda-Villal{\'o}n, Mar{\'i}a and Stoilos, Giorgos and Hollink, Laura and Kaoudi, Zoi and Cheng, Gong and Li, Juanzi},
  pages     = {197--215},
  publisher = {Springer Nature Switzerland},
  abstract  = {Knowledge graphs (KGs) are used to integrate and persist information useful to organisations, communities, or the general public. It is essential to understand how KGs are used so as to evaluate the strengths and shortcomings of semantic web standards, data modelling choices formalised in ontologies, deployment settings of triple stores etc. One source of information on the usage of the KGs is the query logs, but making sense of hundreds of thousands of log entries is not trivial. Previous works that studied available logs from public SPARQL endpoints mainly focused on the general syntactic properties of the queries disregarding the semantics and their intent. We introduce a novel, content-centric, approach that we call query log summarisation, in which we group the queries that can be derived from some common pattern. The type of patterns considered in this work is query templates, i.e. common blueprints from which multiple queries can be generated by the replacement of parameters with constants. Moreover, we present an algorithm able to summarise a query log as a list of templates whose time and space complexity is linear with respect to the size of the input (number and dimension of queries). We experimented with the algorithm on the query logs of the Linked SPARQL Queries dataset showing promising results.},
  file      = {:papers/The Semantic Web –_ISWC 2023.pdf:PDF},
  groups    = {analysis of KG},
  isbn      = {978-3-031-47240-4},
}

@InProceedings{10.1145/1066157.1066171,
  author    = {Babu, Shivnath and Bizarro, Pedro and DeWitt, David},
  booktitle = {Proceedings of the 2005 ACM SIGMOD International Conference on Management of Data},
  title     = {Proactive Re-Optimization},
  year      = {2005},
  address   = {New York, NY, USA},
  pages     = {107--118},
  publisher = {Association for Computing Machinery},
  series    = {SIGMOD '05},
  abstract  = {Traditional query optimizers rely on the accuracy of estimated statistics to choose good execution plans. This design often leads to suboptimal plan choices for complex queries, since errors in estimates for intermediate subexpressions grow exponentially in the presence of skewed and correlated data distributions. Reoptimization is a promising technique to cope with such mistakes. Current re-optimizers first use a traditional optimizer to pick a plan, and then react to estimation errors and resulting suboptimalities detected in the plan during execution. The effectiveness of this approach is limited because traditional optimizers choose plans unaware of issues affecting reoptimization. We address this problem using proactive reoptimization, a new approach that incorporates three techniques: i) the uncertainty in estimates of statistics is computed in the form of bounding boxes around these estimates, ii) these bounding boxes are used to pick plans that are robust to deviations of actual values from their estimates, and iii) accurate measurements of statistics are collected quickly and efficiently during query execution. We present an extensive evaluation of these techniques using a prototype proactive re-optimizer named Rio. In our experiments Rio outperforms current re-optimizers by up to a factor of three.},
  doi       = {10.1145/1066157.1066171},
  file      = {:papers/Proactive Re-Optimization.pdf:PDF},
  groups    = {query planning},
  isbn      = {1595930604},
  location  = {Baltimore, Maryland},
  numpages  = {12},
  url       = {https://doi.org/10.1145/1066157.1066171},
}

@InBook{Gayo2018,
  author    = {Labra Gayo, Jose Emilio and Prud'hommeaux, Eric and Boneva, Iovka and Kontokostas, Dimitris},
  pages     = {55--117},
  publisher = {Springer International Publishing},
  title     = {Shape Expressions},
  year      = {2018},
  address   = {Cham},
  isbn      = {978-3-031-79478-0},
  abstract  = {Shape Expressions (ShEx) is a schema language for describing RDF graphs structures. ShEx was originally developed in late 2013 to provide a human-readable syntax for OSLC Resource Shapes. It added disjunctions, so it was more expressive than Resource Shapes. Tokens in the language were adopted from Turtle [80] and SPARQL [44] with tokens for grouping, repetition and wildcards from regular expression and RelaxNG Compact Syntax [100]. The language was described in a paper [80] and codified in a June 2014 W3C member submission [92] which included a primer and a semantics specification. This was later deemed ``ShEx 1.0''.},
  booktitle = {Validating RDF Data},
  doi       = {10.1007/978-3-031-79478-0_4},
  file      = {:papers/shape_expressions.pdf:PDF},
  groups    = {shape},
  url       = {https://doi.org/10.1007/978-3-031-79478-0_4},
}

@InBook{Gayo2018a,
  author    = {Labra Gayo, Jose Emilio and Prud'hommeaux, Eric and Boneva, Iovka and Kontokostas, Dimitris},
  pages     = {195--231},
  publisher = {Springer International Publishing},
  title     = {Applications},
  year      = {2018},
  address   = {Cham},
  isbn      = {978-3-031-79478-0},
  abstract  = {In this chapter we describe several applications of RDF validation. We start with the WebIndex, a medium-size linked data portal that was one of the earliest applications of ShEx. We describe it using ShEx and SHACL so the reader can see how both formalisms can be applied to describe RDF data.},
  booktitle = {Validating RDF Data},
  doi       = {10.1007/978-3-031-79478-0_6},
  file      = {:papers/applications.pdf:PDF},
  groups    = {shape},
  url       = {https://doi.org/10.1007/978-3-031-79478-0_6},
}

@InBook{Gayo2018b,
  author    = {Labra Gayo, Jose Emilio and Prud'hommeaux, Eric and Boneva, Iovka and Kontokostas, Dimitris},
  pages     = {119--194},
  publisher = {Springer International Publishing},
  title     = {{SHACL}},
  year      = {2018},
  address   = {Cham},
  isbn      = {978-3-031-79478-0},
  abstract  = {Shapes Constraint Language (SHACL) has been developed by the W3C RDF Data Shapes Working Group, which was chartered in 2014 with the goal to ``produce a language for defining structural constraints on RDF graphs [6].''},
  booktitle = {Validating RDF Data},
  doi       = {10.1007/978-3-031-79478-0_5},
  file      = {:papers/shacl.pdf:PDF},
  groups    = {shape},
  url       = {https://doi.org/10.1007/978-3-031-79478-0_5},
}

@InBook{Gayo2018c,
  author    = {Labra Gayo, Jose Emilio and Prud'hommeaux, Eric and Boneva, Iovka and Kontokostas, Dimitris},
  pages     = {233--266},
  publisher = {Springer International Publishing},
  title     = {Comparing {ShEx} and {SHACL}},
  year      = {2018},
  address   = {Cham},
  isbn      = {978-3-031-79478-0},
  abstract  = {In this chapter we present a comparison between ShEx and SHACL. The technologies have similar goals and similar features. In fact at the start of the Data Shapes Working Group in 2014, convergence on a unified approach was considered possible. However, this did not happen and as of July 2017 both technologies are maintained as separate solutions.},
  booktitle = {Validating RDF Data},
  doi       = {10.1007/978-3-031-79478-0_7},
  file      = {:papers/Comparing ShEx and SHACL.pdf:PDF},
  groups    = {shape},
  url       = {https://doi.org/10.1007/978-3-031-79478-0_7},
}

@InProceedings{Mailis2019,
  author    = {Mailis, Theofilos and Kotidis, Yannis and Nikolopoulos, Vaggelis and Kharlamov, Evgeny and Horrocks, Ian and Ioannidis, Yannis},
  booktitle = {Proceedings of the 2019 International Conference on Management of Data},
  title     = {An Efficient Index for {RDF} Query Containment},
  year      = {2019},
  address   = {New York, NY, USA},
  pages     = {1499--1516},
  publisher = {Association for Computing Machinery},
  series    = {SIGMOD '19},
  abstract  = {Query containment is a fundamental operation used to expedite query processing in view materialisation and query caching techniques. Since query containment has been shown to be NP-complete for arbitrary conjunctive queries on RDF graphs, we introduce a simpler form of conjunctive queries that we name f-graph queries. We first show that containment checking for f-graph queries can be solved in polynomial time. Based on this observation, we propose a novel indexing structure, named mv-index, that allows for fast containment checking between a single f-graph query and an arbitrary number of stored queries. Search is performed in polynomial time in the combined size of the query and the index. We then show how our algorithms and structures can be extended for arbitrary conjunctive queries on RDF graphs by introducing f-graph witnesses, i.e., f-graph representatives of conjunctive queries. F-graph witnesses have the following interesting property, a conjunctive query for RDF graphs is contained in another query only if its corresponding f-graph witness is also contained in it. The latter allows to use our indexing structure for the general case of conjunctive query containment. This translates in practice to microseconds or less for the containment test against hundreds of thousands of queries that are indexed within our structure.},
  doi       = {10.1145/3299869.3319864},
  file      = {:papers/An Efficient Index for RDF Query Containment.pdf:PDF},
  groups    = {query containment with RDF},
  isbn      = {9781450356435},
  keywords  = {view materialisation, resource description framework (rdf), rdf schema, radix tree, query containment, ptime, polynomial time, database views},
  location  = {Amsterdam, Netherlands},
  numpages  = {18},
  url       = {https://doi.org/10.1145/3299869.3319864},
}

% Verstraete, Verbrugge and Colle on Solid as an enabler of decentralized platform ecosystems (31st ITS European Conference, 2022).
@inproceedings{Verstraete2022,
  author = {Verstraete, Melanie and Verbrugge, Sofie and Colle, Didier},
  title = {Solid: Enabler of decentralized, digital platforms ecosystems},
  booktitle = {31st ITS European Conference},
  year = {2022},
  pages = {1--19},
  groups = {Commodification internet, Solid},
  priority = {prio1},
  file = {:article/Solid Enabler of decentralized, digital platforms ecosystems:},
}

% Mechant et al., CMI 2021. The page range uses a double hyphen (en dash); the "A" that opens the subtitle is braced so sentence-casing styles keep it capitalised.
@InProceedings{Mechant2021,
  author    = {Mechant, Peter and De Wolf, Ralf and Van Compernolle, Mathias and Joris, Glen and Evens, Tom and De Marez, Lieven},
  booktitle = {2021 14th CMI International Conference - Critical ICT Infrastructures and Platforms (CMI)},
  title     = {Saving the web by decentralizing data networks? {A} socio-technical reflection on the promise of decentralization and personal data stores},
  year      = {2021},
  pages     = {1--6},
  doi       = {10.1109/CMI53512.2021.9663788},
  file      = {:article/Saving the web by decentralizing data networks A socio-technical reflection on the promise of decentralization and personal data stores.pdf:PDF},
  groups    = {Commodification internet, Solid},
}

% FedUP (WWW '24). Only the system name and acronyms are brace-protected, so the bibliography style decides the casing of the rest of the title.
@InProceedings{aimonierdavat:hal-04538238,
  author      = {Aimonier-Davat, Julien and Dang, Minh-Hoang and Molli, Pascal and N{\'e}delec, Brice and Skaf-Molli, Hala},
  booktitle   = {The {ACM} Web Conference 2024 ({WWW} '24)},
  title       = {{FedUP}: Querying Large-Scale Federations of {SPARQL} Endpoints},
  year        = {2024},
  address     = {Singapore, Singapore},
  month       = may,
  doi         = {10.1145/3589334.3645704},
  file        = {:papers/FedUP.pdf:PDF},
  groups      = {data sumary, idea to try},
  hal_id      = {hal-04538238},
  hal_version = {v1},
  keywords    = {Semantic Web ; Federated Query Processing ; Source Selection},
  pdf         = {https://hal.science/hal-04538238/file/paper.pdf},
  ranking     = {rank5},
  readstatus  = {read},
  url         = {https://hal.science/hal-04538238},
}

% Mendelzon, Mihaila and Milo, PDIS 1996. Initials are space-separated so BibTeX tokenises them as separate name parts; the page range uses a double hyphen.
@InProceedings{568671,
  author    = {Mendelzon, A. O. and Mihaila, G. A. and Milo, T.},
  booktitle = {Fourth International Conference on Parallel and Distributed Information Systems},
  title     = {Querying the {World Wide Web}},
  year      = {1996},
  pages     = {80--91},
  doi       = {10.1109/PDIS.1996.568671},
  groups    = {Link Traversal Query Processing},
  keywords  = {Web sites;Navigation;Computer science;Network servers;Network topology;Costs;Database languages;Calculus;Java;Search engines},
}

% Taelman and Verborgh, K-CAP '17. The conference location is stored as plain text; this short paper is identified by article number rather than a page range.
@InProceedings{Taelman2017,
  author    = {Taelman, Ruben and Verborgh, Ruben},
  booktitle = {Proceedings of the 9th Knowledge Capture Conference},
  title     = {Declaratively Describing Responses of Hypermedia-Driven {Web} {APIs}},
  year      = {2017},
  address   = {New York, NY, USA},
  publisher = {Association for Computing Machinery},
  series    = {K-CAP '17},
  abstract  = {While humans browse the Web by following links, these hypermedia links can also be used by machines for browsing. While efforts such as Hydra semantically describe the hypermedia controls on Web interfaces to enable smarter interface-agnostic clients, they are largely limited to the input parameters to interfaces, and clients therefore do not know what response to expect from these interfaces. In order to convey such expectations, interfaces need to declaratively describe the response structure of their parameterized hypermedia controls. We therefore explored techniques to represent this parameterized response structure in a generic but expressive way. In this work, we discuss four different approaches for declaring a response structure, and we compare them based on a model that we introduce. Based on this model, we conclude that a SHACL shape-based approach can be used for declaring such a parameterized response structure, as it conforms to the REST architectural style that has helped shape the Web into its current form.},
  articleno = {34},
  doi       = {10.1145/3148011.3154467},
  groups    = {hypermedia},
  isbn      = {9781450355537},
  keywords  = {SHACL, REST, RDF, Linked Data, Hypermedia, Hydra},
  location  = {Austin, TX, USA},
  numpages  = {4},
  url       = {https://doi.org/10.1145/3148011.3154467},
}

% Czerwinski et al., PODS '15. The accented letter is written as a BibTeX special character (brace group starting with the accent command) so sorting and label generation treat it as one letter.
@InProceedings{10.1145/2745754.2745766,
  author    = {Czerwi{\'n}ski, Wojciech and Martens, Wim and Parys, Pawel and Przybylko, Marcin},
  booktitle = {Proceedings of the 34th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems},
  title     = {The (Almost) Complete Guide to Tree Pattern Containment},
  year      = {2015},
  address   = {New York, NY, USA},
  pages     = {117--130},
  publisher = {Association for Computing Machinery},
  series    = {PODS '15},
  abstract  = {Tree pattern queries are being investigated in database theory for more than a decade. They are a fundamental and flexible query mechanism and have been considered in the context of querying tree structured as well as graph structured data. We revisit their containment, validity, and satisfiability problem, both with and without schema information. We present a comprehensive overview of what is known about the complexity of containment and develop new techniques which allow us to obtain tractability- and hardness results for cases that have been open since the early work on tree pattern containment. For the tree pattern queries we consider in this paper, it is known that the containment problem does not depend on whether patterns are evaluated on trees or on graphs. This means that our results also shed new light on tree pattern queries on graphs.},
  doi       = {10.1145/2745754.2745766},
  groups    = {query-containment with schema},
  isbn      = {9781450327572},
  keywords  = {complexity, containment, dtd, graphs, optimization, satisfiability, schema, tree patterns, trees, validity, xml, xpath},
  location  = {Melbourne, Victoria, Australia},
  numpages  = {14},
  url       = {https://doi.org/10.1145/2745754.2745766},
}

% Fujimoto and Suzuki, WEBIST 2022. The title carries no terminal period; the bibliography style supplies the punctuation.
@InProceedings{fujimoto2022simple,
  author    = {Fujimoto, Haruna and Suzuki, Nobutaka},
  booktitle = {WEBIST},
  title     = {A Simple Algorithm for Checking Pattern Query Containment under Shape Expression Schema},
  year      = {2022},
  pages     = {278--285},
  groups    = {query-containment with schema},
}

% Delva et al., EDBT 2023. Authors are listed individually: an extra outer brace pair around the list would make BibTeX parse all four people as one corporate name.
@InProceedings{delva2023,
  author    = {Delva, Thomas and Dimou, Anastasia and Jakubowski, Maxime and Van den Bussche, Jan},
  booktitle = {Proceedings 26th International Conference on Extending Database Technology ({EDBT} 2023)},
  title     = {Data provenance for {SHACL}},
  year      = {2023},
  pages     = {285--297},
  volume    = {26},
  doi       = {10.48786/edbt.2023.23},
  file      = {:references/Data Provenance for SHACL.pdf:PDF},
  groups    = {convertion shape to query},
  isbn      = {9783893180936},
  issn      = {2367-2005},
  language  = {eng},
  location  = {Ioannina, Greece},
  url       = {http://doi.org/10.48786/edbt.2023.23},
}

% arXiv preprint 2110.11709. The author is given in "Last, First" form so the two-word surname is not split by BibTeX's name parser.
@Misc{gayo2021creatingknowledgegraphssubsets,
  author        = {Labra Gayo, Jose Emilio},
  title         = {Creating Knowledge Graphs Subsets using Shape Expressions},
  year          = {2021},
  archiveprefix = {arXiv},
  eprint        = {2110.11709},
  file          = {:references/creating Knowledge Graphs Subsets using.pdf:PDF},
  groups        = {convertion shape to query},
  primaryclass  = {cs.DB},
  url           = {https://arxiv.org/abs/2110.11709},
}

% HP Labs technical report HPL-2005-170. Typed as a report so the institution and report number are rendered as such instead of as a journal name.
@TechReport{cyganiak2005relational,
  author      = {Cyganiak, Richard},
  institution = {Digital Media Systems Laboratory, HP Laboratories Bristol},
  title       = {A relational algebra for {SPARQL}},
  year        = {2005},
  number      = {HPL-2005-170},
  file        = {:references/A relational algebra for SPARQL.pdf:PDF},
  groups      = {SPARQL},
}

% Angles and Gutierrez, ISWC 2008. The acronym is brace-protected so sentence-casing styles keep it uppercase; the booktitle dash matches the other ISWC entries in this file.
@InProceedings{Angles2008,
  author    = {Angles, Renzo and Gutierrez, Claudio},
  booktitle = {The Semantic Web -- {ISWC} 2008},
  title     = {The Expressive Power of {SPARQL}},
  year      = {2008},
  address   = {Berlin, Heidelberg},
  editor    = {Sheth, Amit and Staab, Steffen and Dean, Mike and Paolucci, Massimo and Maynard, Diana and Finin, Timothy and Thirunarayan, Krishnaprasad},
  pages     = {114--129},
  publisher = {Springer Berlin Heidelberg},
  abstract  = {This paper studies the expressive power of SPARQL. The main result is that SPARQL and non-recursive safe Datalog with negation have equivalent expressive power, and hence, by classical results, SPARQL is equivalent from an expressiveness point of view to Relational Algebra. We present explicit generic rules of the transformations in both directions. Among other findings of the paper are the proof that negation can be simulated in SPARQL, that non-safe filters are superfluous, and that current SPARQL W3C semantics can be simplified to a standard compositional one.},
  file      = {:references/The Expressive Power of SPARQL.pdf:PDF},
  groups    = {SPARQL},
  isbn      = {978-3-540-88564-1},
}

% arXiv preprint 1701.08924 on validating and describing linked data portals with shapes.
@misc{labragayo2017validatingdescribinglinkeddata,
  author = {Labra-Gayo, Jose-Emilio and Prud'hommeaux, Eric and Solbrig, Harold and Boneva, Iovka},
  title = {Validating and describing linked data portals using shapes},
  year = {2017},
  eprint = {1701.08924},
  archiveprefix = {arXiv},
  primaryclass = {cs.DB},
  url = {https://arxiv.org/abs/1701.08924},
  groups = {shape},
}

% Corman et al., ISWC 2019. SHACL and SPARQL are written as uppercase acronyms and brace-protected so sentence-casing styles do not lowercase them.
@InProceedings{Corman2019,
  author    = {Corman, Julien and Florenzano, Fernando and Reutter, Juan L. and Savkovi{\'{c}}, Ognjen},
  booktitle = {The Semantic Web -- ISWC 2019},
  title     = {Validating {SHACL} Constraints over a {SPARQL} Endpoint},
  year      = {2019},
  address   = {Cham},
  editor    = {Ghidini, Chiara and Hartig, Olaf and Maleshkova, Maria and Sv{\'a}tek, Vojt{\v{e}}ch and Cruz, Isabel and Hogan, Aidan and Song, Jie and Lefran{\c{c}}ois, Maxime and Gandon, Fabien},
  pages     = {145--163},
  publisher = {Springer International Publishing},
  abstract  = {shacl (Shapes Constraint Language) is a specification for describing and validating RDF graphs that has recently become a W3C recommendation. While the language is gaining traction in the industry, algorithms for shacl constraint validation are still at an early stage. A first challenge comes from the fact that RDF graphs are often exposed as sparql endpoints, and therefore only accessible via queries. Another difficulty is the absence of guidelines about the way recursive constraints should be handled. In this paper, we provide algorithms for validating a graph against a shacl schema, which can be executed over a sparql endpoint. We first investigate the possibility of validating a graph through a single query for non-recursive constraints. Then for the recursive case, since the problem has been shown to be NP-hard, we propose a strategy that consists in evaluating a small number of sparql queries over the endpoint, and using the answers to build a set of propositional formulas that are passed to a SAT solver. Finally, we show that the process can be optimized when dealing with recursive but tractable fragments of shacl, without the need for an external solver. We also present a proof-of-concept evaluation of this last approach.},
  file      = {:references/Validating Shacl Constraints over a Sparql Endpoint.pdf:PDF},
  groups    = {shape},
  isbn      = {978-3-030-30793-6},
}

% Eschauzier, Taelman and Verborgh on link-queue evolution during traversal-based query processing (workshop paper, November 2023).
@inproceedings{eschauzier_quweda_linkqueue_2023,
  author = {Eschauzier, Ruben and Taelman, Ruben and Verborgh, Ruben},
  title = {How Does the Link Queue Evolve during Traversal-Based Query Processing?},
  booktitle = {Proceedings of the 7th International Workshop on Storing, Querying and Benchmarking Knowledge Graphs},
  year = {2023},
  month = nov,
  url = {https://rubeneschauzier.github.io/QuWeDa2023-Link-Queue-Analysis-Camera-Ready/QuWeDa_2023_Link_Queue_Analysis_Final.pdf},
  groups = {Link Traversal Query Processing},
}

% Stuckenschmidt et al., WWW '04. The page range uses the ASCII double hyphen so classic 8-bit BibTeX handles it; the acronym in the title is brace-protected.
@InProceedings{Stuckenschmidt2004,
  author    = {Stuckenschmidt, Heiner and Vdovjak, Richard and Houben, Geert-Jan and Broekstra, Jeen},
  booktitle = {Proceedings of the 13th International Conference on World Wide Web},
  title     = {Index structures and algorithms for querying distributed {RDF} repositories},
  year      = {2004},
  address   = {New York, NY, USA},
  pages     = {631--639},
  publisher = {Association for Computing Machinery},
  series    = {WWW '04},
  abstract  = {A technical infrastructure for storing, querying and managing RDF data is a key element in the current semantic web development. Systems like Jena, Sesame or the ICS-FORTH RDF Suite are widely used for building semantic web applications. Currently, none of these systems supports the integrated querying of distributed RDF repositories. We consider this a major shortcoming since the semantic web is distributed by nature. In this paper we present an architecture for querying distributed RDF repositories by extending the existing Sesame system. We discuss the implications of our architecture and propose an index structure as well as algorithms for query processing and optimization in such a distributed context.},
  doi       = {10.1145/988672.988758},
  groups    = {source selection},
  isbn      = {158113844X},
  keywords  = {optimization, index structures, RDF querying},
  location  = {New York, NY, USA},
  numpages  = {9},
  url       = {https://doi.org/10.1145/988672.988758},
}

% Chapter 2 of "Principles of Data Integration". The chapter number lives in its own field and the DOI is stored bare; styles add the resolver prefix themselves.
@InCollection{Doan2012,
  author    = {AnHai Doan and Alon Halevy and Zachary Ives},
  booktitle = {Principles of Data Integration},
  publisher = {Morgan Kaufmann},
  title     = {Manipulating Query Expressions},
  year      = {2012},
  address   = {Boston},
  chapter   = {2},
  editor    = {AnHai Doan and Alon Halevy and Zachary Ives},
  isbn      = {978-0-12-416044-6},
  pages     = {21--63},
  doi       = {10.1016/B978-0-12-416044-6.00002-8},
  file      = {:references/Manipulating Query Expressions.pdf:PDF},
  groups    = {query containment with RDF},
  url       = {https://www.sciencedirect.com/science/article/pii/B9780124160446000028},
}

% Papadaki et al., EDBT/ICDT Workshops 2018. The title has no terminal period (the style adds it), and the acronyms are brace-protected against sentence casing.
@InProceedings{papadaki2018interactive,
  author    = {Papadaki, Maria-Evangelia and Papadakos, Panagiotis and Mountantonakis, Michalis and Tzitzikas, Yannis},
  booktitle = {EDBT/ICDT Workshops},
  title     = {An Interactive {3D} Visualization for the {LOD} Cloud},
  year      = {2018},
  pages     = {100--103},
}

% Acosta, Vidal and Sure-Vetter on diefficiency metrics for continuous query-processing efficiency (ISWC 2017).
@inproceedings{Acosta2017,
  author = {Acosta, Maribel and Vidal, Maria-Esther and Sure-Vetter, York},
  title = {Diefficiency Metrics: Measuring the Continuous Efficiency of Query Processing Approaches},
  booktitle = {The Semantic Web -- ISWC 2017},
  editor = {d'Amato, Claudia and Fernandez, Miriam and Tamma, Valentina and Lecue, Freddy and Cudr{\'e}-Mauroux, Philippe and Sequeda, Juan and Lange, Christoph and Heflin, Jeff},
  publisher = {Springer International Publishing},
  address = {Cham},
  year = {2017},
  pages = {3--19},
  isbn = {978-3-319-68204-4},
  abstract = {During empirical evaluations of query processing techniques, metrics like execution time, time for the first answer, and throughput are usually reported. Albeit informative, these metrics are unable to quantify and evaluate the efficiency of a query engine over a certain time period -- or diefficiency --, thus hampering the distinction of cutting-edge engines able to exhibit high-performance gradually. We tackle this issue and devise two experimental metrics named dief@t and dief@k, which allow for measuring the diefficiency during an elapsed time period t or while k answers are produced, respectively. The dief@t and dief@k measurement methods rely on the computation of the area under the curve of answer traces, and thus capturing the answer concentration over a time interval. We report experimental results of evaluating the behavior of a generic SPARQL query engine using both metrics. Observed results suggest that dief@t and dief@k are able to measure the performance of SPARQL query engines based on both the amount of answers produced by an engine and the time required to generate these answers.},
  groups = {metric},
  file = {:references/Diefficiency Metrics Measuring the Continuous Efficiency of Query Processing Approaches.pdf:PDF},
}

% Comunica resource paper, ISWC 2018. The acronym and the proper noun "Web" are brace-protected so sentence-casing styles keep their capitals.
@InProceedings{taelman_iswc_resources_comunica_2018,
  author    = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  booktitle = {Proceedings of the 17th International Semantic Web Conference},
  title     = {Comunica: a Modular {SPARQL} Query Engine for the {Web}},
  year      = {2018},
  month     = oct,
  url       = {https://comunica.github.io/Article-ISWC2018-Resource/},
}

% Components.js article by Taelman et al., Semantic Web Journal, January 2022.
@article{taelman_swj_componentsjs_2022,
  author = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  title = {Components.js: Semantic Dependency Injection},
  journal = {Semantic Web Journal},
  year = {2022},
  month = jan,
  url = {https://linkedsoftwaredependencies.github.io/Article-System-Components/},
}

% Hanski, Taelman and Verborgh, ESWC 2024. Authors are listed individually (an extra outer brace pair would collapse them into one corporate name); proper nouns in the title are brace-protected.
@InProceedings{Hanski2024,
  author    = {Hanski, Jonni and Taelman, Ruben and Verborgh, Ruben},
  booktitle = {ESWC2024, the European Semantic Web Conference},
  title     = {Observations on {Bloom} filters for traversal-based query execution over {Solid} pods},
  year      = {2024},
  pages     = {5},
  language  = {und},
  location  = {Hersonissos, Greece},
}

% LDBC Social Network Benchmark, arXiv 2001.02299 (record exported from DBLP).
@article{Angles2020,
  author = {Renzo Angles and J{\'{a}}nos Benjamin Antal and Alex Averbuch and Peter A. Boncz and Orri Erling and Andrey Gubichev and Vlad Haprian and Moritz Kaufmann and Josep{-}Llu{\'{\i}}s Larriba{-}Pey and Norbert Mart{\'{\i}}nez{-}Bazan and J{\'{o}}zsef Marton and Marcus Paradies and Minh{-}Duc Pham and Arnau Prat{-}P{\'{e}}rez and Mirko Spasic and Benjamin A. Steer and G{\'{a}}bor Sz{\'{a}}rnyas and Jack Waudby},
  title = {The {LDBC} {S}ocial {N}etwork {B}enchmark},
  journal = {CoRR},
  volume = {abs/2001.02299},
  year = {2020},
  eprint = {2001.02299},
  archiveprefix = {arXiv},
  url = {http://arxiv.org/abs/2001.02299},
  timestamp = {Mon, 13 Jan 2020 12:40:17 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2001-02299.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% Eschauzier, Taelman and Verborgh, AMW 2024. The metric name is brace-protected so sentence-casing styles do not lowercase it.
@InProceedings{eschauzier_amw_rcubemetric_2024,
  author    = {Eschauzier, Ruben and Taelman, Ruben and Verborgh, Ruben},
  booktitle = {Proceedings of the 16th Alberto Mendelzon International Workshop on Foundations of Data Management},
  title     = {The {R3} Metric: Measuring Performance of Link Prioritization during Traversal-based Query Processing},
  year      = {2024},
  month     = sep,
  url       = {https://rubeneschauzier.github.io/traversal-metric-workshop-paper/},
}

% Iglesias Prestamo and Labra Gayo, KGSWC 2023. Both authors have two-word surnames, so they are given in "Last, First" form with the full surname before the comma.
@InProceedings{Prestamo2023,
  author    = {Iglesias Pr{\'e}stamo, {\'A}ngel and Labra Gayo, Jose Emilio},
  booktitle = {Knowledge Graphs and Semantic Web},
  title     = {Using {Pregel} to Create Knowledge Graphs Subsets Described by Non-recursive Shape Expressions},
  year      = {2023},
  address   = {Cham},
  editor    = {Ortiz-Rodriguez, Fernando and Villaz{\'o}n-Terrazas, Boris and Tiwari, Sanju and Bobed, Carlos},
  pages     = {120--134},
  publisher = {Springer Nature Switzerland},
  abstract  = {Knowledge Graphs have been successfully adopted in recent years, existing general-purpose ones, like Wikidata, as well as domain-specific ones, like UniProt. Their increasing size poses new challenges to their practical usage. As an example, Wikidata has been growing the size of its contents and their data since its inception making it difficult to download and process its data. Although the structure of Wikidata items is flexible, it tends to be heterogeneous: the shape of an entity representing a human is distinct from that of a mountain. Recently, Wikidata adopted Entity Schemas to facilitate the definition of different schemas using Shape Expressions, a language that can be used to describe and validate RDF data. In this paper, we present an approach to obtain subsets of knowledge graphs based on Shape Expressions that use an implementation of the Pregel algorithm implemented in Rust. We have applied our approach to obtain subsets of Wikidata and UniProt and present some of these experiments' results.},
  groups    = {shape},
  isbn      = {978-3-031-47745-4},
}

@Comment{jabref-meta: databaseType:bibtex;}

@Comment{jabref-meta: grouping:
0 AllEntriesGroup:;
1 StaticGroup:charateristic set\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:federated queries\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:Link Traversal Query Processing\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:query containment\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:query containment bag semantic\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:query containment with RDF\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:query decomposition\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:query plan LTQP\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:Query plan with shape\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:query planning\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:reading\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:shape\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:shape containment\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:protocol\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:analysis of KG\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:Solid\;0\;1\;\;\;\;;
1 StaticGroup:data sumary\;0\;1\;\;\;\;;
1 StaticGroup:idea to try\;0\;1\;\;\;\;;
1 StaticGroup:hypermedia\;0\;1\;\;\;\;;
1 StaticGroup:query-containment with schema\;0\;1\;\;\;\;;
1 StaticGroup:convertion shape to query\;0\;1\;\;\;\;;
1 StaticGroup:SPARQL\;0\;1\;\;\;\;;
1 StaticGroup:source selection\;0\;1\;\;\;\;;
1 StaticGroup:metric\;0\;1\;\;\;\;;
}