-
Notifications
You must be signed in to change notification settings - Fork 2
/
references.bib
1698 lines (1577 loc) · 115 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{Bonifati2019,
title = {An analytical study of large SPARQL query logs},
volume = {29},
ISSN = {0949-877X},
url = {http://dx.doi.org/10.1007/s00778-019-00558-9},
DOI = {10.1007/s00778-019-00558-9},
number = {2–3},
journal = {The VLDB Journal},
publisher = {Springer Science and Business Media LLC},
author = {Bonifati, Angela and Martens, Wim and Timm, Thomas},
year = {2019},
month = aug,
pages = {655–679}
}
@inproceedings{dia2018fast,
title={Fast SPARQL join processing between distributed streams and stored RDF graphs using bloom filters},
author={Dia, Amadou Fall and Aoul, Zakia Kazi and Boly, Aliou and M{\'e}tais, Elisabeth},
booktitle={2018 12th International Conference on Research Challenges in Information Science (RCIS)},
pages={1--12},
year={2018},
organization={IEEE}
}
@misc{shapetreesShapeTrees,
author = {},
title = {{S}hape {T}rees {S}pecification --- shapetrees.org},
howpublished = {\url{https://shapetrees.org/TR/specification/}},
year = {Eric Prud'hommeaux, Justin Bingham },
note = {[Accessed 06-12-2024]},
}
@misc{w3SPARQLQuery,
author = {W3C},
title = {{S}{P}{A}{R}{Q}{L} 1.1 {Q}uery {L}anguage --- w3.org},
howpublished = {\url{https://www.w3.org/TR/sparql11-query/}},
year = {},
note = {[Accessed 04-12-2024]},
}
@InProceedings{tam2024opportunitiesshapebasedoptimizationlink,
author = {Tam, Bryan-Elliott and Taelman, Ruben and Colpaert, Pieter and Verborgh, Ruben},
booktitle = {Proceedings of the 16th Alberto Mendelzon International Workshop on Foundations of Data Management},
title = {Opportunities for Shape-based Optimization of Link Traversal Queries},
year = {2024},
month = sep,
url = {https://arxiv.org/pdf/2407.00998v2},
}
@InProceedings{verstraete2022solid,
author = {Verstraete, Melanie and Verbrugge, Sofie and Colle, Didier},
booktitle = {ITS},
title = {Solid: Enabler of decentralized, digital platforms ecosystems},
year = {2022},
pages = {1--19},
file = {:article/Solid Enabler of decentralized, digital platforms ecosystems:},
groups = {Commodification internet},
}
@article{taelman2024towards,
title={Towards Applications on the Decentralized Web using Hypermedia-driven Query Engines},
author={Taelman, Ruben},
journal={ACM SIGWEB Newsletter},
volume={2024},
number={Summer},
pages={1--9},
year={2024},
publisher={ACM New York, NY, USA}
}
@inproceedings{hose2012towards,
title={Towards benefit-based RDF source selection for SPARQL queries},
author={Hose, Katja and Schenkel, Ralf},
booktitle={Proceedings of the 4th International Workshop on Semantic Web Information Management},
pages={1--8},
year={2012}
}
@inproceedings{Harth2010,
author = {Harth, Andreas and Hose, Katja and Karnstedt, Marcel and Polleres, Axel and Sattler, Kai-Uwe and Umbrich, J\"{u}rgen},
title = {Data summaries for on-demand queries over linked data},
year = {2010},
isbn = {9781605587998},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/1772690.1772733},
doi = {10.1145/1772690.1772733},
abstract = {Typical approaches for querying structured Web Data collect (crawl) and pre-process (index) large amounts of data in a central data repository before allowing for query answering. However, this time-consuming pre-processing phase however leverages the benefits of Linked Data -- where structured data is accessible live and up-to-date at distributed Web resources that may change constantly -- only to a limited degree, as query results can never be current. An ideal query answering system for Linked Data should return current answers in a reasonable amount of time, even on corpora as large as the Web. Query processors evaluating queries directly on the live sources require knowledge of the contents of data sources. In this paper, we develop and evaluate an approximate index structure summarising graph-structured content of sources adhering to Linked Data principles, provide an algorithm for answering conjunctive queries over Linked Data on theWeb exploiting the source summary, and evaluate the system using synthetically generated queries. The experimental results show that our lightweight index structure enables complete and up-to-date query results over Linked Data, while keeping the overhead for querying low and providing a satisfying source ranking at no additional cost.},
booktitle = {Proceedings of the 19th International Conference on World Wide Web},
pages = {411–420},
numpages = {10},
keywords = {index structures, linked data, rdf querying},
location = {Raleigh, North Carolina, USA},
series = {WWW '10}
}
@InProceedings{aranda2013,
author = {Buil-Aranda, Carlos and Hogan, Aidan and Umbrich, J\"{u}rgen and Vandenbussche, Pierre-Yves},
booktitle = {Proceedings 12th ISWC},
title = {SPARQL Web-Querying Infrastructure: Ready for Action?},
year = {2013},
address = {Berlin, Heidelberg},
pages = {277–293},
publisher= {Springer-Verlag},
series = {ISWC '13},
doi = {10.1007/978-3-642-41338-4_18},
file = {:articles/SPARQL Web-Querying Infrastructure.pdf:PDF},
groups = {rdf statistic, statistic},
isbn = {9783642413377},
numpages = {17},
url = {https://doi.org/10.1007/978-3-642-41338-4_18},
}
@article{LabraGayo2023,
title = {RDF Data integration using Shape Expressions},
url = {http://dx.doi.org/10.37044/osf.io/md73k},
DOI = {10.37044/osf.io/md73k},
publisher = {Center for Open Science},
author = {Labra-Gayo, Jose Emilio and Waagmeester, Andra and Iglesias-Préstamo, Ángel and Yamamoto, Yasunori and Katayama, Toshiaki and Liener, Thomas and Unni, Deepak and Bolleman, Jerven and Aoki-Kinoshita, Kiyoko Flora and Yokochi, Masashi and Queralt-Rosinach, Núria and Mori, Hiroshi and Álvarez, Daniel Fernández and Labarga, Alberto and Benítez-Andrades, José Alberto and Hoehndorf, Robert and Prud’hommeaux, Eric and Nanjo, Claude and Thalhath, Nishad},
year = {2023},
month = jul
}
@article{fernandez2023extracting,
title={Extracting shapes from large RDF data collections},
author={Fern{\'a}ndez-{\'A}lvarez, Daniel and Yamamoto, Yasunori and Labra-Gayo, Jose Emilio and Waagmeester, Andra},
year={2023}
}
@InProceedings{9663788,
author = {Mechant, Peter and De Wolf, Ralf and Van Compernolle, Mathias and Joris, Glen and Evens, Tom and De Marez, Lieven},
booktitle = {2021 14th CMI International Conference},
title = {Saving the web by decentralizing data networks? A socio-technical reflection on the promise of decentralization and personal data stores},
year = {2021},
pages = {1-6},
doi = {10.1109/CMI53512.2021.9663788},
file = {:article/Saving the web by decentralizing data networks?
A socio-technical reflection on the promise of
decentralization and personal data stores.pdf:PDF},
groups = {Commodification internet},
}
@inproceedings{tam_iswc_traversalsensortree_2024,
author = {Tam, Bryan-Elliott and Taelman, Ruben and Rojas Meléndez, Julián and Colpaert, Pieter},
title = {Optimizing Traversal Queries of Sensor Data Using a Rule-Based Reachability Approach},
month = nov,
booktitle = {Proceedings of the 23rd International Semantic Web Conference: Posters and Demos},
year = {2024},
url = {https://arxiv.org/pdf/2408.17157}
}
@InProceedings{9663788,
author = {Mechant, Peter and De Wolf, Ralf and Van Compernolle, Mathias and Joris, Glen and Evens, Tom and De Marez, Lieven},
booktitle = {2021 14th CMI International Conference},
title = {Saving the web by decentralizing data networks? A socio-technical reflection on the promise of decentralization and personal data stores},
year = {2021},
pages = {1-6},
doi. = {10.1109/CMI53512.2021.9663788},
file = {:article/Saving the web by decentralizing data networks?
A socio-technical reflection on the promise of
decentralization and personal data stores.pdf:PDF},
groups = {Commodification internet},
}
@Article{Terranova2000FreeLP,
author = {Tiziana Terranova},
journal = {Social Text},
title = {Free Labor: Producing Culture for the Digital Economy},
year = {2000},
file = {:article/Free Labor Producing Culture for the Digital Economy.pdf:PDF},
groups = {Commodification internet},
pages = {33 - 58},
priority = {prio1},
ranking = {rank5},
readstatus = {read},
volume = {18},
}
@InCollection{Curran2016ch1,
author = {James Curran},
booktitle = {Misunderstanding the Internet},
publisher = {Routledge 2012},
title = {The internet of dreams Reinterpreting the internet},
year = {2016},
chapter = {1},
groups = {Commodification internet},
}
@Article{Sevignani2013,
author = {S. Sevignani},
journal = {Science and Public Policy},
title = {The commodification of privacy on the Internet},
year = {2013},
month = nov,
number = {6},
pages = {733--739},
volume = {40},
doi = {10.1093/scipol/sct082},
file = {:article/The commodification of privacy on the internet.pdf:PDF},
groups = {Commodification internet},
priority = {prio1},
publisher = {Oxford University Press ({OUP})},
ranking = {rank5},
readstatus = {read},
url = {https://doi.org/10.1093/scipol/sct082},
}
@article{Karim2020,
title = {Compacting frequent star patterns in RDF graphs},
volume = {55},
ISSN = {1573-7675},
url = {http://dx.doi.org/10.1007/s10844-020-00595-9},
DOI = {10.1007/s10844-020-00595-9},
number = {3},
journal = {Journal of Intelligent Information Systems},
publisher = {Springer Science and Business Media LLC},
author = {Karim, Farah and Vidal, Maria-Esther and Auer, S\"{o}ren},
year = {2020},
month = apr,
pages = {561–585}
}
@InProceedings{9663788,
author = {Mechant, Peter and De Wolf, Ralf and Van Compernolle, Mathias and Joris, Glen and Evens, Tom and De Marez, Lieven},
booktitle = {2021 14th CMI International Conference},
title = {Saving the web by decentralizing data networks? A socio-technical reflection on the promise of decentralization and personal data stores},
year = {2021},
pages = {1-6},
doi = {10.1109/CMI53512.2021.9663788},
file = {:article/Saving the web by decentralizing data networks?
A socio-technical reflection on the promise of
decentralization and personal data stores.pdf:PDF},
groups = {Commodification internet},
}
@article{hartigLDQL,
title = {LDQL: A query language for the Web of Linked Data},
journal = {Journal of Web Semantics},
volume = {41},
pages = {9-29},
year = {2016},
issn = {1570-8268},
doi = {https://doi.org/10.1016/j.websem.2016.10.001},
url = {https://www.sciencedirect.com/science/article/pii/S1570826816300476},
author = {Olaf Hartig and Jorge Pérez},
keywords = {Linked Data, Query language, Foundations, SPARQL, Queries},
abstract = {The Web of Linked Data is composed of tons of RDF documents interlinked to each other forming a huge repository of distributed semantic data. Effectively querying this distributed data source is an important open problem in the Semantic Web area. In this paper, we propose LDQL, a declarative language to query Linked Data on the Web. One of the novelties of LDQL is that it expresses separately (i) patterns that describe the expected query result, and (ii) Web navigation paths that select the data sources to be used for computing the result. We present a formal syntax and semantics, prove equivalence rules, and study the expressiveness of the language. In particular, we show that LDQL is strictly more expressive than all the query formalisms that have been proposed previously for Linked Data on the Web. We also study some computability issues regarding LDQL. We first prove that when considering the Web of Linked Data as a fully accessible graph, the evaluation problem for LDQL can be solved in polynomial time. Nevertheless, when the limited data access capabilities of Web clients are considered, the scenario changes drastically; there are LDQL queries for which a complete execution is not possible in practice. We formally study this issue and provide a sufficient syntactic condition to avoid this problem; queries satisfying this condition are ensured to have a procedure to be effectively evaluated over the Web of Linked Data.}
}
@inproceedings{Fujimoto2022,
title = {A Simple Algorithm for Checking Pattern Query Containment under Shape Expression Schema},
url = {http://dx.doi.org/10.5220/0011536800003318},
DOI = {10.5220/0011536800003318},
booktitle = {Proceedings of the 18th International Conference on Web Information Systems and Technologies},
publisher = {SCITEPRESS - Science and Technology Publications},
author = {Fujimoto, Haruna and Suzuki, Nobutaka},
year = {2022}
}
@Article{Spasi2023,
author = {Mirko Spasi{\'{c}} and Milena Vujo{\v{s}}evi{\'{c}} Jani{\v{c}}i{\'{c}}},
journal = {Journal of Web Semantics},
title = {Solving the {SPARQL} query containment problem with {SpeCS}},
year = {2023},
month = apr,
pages = {100770},
volume = {76},
comment = {I have been told to be careful about the claim of that paper because it has been rejected by another journal because there was a problem in the approach.},
doi = {10.1016/j.websem.2022.100770},
file = {:papers/Solving the SPARQL query containment problem with SpeCS.pdf:PDF},
groups = {query containment bag semantic, query containment with RDF},
priority = {prio1},
publisher = {Elsevier {BV}},
url = {https://doi.org/10.1016/j.websem.2022.100770},
}
@InBook{afariQCE,
author = {Foto Afrati, Rada Chirkova},
chapter = {2},
pages = {21-59},
publisher = {Springer Cham},
title = {Query Containment and Equivalence},
year = {2019},
booktitle = {Answering Queries Using Views, Second Edition},
doi = {https://doi.org/10.1007/978-3-031-01871-8},
file = {:papers/Query Containment and Equivalence.pdf:PDF;:papers/Query Containment and Equivalence/p23.jpg:JPG image},
groups = {query containment},
ranking = {rank5},
readstatus = {read},
}
@InBook{afariAQD,
author = {Foto Afrati, Rada Chirkova},
chapter = {5},
pages = {125-142},
publisher = {Springer Cham},
title = {Answering Queries in Presence of Dependencies},
year = {2019},
booktitle = {Answering Queries Using Views, Second Edition},
doi = {https://doi.org/10.1007/978-3-031-01871-8},
file = {:papers/Answering Queries in Presence of Dependencies.pdf:PDF},
groups = {query containment},
priority = {prio3},
}
@Article{10.1145/3472391,
author = {Khamis, Mahmoud Abo and Kolaitis, Phokion G. and Ngo, Hung Q. and Suciu, Dan},
journal = {ACM Trans. Database Syst.},
title = {Bag Query Containment and Information Theory},
year = {2021},
issn = {0362-5915},
month = {sep},
number = {3},
volume = {46},
abstract = {The query containment problem is a fundamental algorithmic problem in data management. While this problem is well understood under set semantics, it is by far less understood under bag semantics. In particular, it is a long-standing open question whether or not the conjunctive query containment problem under bag semantics is decidable. We unveil tight connections between information theory and the conjunctive query containment under bag semantics. These connections are established using information inequalities, which are considered to be the laws of information theory. Our first main result asserts that deciding the validity of a generalization of information inequalities is many-one equivalent to the restricted case of conjunctive query containment in which the containing query is acyclic; thus, either both these problems are decidable or both are undecidable. Our second main result identifies a new decidable case of the conjunctive query containment problem under bag semantics. Specifically, we give an exponential-time algorithm for conjunctive query containment under bag semantics, provided the containing query is chordal and admits a simple junction tree.},
address = {New York, NY, USA},
articleno = {12},
doi = {10.1145/3472391},
file = {:papers/Bag Query Containment and Information Theory.pdf:PDF},
groups = {reading, query containment bag semantic},
issue_date = {September 2021},
keywords = {information theory, entropy, Query containment, bag semantics},
numpages = {39},
priority = {prio1},
publisher = {Association for Computing Machinery},
url = {https://doi.org/10.1145/3472391},
}
@Article{Chekol2018,
author = {Melisachew Wudage Chekol and J{\'{e}}r{\^{o}}me Euzenat and Pierre Genev{\`{e}}s and Nabil Layaïda},
journal = {Journal on Data Semantics},
title = {{SPARQL} Query Containment Under Schema},
year = {2018},
month = may,
number = {3},
pages = {133--154},
volume = {7},
doi = {10.1007/s13740-018-0087-1},
file = {:papers/SPARQL Query Containment under Schema.pdf:PDF},
groups = {query containment bag semantic, query containment with RDF, query-containment with schema},
priority = {prio1},
publisher = {Springer Science and Business Media {LLC}},
url = {https://doi.org/10.1007/s13740-018-0087-1},
}
@Misc{2112.11796,
author = {Thomas Delva and Anastasia Dimou and Maxime Jakubowski and Jan Van den Bussche},
title = {Shape Fragments},
year = {2021},
comment = {It's a bit complicated to understand but I think the idea of converting the shape into a query or converting the shape directly into a canonical database (not in the paper) might be a very good approach. But the query are complicated and the software proposed is a naive implementation.},
eprint = {arXiv:2112.11796},
file = {:papers/Shape Fragments.pdf:PDF},
groups = {shape},
ranking = {rank4},
readstatus = {skimmed},
}
@Misc{koutisLecture,
author = {Paris Koutris},
title = {Lecture 2: Query Containment},
file = {:papers/Koutris_query_containment.pdf:PDF},
groups = {query containment},
}
@InBook{Chirkova2009,
author = {Chirkova, Rada},
editor = {LIU, LING and {\"O}ZSU, M. TAMER},
pages = {2249--2253},
publisher = {Springer US},
title = {Query Containment},
year = {2009},
address = {Boston, MA},
isbn = {978-0-387-39940-9},
booktitle = {Encyclopedia of Database Systems},
doi = {10.1007/978-0-387-39940-9_1269},
file = {:papers/Query Containment.pdf:PDF},
groups = {query containment},
priority = {prio1},
url = {https://doi.org/10.1007/978-0-387-39940-9_1269},
}
@InProceedings{10.1145/275487.275511,
author = {Kolaitis, Phokion G. and Vardi, Moshe Y.},
booktitle = {Proceedings of the Seventeenth ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems},
title = {Conjunctive-Query Containment and Constraint Satisfaction},
year = {1998},
address = {New York, NY, USA},
pages = {205–213},
publisher = {Association for Computing Machinery},
series = {PODS '98},
doi = {10.1145/275487.275511},
file = {:papers/Conjunctive query containment and constraint satisfaction.pdf:PDF},
groups = {query containment},
isbn = {0897919963},
location = {Seattle, Washington, USA},
numpages = {9},
priority = {prio1},
url = {https://doi.org/10.1145/275487.275511},
}
@InProceedings{Ullman1997,
author = {Ullman, Jeffrey D.},
booktitle = {Database Theory --- ICDT '97},
title = {Information integration using logical views},
year = {1997},
address = {Berlin, Heidelberg},
editor = {Afrati, Foto and Kolaitis, Phokion},
pages = {19--40},
publisher = {Springer Berlin Heidelberg},
abstract = {A number of ideas concerning information-integration tools can be thought of as constructing answers to queries using views that represent the capabilities of information sources. We review the formal basis of these techniques, which are closely related to containment algorithms for conjunctive queries and/or Datalog programs. Then we compare the approaches taken by AT{\&}T Labs' ``Information Manifold'' and the Stanford ``Tsimmis'' project in these terms.},
file = {:papers/Information integration using logical views.pdf:PDF},
groups = {query containment},
isbn = {978-3-540-49682-3},
priority = {prio2},
}
@InProceedings{Boneva2017,
author = {Boneva, Iovka and Labra Gayo, Jose E. and Prud’hommeaux, Eric G.},
booktitle = {The Semantic Web – ISWC 2017: 16th International Semantic Web Conference, Vienna, Austria, October 21–25, 2017, Proceedings, Part I},
title = {Semantics and Validation of Shapes Schemas for RDF},
year = {2017},
address = {Berlin, Heidelberg},
pages = {104–120},
publisher = {Springer-Verlag},
abstract = {We present a formal semantics and proof of soundness for shapes schemas, an expressive schema language for RDF graphs that is the foundation of Shape Expressions Language 2.0. It can be used to describe the vocabulary and the structure of an RDF graph, and to constrain the admissible properties and values for nodes in that graph. The language defines a typing mechanism called shapes against which nodes of the graph can be checked. It includes an algebraic grouping operator, a choice operator and cardinality constraints for the number of allowed occurrences of a property. Shapes can be combined using Boolean operators, and can use possibly recursive references to other shapes.We describe the syntax of the language and define its semantics. The semantics is proven to be well-defined for schemas that satisfy a reasonable syntactic restriction, namely stratified use of negation and recursion. We present two algorithms for the validation of an RDF graph against a shapes schema. The first algorithm is a direct implementation of the semantics, whereas the second is a non-trivial improvement. We also briefly give implementation guidelines.},
comment-id357 = {Cet article a une bonne formulation des formes, je pense qu'il peut être un bon point de départ pour ma propre formulation, il y a aussi une bonne gestion des négations. Par contre le problème de transformer les formes dans une formulation compatible avec le problème de containement reste un problème.},
doi = {10.1007/978-3-319-68288-4_7},
file = {:papers/Semantics and Validation of Shapes Schemas for RDF.pdf:PDF},
groups = {shape},
isbn = {978-3-319-68287-7},
location = {Vienna, Austria},
numpages = {17},
ranking = {rank5},
readstatus = {read},
url = {https://doi.org/10.1007/978-3-319-68288-4_7},
}
@Misc{afrati2020complexity,
author = {Foto N. Afrati and Matthew Damigos},
title = {On the complexity of query containment and computing certain answers in the presence of ACs},
year = {2020},
archiveprefix = {arXiv},
comment-id357 = {Help to explain the cannonical database for query with arithmetic comparator},
eprint = {2008.10986},
file = {:papers/On the Complexity of Query Containment and Computing Certain Answers in the Presence of ACs.pdf:PDF},
groups = {query containment},
primaryclass = {cs.DB},
ranking = {rank4},
readstatus = {skimmed},
}
@Article{Leinberger2020DecidingSS,
author = {Martin Leinberger and Philipp Seifer and Tjitze Rienstra and Ralf Lammel and Steffen Staab},
journal = {ArXiv},
title = {Deciding SHACL Shape Containment through Description Logics Reasoning (Extended Version)},
year = {2020},
volume = {abs/2008.13603},
file = {:papers/Deciding SHACL Shape Containment through Description Logics Reasoning Extended Version.pdf:PDF},
groups = {shape containment},
priority = {prio1},
url = {https://api.semanticscholar.org/CorpusID:221377301},
}
@Article{Pareti2020SHACLSA,
author = {Paolo Pareti and G. Konstantinidis and Fabio Mogavero and Timothy J. Norman},
journal = {ArXiv},
title = {SHACL Satisfiability and Containment (Extended Paper)},
year = {2020},
volume = {abs/2009.09806},
file = {:papers/SHACL Satisfiability and Containment Extended Paper.pdf:PDF},
groups = {shape containment},
priority = {prio1},
url = {https://api.semanticscholar.org/CorpusID:221818992},
}
@Article{Staworko2018ContainmentOS,
author = {Slawomir Staworko and Piotr Wieczorek},
journal = {Proceedings of the 38th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems},
title = {Containment of Shape Expression Schemas for RDF},
year = {2018},
file = {:papers/Containment of Shape Expression Schemas for RDF.pdf:PDF},
groups = {shape containment, reading},
priority = {prio1},
url = {https://api.semanticscholar.org/CorpusID:4337350},
}
@Article{Afrati2010,
author = {Foto N. Afrati and Matthew Damigos and Manolis Gergatsoulis},
journal = {Information Processing Letters},
title = {Query containment under bag and bag-set semantics},
year = {2010},
month = apr,
number = {10},
pages = {360--369},
volume = {110},
comment = {Very good paper to solve the QC problem with bag semantic-set describe well their approach and the complexity. Will be useful, if I need to implement something or to describe theoritical points.},
doi = {10.1016/j.ipl.2010.02.017},
file = {:papers/Query containment under bag and bag set semantics.pdf:PDF},
groups = {query containment bag semantic},
priority = {prio1},
publisher = {Elsevier {BV}},
ranking = {rank5},
readstatus = {read},
url = {https://doi.org/10.1016/j.ipl.2010.02.017},
}
@Misc{kopparty2010homomorphism,
author = {Swastik Kopparty and Benjamin Rossman},
title = {The Homomorphism Domination Exponent},
year = {2010},
archiveprefix = {arXiv},
eprint = {1004.2485},
file = {:papers/The Homomorphism Domination Exponent.pdf:PDF},
groups = {query containment bag semantic},
primaryclass = {math.CO},
}
@Article{Saleem2018,
author = {Muhammad Saleem and Ali Hasnain and Axel-Cyrille Ngonga Ngomo},
journal = {Journal of Web Semantics},
title = {{LargeRDFBench}: A billion triples benchmark for {SPARQL} endpoint federation},
year = {2018},
month = jan,
pages = {85--125},
volume = {48},
doi = {10.1016/j.websem.2017.12.005},
file = {:papers/LargeRDFBench A billion triples benchmark for SPARQL endpoint.pdf:PDF},
groups = {federated queries},
publisher = {Elsevier {BV}},
url = {https://doi.org/10.1016/j.websem.2017.12.005},
}
@InProceedings{10.1145/153850.153856,
author = {Chaudhuri, Surajit and Vardi, Moshe Y.},
booktitle = {Proceedings of the Twelfth ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems},
title = {Optimization of Real Conjunctive Queries},
year = {1993},
address = {New York, NY, USA},
pages = {59–70},
publisher = {Association for Computing Machinery},
series = {PODS '93},
doi = {10.1145/153850.153856},
file = {:papers/Optimization of real conjunctive queries.pdf:PDF},
groups = {query containment bag semantic},
isbn = {0897915933},
location = {Washington, D.C., USA},
numpages = {12},
url = {https://doi.org/10.1145/153850.153856},
}
@Online{spapeExpressionConvert,
author = {W3C},
file = {:papers/SPARQL Queries to Validate Shape Expressions.pdf:PDF},
groups = {shape},
priority = {prio1},
title = {SPARQL Queries to Validate Shape Expressions (informative)},
url = {https://www.w3.org/2013/ShEx/toSPARQL.html},
urldate = {2023-09-11},
year = {2013},
}
@InProceedings{Abbas2017,
author = {Abbas, Abdullah and Genev{\`e}s, Pierre and Roisin, C{\'e}cile and Laya{\"i}da, Nabil},
booktitle = {Advances in Databases and Information Systems},
title = {SPARQL Query Containment with ShEx Constraints},
year = {2017},
address = {Cham},
editor = {Kirikova, M{\={a}}r{\={\i}}te and N{\o}rv{\aa}g, Kjetil and Papadopoulos, George A.},
pages = {343--356},
publisher = {Springer International Publishing},
abstract = {ShEx (Shape Expressions) is a language for expressing constraints on RDF graphs. We consider the problem of SPARQL query containment in the presence of ShEx constraints. We first propose a sound and complete procedure for the problem of containment with ShEx, considering several SPARQL fragments. Particularly our procedure considers OPTIONAL query patterns, that turns out to be an important fragment to be studied with schemas. We then show the complexity bounds of our problem with respect to the fragments considered. To the best of our knowledge, this is the first work addressing SPARQL query containment in the presence of ShEx constraints.},
comment = {what they do is that they use the shape to transform the queries that have filters to delete them when the query will for certain don't respect the containt of the data. Given this transformation some queries can become equivalent. Also if the query is only a BGP then we can simply validate it against the shape if we delete the cardinatity of the operators.},
file = {:papers/SPARQL Query Containment with ShEx Constraints.pdf:PDF},
groups = {query containment with RDF, query containment bag semantic, query-containment with schema},
isbn = {978-3-319-66917-5},
ranking = {rank5},
readstatus = {read},
}
@InProceedings{Kostylev2015,
author = {Kostylev, Egor V. and Reutter, Juan L. and Romero, Miguel and Vrgo{\v{c}}, Domagoj},
booktitle = {The Semantic Web - ISWC 2015},
title = {SPARQL with Property Paths},
year = {2015},
address = {Cham},
editor = {Arenas, Marcelo and Corcho, Oscar and Simperl, Elena and Strohmaier, Markus and d'Aquin, Mathieu and Srinivas, Kavitha and Groth, Paul and Dumontier, Michel and Heflin, Jeff and Thirunarayan, Krishnaprasad and Thirunarayan, Krishnaprasad and Staab, Steffen},
pages = {3--18},
publisher = {Springer International Publishing},
abstract = {The original SPARQL proposal was often criticized for its inability to navigate through the structure of RDF documents. For this reason property paths were introduced in SPARQL 1.1, but up to date there are no theoretical studies examining how their addition to the language affects main computational tasks such as query evaluation, query containment, and query subsumption. In this paper we tackle all of these problems and show that although the addition of property paths has no impact on query evaluation, they do make the containment and subsumption problems substantially more difficult.},
file = {:references/SPARQL with Property Paths.pdf:PDF},
groups = {query containment with RDF, query-containment with schema},
isbn = {978-3-319-25007-6},
}
@InProceedings{10.1145/2594538.2594542,
author = {Pichler, Reinhard and Skritek, Sebastian},
booktitle = {Proceedings of the 33rd ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
title = {Containment and Equivalence of Well-Designed SPARQL},
year = {2014},
address = {New York, NY, USA},
pages = {39–50},
publisher = {Association for Computing Machinery},
series = {PODS '14},
abstract = {Query containment and query equivalence constitute important computational problems in the context of static query analysis and optimization. While these problems have been intensively studied for fragments of relational calculus, almost no works exist for the semantic web query language SPARQL. In this paper, we carry out a comprehensive complexity analysis of containment and equivalence for several fragments of SPARQL: we start with the fundamental fragment of well-designed SPARQL restricted to the AND and OPTIONAL operator. We then study basic extensions in the form of the UNION operator and/or projection. The results obtained range from NP-completeness to undecidability.},
doi = {10.1145/2594538.2594542},
file = {:papers/Containment and Equivalence of Well Designed SPARQL.pdf:PDF},
groups = {query containment with RDF},
isbn = {9781450323758},
keywords = {query containment, RDF, semantic web, SPARQL},
location = {Snowbird, Utah, USA},
numpages = {12},
url = {https://doi.org/10.1145/2594538.2594542},
}
@Article{10.1145/2500130,
author = {Letelier, Andr\'{e}s and P\'{e}rez, Jorge and Pichler, Reinhard and Skritek, Sebastian},
journal = {ACM Trans. Database Syst.},
title = {Static Analysis and Optimization of Semantic Web Queries},
year = {2013},
issn = {0362-5915},
month = {dec},
number = {4},
volume = {38},
abstract = {Static analysis is a fundamental task in query optimization. In this article we study static analysis and optimization techniques for SPARQL, which is the standard language for querying Semantic Web data. Of particular interest for us is the optionality feature in SPARQL. It is crucial in Semantic Web data management, where data sources are inherently incomplete and the user is usually interested in partial answers to queries. This feature is one of the most complicated constructors in SPARQL and also the one that makes this language depart from classical query languages such as relational conjunctive queries. We focus on the class of well-designed SPARQL queries, which has been proposed in the literature as a fragment of the language with good properties regarding query evaluation. We first propose a tree representation for SPARQL queries, called pattern trees, which captures the class of well-designed SPARQL graph patterns. Among other results, we propose several rules that can be used to transform pattern trees into a simple normal form, and study equivalence and containment. We also study the evaluation and enumeration problems for this class of queries.},
address = {New York, NY, USA},
articleno = {25},
doi = {10.1145/2500130},
file = {:papers/Static Analysis and Optimization of Semantic Web Queries.pdf:PDF},
groups = {query containment with RDF},
issue_date = {November 2013},
keywords = {query containment, Semantic Web, RDF, SPARQL, Optimization},
numpages = {45},
publisher = {Association for Computing Machinery},
url = {https://doi.org/10.1145/2500130},
}
@InProceedings{10.1145/3366423.3380177,
author = {Azzam, Amr and Fern\'{a}ndez, Javier D. and Acosta, Maribel and Beno, Martin and Polleres, Axel},
booktitle = {Proceedings of The Web Conference 2020},
title = {SMART-KG: Hybrid Shipping for SPARQL Querying on the Web},
year = {2020},
address = {New York, NY, USA},
pages = {984–994},
publisher = {Association for Computing Machinery},
series = {WWW '20},
abstract = {While Linked Data (LD) provides standards for publishing (RDF) and (SPARQL) querying Knowledge Graphs (KGs) on the Web, serving, accessing and processing such open, decentralized KGs is often practically impossible, as query timeouts on publicly available SPARQL endpoints show. Alternative solutions such as Triple Pattern Fragments (TPF) attempt to tackle the problem of availability by pushing query processing workload to the client side, but suffer from unnecessary transfer of irrelevant data on complex queries with large intermediate results. In this paper we present smart-KG, a novel approach to share the load between servers and clients, while significantly reducing data transfer volume, by combining TPF with shipping compressed KG partitions. Our evaluations show that smart-KG outperforms state-of-the-art client-side solutions and increases server-side availability towards more cost-effective and balanced hosting of open and decentralized KGs.},
comment = {When I will be doing join optimization maybe the paper will be more useful, but for the moment I don't think it helped a lot internal partition.},
doi = {10.1145/3366423.3380177},
file = {:papers/SMART KG Hybrid Shipping for SPARQL Querying on the Web.pdf:PDF},
groups = {charateristic set},
isbn = {9781450370233},
location = {Taipei, Taiwan},
numpages = {11},
ranking = {rank3},
readstatus = {skimmed},
url = {https://doi.org/10.1145/3366423.3380177},
}
@Article{Neumann2011CharacteristicSA,
author = {Thomas Neumann and Guido Moerkotte},
journal = {2011 IEEE 27th International Conference on Data Engineering},
title = {Characteristic sets: Accurate cardinality estimation for RDF queries with multiple joins},
year = {2011},
pages = {984-994},
comment = {Je pense que cela ressemble beaucoup a shape, donc une approche de convertir des shape en une approximation de caracteristique set serait bien.},
file = {:papers/Characteristic sets Accurate cardinality estimation for RDF queries with multiple joins.pdf:PDF},
groups = {charateristic set},
ranking = {rank5},
readstatus = {skimmed},
url = {https://api.semanticscholar.org/CorpusID:2208604},
}
@Article{Meimaris2017ExtendedCS,
author = {Marios Meimaris and George Papastefanatos and Nikos Mamoulis and Ioannis Anagnostopoulos},
journal = {2017 IEEE 33rd International Conference on Data Engineering (ICDE)},
title = {Extended Characteristic Sets: Graph Indexing for SPARQL Query Optimization},
year = {2017},
pages = {497-508},
file = {:papers/Extended Characteristic Sets Graph Indexing for SPARQL Query Optimization.pdf:PDF},
groups = {charateristic set},
url = {https://api.semanticscholar.org/CorpusID:3535607},
}
@Article{Meimaris2018HierarchicalCS,
author = {Marios Meimaris and George Papastefanatos},
journal = {ArXiv},
title = {Hierarchical Characteristic Set Merging for Optimizing SPARQL Queries in Heterogeneous RDF},
year = {2018},
volume = {abs/1809.02345},
file = {:papers/Hierarchical Characteristic Set Merging for Optimizing SPARQL Queries in Heterogeneous RDF.pdf:PDF},
groups = {charateristic set},
url = {https://api.semanticscholar.org/CorpusID:52176979},
}
@InProceedings{staworko_et_al:LIPIcs:2015:4985,
author = {Slawek Staworko and Iovka Boneva and Jose E. Labra Gayo and Samuel Hym and Eric G. Prud'hommeaux and Harold Solbrig},
booktitle = {18th International Conference on Database Theory (ICDT 2015)},
title = {{Complexity and Expressiveness of ShEx for RDF}},
year = {2015},
address = {Dagstuhl, Germany},
editor = {Marcelo Arenas and Mart{\'i}n Ugarte},
pages = {195--211},
publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
volume = {31},
annote = {Keywords: RDF, Schema, Graph topology, Validation, Complexity, Expressiveness},
doi = {10.4230/LIPIcs.ICDT.2015.195},
groups = {shape},
isbn = {978-3-939897-79-8},
issn = {1868-8969},
url = {http://drops.dagstuhl.de/opus/volltexte/2015/4985},
urn = {urn:nbn:de:0030-drops-49856},
}
@Misc{zervakis2019efficient,
author = {Lefteris Zervakis and Vinay Setty and Christos Tryfonopoulos and Katja Hose},
title = {Efficient Continuous Multi-Query Processing over Graph Streams},
year = {2019},
archiveprefix = {arXiv},
eprint = {1902.05134},
file = {:papers/Efficient Continuous Multi-Query Processing over Graph Streams:},
groups = {query containment bag semantic},
primaryclass = {cs.DS},
}
@InProceedings{Hartig2012,
author = {Hartig, Olaf and Freytag, Johann-Christoph},
booktitle = {Conference on Hypertext and Social Media},
title = {Foundations of Traversal Based Query Execution over Linked Data},
publisher = {ACM},
series = {HT '12},
abstract = {Query execution over the Web of Linked Data has attracted much attention recently. A particularly interesting approach is link traversal based query execution which proposes to integrate the traversal of data links into the creation of query results. Hence -in contrast to traditional query execution paradigms- this does not assume a fixed set of relevant data sources beforehand; instead, the traversal process discovers data and data sources on the fly and, thus, enables applications to tap the full potential of the Web.While several authors have studied possibilities to implement the idea of link traversal based query execution and to optimize query execution in this context, no work exists that discusses theoretical foundations of the approach in general. Our paper fills this gap.We introduce a well-defined semantics for queries that may be executed using a link traversal based approach. Based on this semantics we formally analyze properties of such queries. In particular, we study the computability of queries as well as the implications of querying a potentially infinite Web of Linked Data. Our results show that query computation in general is not guaranteed to terminate and that for any given query it is undecidable whether the execution terminates. Furthermore, we define an abstract execution model that captures the integration of link traversal into the query execution process. Based on this model we prove the soundness and completeness of link traversal based query execution and analyze an existing implementation approach.},
address = {New York, NY, USA},
doi = {10.1145/2309996.2310005},
file = {:references/Foundations of Traversal Based Query Execution.pdf:PDF},
groups = {Link traversal, Link Traversal Query Processing},
isbn = {9781450313353},
keywords = {link traversal based query execution, computability, query semantics, web of data, linked data},
location = {Milwaukee, Wisconsin, USA},
numpages = {10},
pages = {43–52},
ranking = {rank5},
readstatus = {read},
url = {https://doi.org/10.1145/2309996.2310005},
year = {2012},
}
@InProceedings{10.1145/2661829.2661876,
author = {Wu, Buwen and Zhou, Yongluan and Yuan, Pingpeng and Jin, Hai and Liu, Ling},
booktitle = {Proceedings of the 23rd ACM International Conference on Conference on Information and Knowledge Management},
title = {SemStore: A Semantic-Preserving Distributed RDF Triple Store},
year = {2014},
address = {New York, NY, USA},
pages = {509–518},
publisher = {Association for Computing Machinery},
series = {CIKM '14},
abstract = {The flexibility of the RDF data model has attracted an increasing number of organizations to store their data in an RDF format. With the rapid growth of RDF datasets, we envision that it is inevitable to deploy a cluster of computing nodes to process large-scale RDF data in order to deliver desirable query performance. In this paper, we address the challenging problems of data partitioning and query optimization in a scale-out RDF engine. We identify that existing approaches only focus on using fine-grained structural information for data partitioning, and hence fail to localize many types of complex queries. We then propose a radically different approach, where a coarse-grained structure, namely Rooted Sub-Graph (RSG), is used as the partition unit. By doing so, we can capture structural information at a much greater scale and hence are able to localize many complex queries. We also propose a k-means partitioning algorithm for allocating the RSGs onto the computing nodes as well as a query optimization strategy to minimize the inter-node communication during query processing. An extensive experimental study using benchmark datasets and real dataset shows that our engine, SemStore, outperforms existing systems by orders of magnitudes in terms of query response time.},
doi = {10.1145/2661829.2661876},
file = {:papers/SemStore A Semantic Preserving Distributed RDF Triple Store.pdf:PDF},
groups = {query decomposition},
isbn = {9781450325981},
keywords = {sparql, rdf, query processing, partitioning},
location = {Shanghai, China},
numpages = {10},
url = {https://doi.org/10.1145/2661829.2661876},
}
@Article{10.14778/2977797.2977806,
author = {Sch\"{a}tzle, Alexander and Przyjaciel-Zablocki, Martin and Skilevic, Simon and Lausen, Georg},
journal = {Proc. VLDB Endow.},
title = {S2RDF: RDF Querying with SPARQL on Spark},
year = {2016},
issn = {2150-8097},
month = {jun},
number = {10},
pages = {804–815},
volume = {9},
abstract = {RDF has become very popular for semantic data publishing due to its flexible and universal graph-like data model. Thus, the ever-increasing size of RDF data collections raises the need for scalable distributed approaches. We endorse the usage of existing infrastructures for Big Data processing like Hadoop for this purpose. Yet, SPARQL query performance is a major challenge as Hadoop is not intentionally designed for RDF processing. Existing approaches often favor certain query pattern shapes while performance drops significantly for other shapes. In this paper, we introduce a novel relational partitioning schema for RDF data called ExtVP that uses a semi-join based preprocessing, akin to the concept of Join Indices in relational databases, to efficiently minimize query input size regardless of its pattern shape and diameter. Our prototype system S2RDF is built on top of Spark and uses SQL to execute SPARQL queries over ExtVP. We demonstrate its superior performance in comparison to state of the art SPARQL-on-Hadoop approaches.},
doi = {10.14778/2977797.2977806},
file = {:papers/RDF Querying with SPARQL on Spark.pdf:PDF},
groups = {query decomposition},
issue_date = {June 2016},
numpages = {12},
publisher = {VLDB Endowment},
url = {https://doi.org/10.14778/2977797.2977806},
}
@InProceedings{10.1145/2588555.2594535,
author = {Papailiou, Nikolaos and Tsoumakos, Dimitrios and Konstantinou, Ioannis and Karras, Panagiotis and Koziris, Nectarios},
booktitle = {Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data},
title = {H2RDF+: An Efficient Data Management System for Big RDF Graphs},
year = {2014},
address = {New York, NY, USA},
pages = {909–912},
publisher = {Association for Computing Machinery},
series = {SIGMOD '14},
abstract = {The proliferation of data in RDF format has resulted in the emergence of a plethora of specialized management systems. While the ability to adapt to the complexity of a SPARQL query -- given their inherent diversity -- is crucial, current approaches do not scale well when faced with substantially complex, non-selective joins, resulting in exponential growth of execution times. In this demonstration we present H2 RDF+, an RDF store that efficiently performs distributed Merge and Sort-Merge joins using a multiple-index scheme over HBase indexes. Through a greedy planner that incorporates our cost-model, it adaptively commands for either single or multi-machine query execution based on join complexity. In this paper, we present its key scientific contributions and allow participants to interact with an H2RDF+ deployment over a Cloud infrastructure. Using a web-based GUI we allow users to load different datasets (both real and synthetic), apply any query (custom or predefined) and monitor its execution. By allowing real-time inspection of cluster status, response times and committed resources the audience will evaluate the validity of H2RDF+'s claims and perform direct comparisons to two other state-of-the-art RDF stores.},
doi = {10.1145/2588555.2594535},
file = {:papers/An Efficient Data Management System for Big.pdf:PDF},
groups = {query decomposition},
isbn = {9781450323765},
keywords = {rdf, joins, nosql, sparql, hbase, mapreduce, hadoop},
location = {Snowbird, Utah, USA},
numpages = {4},
url = {https://doi.org/10.1145/2588555.2594535},
}
@InProceedings{10.1145/2661829.2661876,
author = {Wu, Buwen and Zhou, Yongluan and Yuan, Pingpeng and Jin, Hai and Liu, Ling},
booktitle = {Proceedings of the 23rd ACM International Conference on Conference on Information and Knowledge Management},
title = {SemStore: A Semantic-Preserving Distributed RDF Triple Store},
year = {2014},
address = {New York, NY, USA},
pages = {509–518},
publisher = {Association for Computing Machinery},
series = {CIKM '14},
abstract = {The flexibility of the RDF data model has attracted an increasing number of organizations to store their data in an RDF format. With the rapid growth of RDF datasets, we envision that it is inevitable to deploy a cluster of computing nodes to process large-scale RDF data in order to deliver desirable query performance. In this paper, we address the challenging problems of data partitioning and query optimization in a scale-out RDF engine. We identify that existing approaches only focus on using fine-grained structural information for data partitioning, and hence fail to localize many types of complex queries. We then propose a radically different approach, where a coarse-grained structure, namely Rooted Sub-Graph (RSG), is used as the partition unit. By doing so, we can capture structural information at a much greater scale and hence are able to localize many complex queries. We also propose a k-means partitioning algorithm for allocating the RSGs onto the computing nodes as well as a query optimization strategy to minimize the inter-node communication during query processing. An extensive experimental study using benchmark datasets and real dataset shows that our engine, SemStore, outperforms existing systems by orders of magnitudes in terms of query response time.},
doi = {10.1145/2661829.2661876},
file = {:papers/A Semantic Preserving Distributed RDF Triple Store.pdf:PDF},
groups = {query decomposition},
isbn = {9781450325981},
keywords = {partitioning, rdf, query processing, sparql},
location = {Shanghai, China},
numpages = {10},
url = {https://doi.org/10.1145/2661829.2661876},
}
@InProceedings{Montoya2017,
author = {Montoya, Gabriela and Skaf-Molli, Hala and Hose, Katja},
booktitle = {The Semantic Web -- ISWC 2017},
title = {The Odyssey Approach for Optimizing Federated SPARQL Queries},
year = {2017},
address = {Cham},
editor = {d'Amato, Claudia and Fernandez, Miriam and Tamma, Valentina and Lecue, Freddy and Cudr{\'e}-Mauroux, Philippe and Sequeda, Juan and Lange, Christoph and Heflin, Jeff},
pages = {471--489},
publisher = {Springer International Publishing},
abstract = {Answering queries over a federation of SPARQL endpoints requires combining data from more than one data source. Optimizing queries in such scenarios is particularly challenging not only because of (i) the large variety of possible query execution plans that correctly answer the query but also because (ii) there is only limited access to statistics about schema and instance data of remote sources. To overcome these challenges, most federated query engines rely on heuristics to reduce the space of possible query execution plans or on dynamic programming strategies to produce optimal plans. Nevertheless, these plans may still exhibit a high number of intermediate results or high execution times because of heuristics and inaccurate cost estimations. In this paper, we present Odyssey, an approach that uses statistics that allow for a more accurate cost estimation for federated queries and therefore enables Odyssey to produce better query execution plans. Our experimental results show that Odyssey produces query execution plans that are better in terms of data transfer and execution time than state-of-the-art optimizers. Our experiments using the FedBench benchmark show execution time gains of at least 25 times on average.},
comment = {The paper is very interesting and can be a great base for the federated query approach even for the LTQP one I think I can get some good insight and compare the particularity of this problem with the approach of that paper.},
file = {:papers/The Odyssey Approach for Optimizing Federated.pdf:PDF},
groups = {charateristic set, federated queries},
isbn = {978-3-319-68288-4},
ranking = {rank5},
readstatus = {read},
}
@InProceedings{Schwarte2011,
author = {Schwarte, Andreas and Haase, Peter and Hose, Katja and Schenkel, Ralf and Schmidt, Michael},
booktitle = {The Semantic Web -- ISWC 2011},
title = {FedX: Optimization Techniques for Federated Query Processing on Linked Data},
year = {2011},
address = {Berlin, Heidelberg},
editor = {Aroyo, Lora and Welty, Chris and Alani, Harith and Taylor, Jamie and Bernstein, Abraham and Kagal, Lalana and Noy, Natasha and Blomqvist, Eva},
pages = {601--616},
publisher = {Springer Berlin Heidelberg},
abstract = {Motivated by the ongoing success of Linked Data and the growing amount of semantic data sources available on the Web, new challenges to query processing are emerging. Especially in distributed settings that require joining data provided by multiple sources, sophisticated optimization techniques are necessary for efficient query processing. We propose novel join processing and grouping techniques to minimize the number of remote requests, and develop an effective solution for source selection in the absence of preprocessed metadata. We present FedX, a practical framework that enables efficient SPARQL query processing on heterogeneous, virtually integrated Linked Data sources. In experiments, we demonstrate the practicability and efficiency of our framework on a set of real-world queries and data sources from the Linked Open Data cloud. With FedX we achieve a significant improvement in query performance over state-of-the-art federated query engines.},
file = {:papers/Optimization Techniques for Federated Query Processing on Linked Data.pdf:PDF},
groups = {federated queries},
isbn = {978-3-642-25073-6},
}
@InProceedings{Du2012,
author = {Du, Fang and Chen, Yueguo and Du, Xiaoyong},
booktitle = {Database Systems for Advanced Applications},
title = {Partitioned Indexes for Entity Search over RDF Knowledge Bases},
year = {2012},
address = {Berlin, Heidelberg},
editor = {Lee, Sang-goo and Peng, Zhiyong and Zhou, Xiaofang and Moon, Yang-Sae and Unland, Rainer and Yoo, Jaesoo},
pages = {141--155},
publisher = {Springer Berlin Heidelberg},
abstract = {The rapid growth of RDF data in RDF knowledge bases calls for efficient query processing techniques. This paper focuses on the star-style SPARQL join queries, which is very common when users want to search information of entities from RDF knowledge bases. We observe that the computational cost of such queries mainly comes from loading a large portion of predicate-ahead indexes. We therefore propose to partition the whole RDF knowledge bases based on the schema of individual entities, so that only entities of similar schemas are allocated into the same cluster. Such a partitioning strategy generates a pruning mechanism that effectively isolate the correlations of partitions and the queries. Consequently, queries are only conducted over a small number of partitions with small predicate-ahead indexes. Experiments over a large real-life RDF data set show the significant performance improvements achieved by our partitioned indexing techniques.},
file = {:papers/Partitioned Indexes for Entity Search.pdf:PDF},
groups = {charateristic set},
isbn = {978-3-642-29038-1},
}
@Misc{Taelman2023,
author = {Taelman, Ruben and Verborgh, Ruben},
title = {Link Traversal Query Processing Over Decentralized Environments with Structural Assumptions},
year = {2023},
abstract = {To counter societal and economic problems caused by data silos on the Web, efforts such as Solid strive to reclaim private data by storing it in permissioned documents over a large number of personal vaults across the Web. Building applications on top of such a decentralized Knowledge Graph involves significant technical challenges: centralized aggregation prior to query processing is impossible for legal reasons, and current federated querying techniques cannot handle this large scale of distribution at the expected performance. We propose an extension to Link Traversal Query Processing (LTQP) that incorporates structural properties within decentralized environments to tackle their unprecedented scale. In this article, we analyze the structural properties of the Solid decentralization ecosystem that are relevant for query execution, we introduce novel LTQP algorithms leveraging these structural properties, and evaluate their effectiveness. Our experiments indicate that these new algorithms obtain correct results in the order of seconds, which existing algorithms cannot achieve. This work reveals that a traversal-based querying method using structural assumptions can be effective for large-scale decentralization, but that advances are needed in the area of query planning for LTQP to handle more complex queries. These insights open the door to query-driven decentralized applications, in which declarative queries shield developers from the inherent complexity of a decentralized landscape.},
address = {Cham},
booktitle = {The Semantic Web -- ISWC 2023},
editor = {Payne, Terry R. and Presutti, Valentina and Qi, Guilin and Poveda-Villal{\'o}n, Mar{\'i}a and Stoilos, Giorgos and Hollink, Laura and Kaoudi, Zoi and Cheng, Gong and Li, Juanzi},
file = {:references/Link Traversal Query Processing over Decentralized Environments with Structural Assumptions.pdf:PDF},
groups = {Link traversal, Link Traversal Query Processing},
isbn = {978-3-031-47240-4},
pages = {3--22},
publisher = {Springer Nature Switzerland},
}
@Misc{hartig2016walking,
author = {Olaf Hartig and M. Tamer Özsu},
title = {Walking without a Map: Optimizing Response Times of Traversal-Based Linked Data Queries (Extended Version)},
year = {2016},
archiveprefix = {arXiv},
eprint = {1607.01046},
file = {:references/Walking without a Map.pdf:PDF},
groups = {Link Traversal Query Processing},
primaryclass = {cs.DB},
priority = {prio1},
}
@InCollection{Hartig2014,
author = {Olaf Hartig},
booktitle = {Emerging Directions in Database Systems and Applications},
publisher = {Chapman and Hall/{CRC}},
title = {Linked Data Query Processing Based on Link Traversal},
year = {2014},
month = may,
pages = {263--283},
doi = {10.1201/b16859-15},
file = {:papers/Linked Data Query Processing Based on Link Traversal.pdf:PDF},
groups = {Link Traversal Query Processing, query plan LTQP, query planning},
ranking = {rank3},
readstatus = {skimmed},
url = {https://doi.org/10.1201/b16859-15},
}
@InProceedings{Ladwig2010,
author = {Ladwig, G{\"u}nter and Tran, Thanh},
booktitle = {The Semantic Web -- ISWC 2010},
title = {Linked Data Query Processing Strategies},
year = {2010},
address = {Berlin, Heidelberg},
editor = {Patel-Schneider, Peter F. and Pan, Yue and Hitzler, Pascal and Mika, Peter and Zhang, Lei and Pan, Jeff Z. and Horrocks, Ian and Glimm, Birte},
pages = {453--469},
publisher = {Springer Berlin Heidelberg},
abstract = {Recently, processing of queries on linked data has gained attention. We identify and systematically discuss three main strategies: a bottom-up strategy that discovers new sources during query processing by following links between sources, a top-down strategy that relies on complete knowledge about the sources to select and process relevant sources, and a mixed strategy that assumes some incomplete knowledge and discovers new sources at run-time. To exploit knowledge discovered at run-time, we propose an additional step, explicitly scheduled during query processing, called correct source ranking. Additionally, we propose the adoption of stream-based query processing to deal with the unpredictable nature of data access in the distributed Linked Data environment. In experiments, we show that our implementation of the mixed strategy leads to early reporting of results and thus, more responsive query processing, while not requiring complete knowledge.},
file = {:papers/Linked Data Query Processing Strategies.pdf:PDF},
groups = {Link Traversal Query Processing, query plan LTQP, query planning},
isbn = {978-3-642-17746-0},
priority = {prio1},
}
@InProceedings{Ladwig2011,
author = {Ladwig, G{\"u}nter and Tran, Thanh},
booktitle = {The Semantic Web: Research and Applications},
title = {SIHJoin: Querying Remote and Local Linked Data},
year = {2011},
address = {Berlin, Heidelberg},
editor = {Antoniou, Grigoris and Grobelnik, Marko and Simperl, Elena and Parsia, Bijan and Plexousakis, Dimitris and De Leenheer, Pieter and Pan, Jeff},
pages = {139--153},
publisher = {Springer Berlin Heidelberg},
abstract = {The amount of Linked Data is increasing steadily. Optimized top-down Linked Data query processing based on complete knowledge about all sources, bottom-up processing based on run-time discovery of sources as well as a mixed strategy that combines them have been proposed. A particular problem with Linked Data processing is that the heterogeneity of the sources and access options lead to varying input latency, rendering the application of blocking join operators infeasible. Previous work partially address this by proposing a non-blocking iterator-based operator and another one based on symmetric-hash join. Here, we propose detailed cost models for these two operators to systematically compare them, and to allow for query optimization. Further, we propose a novel operator called the Symmetric Index Hash Join to address one open problem of Linked Data query processing: to query not only remote, but also local Linked Data. We perform experiments on real-world datasets to compare our approach against the iterator-based baseline, and create a synthetic dataset to more systematically analyze the impacts of the individual components captured by the proposed cost models.},
comment = {Article très intéressant qui présente un bon modèle de cout pour des join ainsi que leur implmentation d'un nouvelle algorithm de join dont son coût ne dépend pas du nombre de source ce qui en LTQP est vraiment très bon. Par contre pour le calcul de sélectivité, je ne suis pas sur comment cela ce fait.},
file = {:papers/SIHJoin Querying Remote and Local Linked.pdf:PDF},
groups = {Link Traversal Query Processing, query plan LTQP, query planning},
isbn = {978-3-642-21034-1},
ranking = {rank5},
readstatus = {read},
}
@InProceedings{Hartig2011,
author = {Hartig, Olaf},
booktitle = {The Semantic Web: Research and Applications},
title = {Zero-Knowledge Query Planning for an Iterator Implementation of Link Traversal Based Query Execution},
year = {2011},
address = {Berlin, Heidelberg},
editor = {Antoniou, Grigoris and Grobelnik, Marko and Simperl, Elena and Parsia, Bijan and Plexousakis, Dimitris and De Leenheer, Pieter and Pan, Jeff},
pages = {154--169},
publisher = {Springer Berlin Heidelberg},
abstract = {Link traversal based query execution is a new query execution paradigm for the Web of Data. This approach allows the execution engine to discover potentially relevant data during the query execution and, thus, enables users to tap the full potential of the Web. In earlier work we propose to implement the idea of link traversal based query execution using a synchronous pipeline of iterators. While this idea allows for an easy and efficient implementation, it introduces restrictions that cause less comprehensive result sets. In this paper we address this limitation. We analyze the restrictions and discuss how the evaluation order of a query may affect result set size and query execution costs. To identify a suitable order, we propose a heuristic for our scenario where no a-priory information about relevant data sources is present. We evaluate this heuristic by executing real-world queries over the Web of Data.},
file = {:papers/zero knowledge query planning.pdf:PDF},
groups = {query plan LTQP, query planning},
isbn = {978-3-642-21034-1},
priority = {prio1},
}
@Misc{kashif2021,
author = {Rabbani, Kashif and Lissandrini, Matteo and Hose, Katja},
title = {Optimizing SPARQL Queries using Shape Statistics},
year = {2021},
comment-id357 = {It's a really good paper that describe, and show how to use shape with added statistic plus global void statistic for query planning. I think it can easily be the based for my adaptative approach, I also think that there are some differences with this approach due to the adaptativity and the scoping of the data source.},
doi = {10.5441/002/EDBT.2021.59},
file = {:papers/Optimizing SPARQL queries using shape statistics.pdf:PDF},
groups = {Query plan with shape, query planning, idea to try},
keywords = {Database Technology},
language = {en},
publisher = {OpenProceedings.org},
ranking = {rank5},
readstatus = {read},
url = {https://openproceedings.org/2021/conf/edbt/p202.pdf},
}
@InProceedings{Haller2023AQA,
author = {David Haller},
booktitle = {PhD@VLDB},
title = {A Query-Driven Approach for SHACL Type Inference},
year = {2023},
file = {:papers/A Query Driven Approach for SHACL Type Inference.pdf:PDF},
groups = {Query plan with shape, query planning},
ranking = {rank1},
readstatus = {skimmed},
url = {https://api.semanticscholar.org/CorpusID:261102495},
}