Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entities #405

Open
wants to merge 21 commits into
base: mwp_v1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3f2cd2d
added backend (entity management)
Jun 25, 2024
60d90db
added frontend (entities)
Jun 25, 2024
e6ab5f1
update backend
Jun 25, 2024
31b49c8
update frontend
Jun 25, 2024
a4eb087
BugFix span text create_multi now doesn't create duplicates
Jun 26, 2024
615f6f3
Update entity to include KnowledgeBase and IsHuman (backend)
Jun 26, 2024
2340280
Update entity to include KnowledgeBase and IsHuman (frontend)
Jun 26, 2024
2b93d9a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
02ac98a
update db and openapi
Jun 27, 2024
a03a895
update, more efficient entity cleanup, cleanup endpoint code
Jul 4, 2024
80da369
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Jul 4, 2024
9fb795a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 4, 2024
26e9791
Activated SubRow select, and removed entity_ids
Aug 6, 2024
0eb2052
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2024
7040ba1
Added Typing projectEntitiesRows
Aug 6, 2024
91d2502
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Aug 6, 2024
030d92a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2024
7f7ab0a
Missed files/console.log
Aug 6, 2024
6258a27
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Aug 6, 2024
4344024
Changes: fixed error in entity multi create (db objs and dtos were no…
Aug 8, 2024
ca824be
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions backend/src/api/endpoints/entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from itertools import chain
from typing import List

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

from api.dependencies import get_current_user, get_db_session
from app.core.authorization.authz_user import AuthzUser
from app.core.data.crud import Crud
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.span_text import crud_span_text
from app.core.data.dto.entity import (
EntityCreate,
EntityMerge,
EntityRead,
EntityRelease,
EntityUpdate,
)

# Router for the entity endpoints: every route is mounted under "/entity",
# declares get_current_user as a dependency, and is tagged "entity".
router = APIRouter(
prefix="/entity", dependencies=[Depends(get_current_user)], tags=["entity"]
)


@router.patch(
    "/{entity_id}",
    response_model=EntityRead,
    summary="Updates the Entity with the given ID.",
)
def update_by_id(
    *,
    db: Session = Depends(get_db_session),
    entity_id: int,
    entity: EntityUpdate,
    authz_user: AuthzUser = Depends(),
) -> EntityRead:
    """Update the entity with ID `entity_id` and return its new state.

    Args:
        db: Database session injected by FastAPI.
        entity_id: ID of the entity to update (path parameter).
        entity: Update payload; its `is_human` field is overwritten with True.
        authz_user: Authorization helper; the request is checked with
            `assert_in_same_project_as` before anything is written.

    Returns:
        The updated entity, validated into an `EntityRead` DTO.
    """
    authz_user.assert_in_same_project_as(Crud.ENTITY, entity_id)

    # Force the flag regardless of what the client sent — presumably marks
    # the entity as manually edited (confirm against the EntityUpdate DTO).
    entity.is_human = True

    updated = crud_entity.update(db=db, id=entity_id, update_dto=entity)
    return EntityRead.model_validate(updated)


# Merge endpoint: combines several entities / span texts into one new entity.
@router.put(
    "/merge",
    response_model=EntityRead,
    summary="Merges entities and/or span texts with given IDs.",
)
def merge_entities(
    *,
    db: Session = Depends(get_db_session),
    entity_merge: EntityMerge,
    authz_user: AuthzUser = Depends(),
) -> EntityRead:
    """Create one new entity that owns all span texts named in `entity_merge`.

    The span texts are collected from every entity in
    `entity_merge.entity_ids` (in that order), followed by the explicitly
    listed `entity_merge.spantext_ids`.

    Args:
        db: Database session injected by FastAPI.
        entity_merge: Name, project, knowledge base ID and the entity /
            span text IDs to merge.
        authz_user: Authorization helper; the entity IDs are checked with
            `assert_in_same_project_as_many`.

    Returns:
        The newly created entity as an `EntityRead` DTO.
    """
    authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_merge.entity_ids)

    # Span texts currently attached to the entities that are being merged.
    inherited_span_text_ids = [
        span_text.id
        for entity_id in entity_merge.entity_ids
        for span_text in crud_entity.read(db=db, id=entity_id).span_texts
    ]

    merged = EntityCreate(
        name=entity_merge.name,
        project_id=entity_merge.project_id,
        span_text_ids=inherited_span_text_ids + entity_merge.spantext_ids,
        is_human=True,
        knowledge_base_id=entity_merge.knowledge_base_id,
    )
    created = crud_entity.create(db=db, create_dto=merged, force=True)
    return EntityRead.model_validate(created)


# Release endpoint: splits entities back into one entity per span text.
@router.put(
    "/release",
    response_model=List[EntityRead],
    summary="Releases entities and/or span texts with given IDs.",
)
def release_entities(
    *,
    db: Session = Depends(get_db_session),
    entity_resolve: EntityRelease,
    authz_user: AuthzUser = Depends(),
) -> List[EntityRead]:
    """Create one new entity per span text named in `entity_resolve`.

    The span texts are collected from every entity in
    `entity_resolve.entity_ids` (in that order), followed by the explicitly
    listed `entity_resolve.spantext_ids`. Each new entity is named after the
    text of its single span text.

    Args:
        db: Database session injected by FastAPI.
        entity_resolve: Project ID and the entity / span text IDs to release.
        authz_user: Authorization helper; the entity IDs are checked with
            `assert_in_same_project_as_many`.

    Returns:
        The newly created entities as `EntityRead` DTOs, one per span text.
    """
    authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_resolve.entity_ids)

    # TODO(review): the reviewer asked for this logic to move into a dedicated
    # crud_entity.release_* method that is merely called from this endpoint.
    span_text_ids = [
        span_text.id
        for entity_id in entity_resolve.entity_ids
        for span_text in crud_entity.read(db=db, id=entity_id).span_texts
    ] + entity_resolve.spantext_ids

    new_entities = [
        EntityCreate(
            name=crud_span_text.read(db=db, id=span_text_id).text,
            project_id=entity_resolve.project_id,
            span_text_ids=[span_text_id],
            # Requested in review: released entities are explicitly not
            # human-made, matching the automatically created entities in
            # the span annotation CRUD.
            is_human=False,
        )
        for span_text_id in span_text_ids
    ]
    db_objs = crud_entity.create_multi(db=db, create_dtos=new_entities, force=True)
    return [EntityRead.model_validate(db_obj) for db_obj in db_objs]
21 changes: 21 additions & 0 deletions backend/src/api/endpoints/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
from app.core.data.crud.code import crud_code
from app.core.data.crud.crud_base import NoSuchElementError
from app.core.data.crud.document_tag import crud_document_tag
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.memo import crud_memo
from app.core.data.crud.project import crud_project
from app.core.data.crud.project_metadata import crud_project_meta
from app.core.data.crud.source_document import crud_sdoc
from app.core.data.dto.action import ActionQueryParameters, ActionRead
from app.core.data.dto.code import CodeRead
from app.core.data.dto.document_tag import DocumentTagRead
from app.core.data.dto.entity import EntityRead
from app.core.data.dto.memo import AttachedObjectType, MemoCreate, MemoInDB, MemoRead
from app.core.data.dto.preprocessing_job import PreprocessingJobRead
from app.core.data.dto.project import ProjectCreate, ProjectRead, ProjectUpdate
Expand Down Expand Up @@ -530,3 +532,22 @@ def find_duplicate_text_sdocs(
return DuplicateFinderService().find_duplicate_text_sdocs(
project_id=proj_id, max_different_words=max_different_words
)


@router.get(
    "/{proj_id}/entity",
    response_model=List[EntityRead],
    summary="Returns all Entities of the Project with the given ID",
)
def get_project_entities(
    *,
    proj_id: int,
    db: Session = Depends(get_db_session),
    authz_user: AuthzUser = Depends(),
) -> List[EntityRead]:
    """Return every entity of project `proj_id`, ordered by ascending ID.

    Args:
        proj_id: ID of the project (path parameter).
        db: Database session injected by FastAPI.
        authz_user: Authorization helper; the request is checked with
            `assert_in_project` before the entities are read.

    Returns:
        The project's entities as `EntityRead` DTOs sorted by `id`.
    """
    authz_user.assert_in_project(proj_id)

    entities = [
        EntityRead.model_validate(db_obj)
        for db_obj in crud_entity.read_by_project(db=db, proj_id=proj_id)
    ]
    return sorted(entities, key=lambda dto: dto.id)
4 changes: 4 additions & 0 deletions backend/src/app/core/data/crud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from app.core.data.crud.concept_over_time_analysis import crud_cota
from app.core.data.crud.current_code import crud_current_code
from app.core.data.crud.document_tag import crud_document_tag
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.memo import crud_memo
from app.core.data.crud.object_handle import crud_object_handle
from app.core.data.crud.preprocessing_job import crud_prepro_job
Expand All @@ -21,6 +22,7 @@
from app.core.data.crud.span_annotation import crud_span_anno
from app.core.data.crud.span_group import crud_span_group
from app.core.data.crud.span_text import crud_span_text
from app.core.data.crud.span_text_entity_link import crud_span_text_entity_link
from app.core.data.crud.timeline_analysis import crud_timeline_analysis
from app.core.data.crud.user import crud_user
from app.core.data.crud.whiteboard import crud_whiteboard
Expand Down Expand Up @@ -51,3 +53,5 @@ class Crud(Enum):
COTA_ANALYSIS = crud_cota
USER = crud_user
WHITEBOARD = crud_whiteboard
ENTITY = crud_entity
SPAN_TEXT_ENTITY_LINK = crud_span_text_entity_link
75 changes: 75 additions & 0 deletions backend/src/app/core/data/crud/entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from typing import List, Optional

from fastapi.encoders import jsonable_encoder
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.core.data.crud.crud_base import CRUDBase
from app.core.data.crud.span_text_entity_link import crud_span_text_entity_link
from app.core.data.dto.entity import (
EntityCreate,
EntityUpdate,
)
from app.core.data.dto.span_text_entity_link import (
SpanTextEntityLinkCreate,
)
from app.core.data.orm.entity import EntityORM
from app.core.data.orm.span_text_entity_link import SpanTextEntityLinkORM


class CRUDEntity(CRUDBase[EntityORM, EntityCreate, EntityUpdate]):
    """CRUD operations for entities and their links to span texts."""

    def create(
        self, db: Session, *, create_dto: EntityCreate, force: bool = True
    ) -> EntityORM:
        """Create a single entity by delegating to `create_multi`.

        Args:
            db: Database session.
            create_dto: Entity to create, including the span text IDs to link.
            force: Passed through to the span-text/entity link creation.

        Returns:
            The ORM object of the created entity.
        """
        # create_multi returns exactly one ORM object per DTO, so a
        # one-element input always yields a one-element result.
        return self.create_multi(db=db, create_dtos=[create_dto], force=force)[0]

    def create_multi(
        self, db: Session, *, create_dtos: List[EntityCreate], force: bool = True
    ) -> List[EntityORM]:
        """Create several entities and link each one to its span texts.

        Args:
            db: Database session.
            create_dtos: Entities to create; `span_text_ids` of each DTO are
                turned into span-text/entity links instead of ORM columns.
            force: Passed through to `crud_span_text_entity_link.create_multi`.

        Returns:
            The created ORM objects, in the same order as `create_dtos`.
        """
        # TODO(review): the reviewer asked for this method to be implemented
        # with a hash map (span_text_id -> entity) and to create all entities
        # from that map afterwards.
        if not create_dtos:
            return []

        db_objs = [
            self.model(**jsonable_encoder(dto, exclude={"span_text_ids"}))
            for dto in create_dtos
        ]
        db.add_all(db_objs)
        # commit() flushes the pending inserts itself, so the primary keys
        # needed for the links below are available afterwards.
        db.commit()

        links = [
            SpanTextEntityLinkCreate(
                linked_entity_id=db_obj.id, linked_span_text_id=span_text_id
            )
            for db_obj, create_dto in zip(db_objs, create_dtos)
            for span_text_id in create_dto.span_text_ids
        ]
        crud_span_text_entity_link.create_multi(db=db, create_dtos=links, force=force)
        db.commit()

        # TODO(review): the reviewer asked for this cleanup call to be removed
        # from create_multi. It is kept for now because callers that re-link
        # span texts to a new entity presumably rely on it to drop the old,
        # now unlinked entities — confirm before removing.
        # NOTE(review): an entity created with no span_text_ids has no links
        # and would presumably be deleted right here — confirm.
        self.remove_all_unused_entites(db=db)
        return db_objs

    def read_by_project(self, db: Session, proj_id: int) -> List[EntityORM]:
        """Return all entities whose `project_id` equals `proj_id`."""
        return db.query(self.model).filter(self.model.project_id == proj_id).all()

    def remove_multi(self, db: Session, *, ids: List[int]) -> List[EntityORM]:
        """Delete the entities with the given IDs and commit.

        Args:
            db: Database session.
            ids: IDs of the entities to delete.

        Returns:
            The ORM objects that matched `ids`, loaded before the delete.
            For an empty `ids` list nothing is queried or committed and an
            empty list is returned.
        """
        if not ids:
            # Nothing to delete; skip the SELECT/DELETE round-trips.
            return []

        matching = db.query(EntityORM).filter(EntityORM.id.in_(ids))
        removed = matching.all()
        # Answering the review question "What is this?":
        # synchronize_session=False issues the bulk DELETE without trying to
        # update the objects already loaded into the session.
        matching.delete(synchronize_session=False)
        db.commit()
        return removed

    def remove_all_unused_entites(self, db: Session) -> List[EntityORM]:
        """Delete every entity that has no span-text/entity link.

        The lookup is not restricted to a project: all entities without a
        row in the link table are removed.

        Returns:
            The ORM objects of the removed entities.
        """
        # (The "entites" spelling is part of the public method name.)
        linked_ids = select(SpanTextEntityLinkORM.linked_entity_id).distinct().subquery()
        # Anti-join: keep only entities with no matching row in linked_ids.
        orphans = (
            db.query(EntityORM)
            .outerjoin(linked_ids, EntityORM.id == linked_ids.c.linked_entity_id)
            .filter(linked_ids.c.linked_entity_id.is_(None))
            .all()
        )
        return self.remove_multi(db=db, ids=[entity.id for entity in orphans])


# Module-level CRUDEntity instance bound to EntityORM; other modules import
# it as `crud_entity`.
crud_entity = CRUDEntity(EntityORM)
44 changes: 44 additions & 0 deletions backend/src/app/core/data/crud/span_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
from sqlalchemy.orm import Session

from app.core.data.crud.annotation_document import crud_adoc
from app.core.data.crud.code import crud_code
from app.core.data.crud.crud_base import CRUDBase
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.span_group import crud_span_group
from app.core.data.crud.span_text import crud_span_text
from app.core.data.dto.action import ActionType
from app.core.data.dto.code import CodeRead
from app.core.data.dto.entity import EntityCreate
from app.core.data.dto.span_annotation import (
SpanAnnotationCreate,
SpanAnnotationCreateWithCodeId,
Expand All @@ -35,6 +38,20 @@ def create(
db=db, create_dto=SpanTextCreate(text=create_dto.span_text)
)

# create the entity
code = (
db.query(CodeORM).filter(CodeORM.id == create_dto.current_code_id).first()
)
project_id = code.project_id
crud_entity.create(
db=db,
create_dto=EntityCreate(
name=create_dto.span_text,
project_id=project_id,
span_text_ids=[span_text_orm.id],
),
)

# create the SpanAnnotation (and link the SpanText via FK)
dto_obj_data = jsonable_encoder(create_dto.model_dump(exclude={"span_text"}))
# noinspection PyArgumentList
Expand Down Expand Up @@ -90,6 +107,33 @@ def create_multi(
],
)

# create the entities
code = crud_code.read(db=db, id=create_dtos[0].current_code_id)
project_id = code.project_id
try:
crud_entity.create_multi(
db=db,
create_dtos=[
EntityCreate(
project_id=project_id,
name=dto.span_text,
span_text_ids=[id.id],
is_human=False,
)
for id, dto in zip(span_texts_orm, create_dtos)
],
)
except Exception as e:
raise Exception(
str(e)
+ "\n"
+ str(span_texts_orm)
+ "\n"
+ str([type(id) for id in span_texts_orm])
+ "\n"
+ str([id.as_dict() for id in span_texts_orm])
)

# create the SpanAnnotation (and link the SpanText via FK)
dto_objs_data = [
jsonable_encoder(create_dto.model_dump(exclude={"span_text"}))
Expand Down
15 changes: 10 additions & 5 deletions backend/src/app/core/data/crud/span_text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional
from typing import Dict, List, Optional

from sqlalchemy.orm import Session

Expand All @@ -25,19 +25,24 @@ def create_multi(
# Only create when not already present
span_texts: List[SpanTextORM] = []
to_create: List[SpanTextCreate] = []
span_text_idx: List[int] = []
to_create_idx: List[int] = []
text_create_map: Dict[str, int] = {}

# TODO best would be "insert all (ignore existing) followed by get all"
for i, create_dto in enumerate(create_dtos):
db_obj = self.read_by_text(db=db, text=create_dto.text)
span_texts.append(db_obj)
if db_obj is None:
to_create.append(create_dto)
to_create_idx.append(i)
if create_dto.text not in text_create_map:
text_create_map[create_dto.text] = len(to_create)
to_create.append(create_dto)
span_text_idx.append(i)
to_create_idx.append(text_create_map[create_dto.text])
if len(to_create) > 0:
created = super().create_multi(db=db, create_dtos=to_create)
for i, obj in zip(to_create_idx, created):
span_texts[i] = obj
for obj_idx, pos_idx in zip(to_create_idx, span_text_idx):
span_texts[pos_idx] = created[obj_idx]
# Ignore types: We've made sure that no `None` values remain since we've created
# span texts to replace them
return span_texts # type: ignore
Expand Down
Loading
Loading