-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Entities #405
Open
bigabig
wants to merge
21
commits into
mwp_v1
Choose a base branch
from
entities
base: mwp_v1
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Entities #405
Changes from 12 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
3f2cd2d
added backend (entity management)
60d90db
added frontend (entities)
e6ab5f1
update backend
31b49c8
update frontend
a4eb087
BugFix span text create_multi now doesnt create duplicates
615f6f3
Update entity to include KnowledgeBase and IsHuman (backend)
2340280
Update entity to include KnowledgeBase and IsHuman (frontend)
2b93d9a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 02ac98a
update db and openapi
a03a895
update, more efficient entity cleanup, cleanup enpoint code
80da369
Merge branch 'entities' of github.com:uhh-lt/dats into entities
9fb795a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 26e9791
Activated SubRow select, and removed entity_ids
0eb2052
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 7040ba1
Added Typing projectEntitiesRows
91d2502
Merge branch 'entities' of github.com:uhh-lt/dats into entities
030d92a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 7f7ab0a
Missed files/console.log
6258a27
Merge branch 'entities' of github.com:uhh-lt/dats into entities
4344024
Changes: fixed error in entity multi create (db objs and dtos were no…
ca824be
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
112 changes: 112 additions & 0 deletions
112
backend/src/alembic/versions/13cc78f77731_add_entity_and_span_text_entity_link.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
"""add entity and span text entity link | ||
|
||
Revision ID: 13cc78f77731 | ||
Revises: 2b91203d1bb6 | ||
Create Date: 2024-06-27 16:05:14.589423 | ||
|
||
""" | ||
|
||
from typing import Sequence, Union | ||
|
||
import sqlalchemy as sa | ||
|
||
from alembic import op | ||
|
||
# Alembic revision identifiers: this migration's own ID and the ID of the
# revision it is applied on top of.
revision: str = "13cc78f77731"
down_revision: Union[str, None] = "2b91203d1bb6"
# Neither branch labels nor dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||
|
||
def upgrade() -> None:
    """Create the ``entity`` and ``spantextentitylink`` tables with their indexes."""
    # Originally auto-generated by Alembic; the statement order is preserved.

    # --- entity -----------------------------------------------------------
    entity_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "created", sa.DateTime(), server_default=sa.text("now()"), nullable=True
        ),
        sa.Column(
            "updated", sa.DateTime(), server_default=sa.text("now()"), nullable=True
        ),
        sa.Column("is_human", sa.Boolean(), nullable=False),
        sa.Column("knowledge_base_id", sa.String(), nullable=False),
        sa.Column("project_id", sa.Integer(), nullable=False),
    ]
    op.create_table(
        "entity",
        *entity_columns,
        # Deleting a project cascades to its entities.
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    # One non-unique index per column, named ix_entity_<column>.
    for column_name in (
        "created",
        "id",
        "is_human",
        "knowledge_base_id",
        "name",
        "project_id",
    ):
        op.create_index(
            op.f(f"ix_entity_{column_name}"), "entity", [column_name], unique=False
        )

    # --- spantextentitylink -------------------------------------------------
    link_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("linked_entity_id", sa.Integer(), nullable=True),
        sa.Column("linked_span_text_id", sa.Integer(), nullable=True),
        sa.Column("is_human", sa.Boolean(), nullable=False),
    ]
    op.create_table(
        "spantextentitylink",
        *link_columns,
        # A link row is deleted when either side of the link is deleted.
        sa.ForeignKeyConstraint(
            ["linked_entity_id"], ["entity.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(
            ["linked_span_text_id"], ["spantext.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    # One non-unique index per column, named ix_spantextentitylink_<column>.
    for column_name in (
        "id",
        "is_human",
        "linked_entity_id",
        "linked_span_text_id",
    ):
        op.create_index(
            op.f(f"ix_spantextentitylink_{column_name}"),
            "spantextentitylink",
            [column_name],
            unique=False,
        )
|
||
|
||
def downgrade() -> None:
    """Drop the ``spantextentitylink`` and ``entity`` tables and their indexes."""
    # Originally auto-generated by Alembic. The link table goes first because
    # it holds a foreign key to ``entity``; indexes are dropped before their
    # table, in the same order as the generated script.
    for column_name in (
        "linked_span_text_id",
        "linked_entity_id",
        "is_human",
        "id",
    ):
        op.drop_index(
            op.f(f"ix_spantextentitylink_{column_name}"),
            table_name="spantextentitylink",
        )
    op.drop_table("spantextentitylink")

    for column_name in (
        "project_id",
        "name",
        "knowledge_base_id",
        "is_human",
        "id",
        "created",
    ):
        op.drop_index(op.f(f"ix_entity_{column_name}"), table_name="entity")
    op.drop_table("entity")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from typing import List | ||
|
||
from fastapi import APIRouter, Depends | ||
from sqlalchemy.orm import Session | ||
|
||
from api.dependencies import get_current_user, get_db_session | ||
from app.core.authorization.authz_user import AuthzUser | ||
from app.core.data.crud import Crud | ||
from app.core.data.crud.entity import crud_entity | ||
from app.core.data.dto.entity import ( | ||
EntityMerge, | ||
EntityRead, | ||
EntityRelease, | ||
EntityUpdate, | ||
) | ||
|
||
# Router for all "/entity" routes; every route registered on it runs the
# get_current_user dependency and is tagged "entity".
router = APIRouter(
    prefix="/entity",
    tags=["entity"],
    dependencies=[Depends(get_current_user)],
)
|
||
|
||
@router.patch(
    "/{entity_id}",
    summary="Updates the Entity with the given ID.",
    response_model=EntityRead,
)
def update_by_id(
    *,
    db: Session = Depends(get_db_session),
    entity_id: int,
    entity: EntityUpdate,
    authz_user: AuthzUser = Depends(),
) -> EntityRead:
    # Plain comments are used instead of a docstring on purpose: FastAPI
    # publishes an endpoint docstring as the OpenAPI description.
    #
    # The authorization check runs before anything is written.
    authz_user.assert_in_same_project_as(Crud.ENTITY, entity_id)

    # Every update through this endpoint marks the entity as human-edited,
    # overriding whatever the client sent for is_human.
    entity.is_human = True

    updated_entity = crud_entity.update(db=db, id=entity_id, update_dto=entity)
    return EntityRead.model_validate(updated_entity)
|
||
|
||
# Merge endpoint: combines the given entities and/or span texts into one entity.
@router.put(
    "/merge",
    summary="Merges entities and/or span texts with given IDs.",
    response_model=EntityRead,
)
def merge_entities(
    *,
    db: Session = Depends(get_db_session),
    entity_merge: EntityMerge,
    authz_user: AuthzUser = Depends(),
) -> EntityRead:
    # NOTE(review): only entity_ids are passed to the authorization check;
    # spantext_ids and project_id from the request body are not checked here.
    # Confirm whether they are validated elsewhere.
    authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_merge.entity_ids)

    merged_entity = crud_entity.merge(db, entity_merge=entity_merge)
    return EntityRead.model_validate(merged_entity)
|
||
|
||
# Release endpoint: returns one entity per released span text.
@router.put(
    "/release",
    summary="Releases entities and/or span texts with given IDs.",
    response_model=List[EntityRead],
)
def release_entities(
    *,
    db: Session = Depends(get_db_session),
    entity_release: EntityRelease,
    authz_user: AuthzUser = Depends(),
) -> List[EntityRead]:
    # NOTE(review): only entity_ids are passed to the authorization check;
    # spantext_ids and project_id from the request body are not checked here.
    # Confirm whether they are validated elsewhere.
    authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_release.entity_ids)

    released_entities = crud_entity.release(db=db, entity_release=entity_release)
    return list(map(EntityRead.model_validate, released_entities))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
from itertools import chain
from typing import List, Optional

from fastapi.encoders import jsonable_encoder
from sqlalchemy.orm import Session

from app.core.data.crud.crud_base import CRUDBase
from app.core.data.crud.span_text import crud_span_text
from app.core.data.crud.span_text_entity_link import crud_span_text_entity_link
from app.core.data.dto.entity import (
    EntityCreate,
    EntityMerge,
    EntityRelease,
    EntityUpdate,
)
from app.core.data.dto.span_text_entity_link import (
    SpanTextEntityLinkCreate,
)
from app.core.data.orm.entity import EntityORM
from app.core.data.orm.span_text_entity_link import SpanTextEntityLinkORM
|
||
|
||
class CRUDEntity(CRUDBase[EntityORM, EntityCreate, EntityUpdate]):
    """CRUD operations for entities and their links to span texts."""

    def create(
        self, db: Session, *, create_dto: EntityCreate, force: bool = True
    ) -> Optional[EntityORM]:
        """Create a single entity via :meth:`create_multi`.

        Returns:
            The created entity, or ``None`` when ``create_multi`` created
            nothing (e.g. the DTO has no span texts, or ``force`` is false and
            all of its span texts are already linked).
        """
        result = self.create_multi(db=db, create_dtos=[create_dto], force=force)
        return result[0] if result else None

    def create_multi(
        self, db: Session, *, create_dtos: List[EntityCreate], force: bool = True
    ) -> List[EntityORM]:
        """Create entities and link each span text to exactly one of them.

        Args:
            db: Database session.
            create_dtos: Entities to create. All are assumed to belong to the
                project of the first DTO.
            force: If true, span texts that are already linked are reassigned
                to the new entities, and previously linked entities that end
                up without any link are removed. If false, already linked
                span texts are left untouched.

        Returns:
            The created entities. DTOs that end up owning no span text are
            skipped and produce no entity.
        """
        if not create_dtos:
            return []

        # assumption: all entities belong to the same project
        project_id = create_dtos[0].project_id

        # span_text_id -> index of the DTO that owns it. When several DTOs
        # (or one DTO several times) name the same span text, the last one
        # wins, so every span text ends up with exactly one owner.
        owner_by_span_text = {}
        for index, create_dto in enumerate(create_dtos):
            for span_text_id in create_dto.span_text_ids:
                owner_by_span_text[span_text_id] = index
        if not owner_by_span_text:
            return []

        span_text_ids = list(owner_by_span_text)
        existing_links = crud_span_text_entity_link.read_multi_span_text_and_project_id(
            db=db, span_text_ids=span_text_ids, project_id=project_id
        )
        old_entity_ids = {link.linked_entity_id for link in existing_links}

        if not force:
            # A span text that is already assigned must not be reassigned.
            # pop() instead of del: the same span text may appear in more
            # than one existing link.
            for link in existing_links:
                owner_by_span_text.pop(link.linked_span_text_id, None)

        # Invert the map: DTO index -> span texts that DTO actually owns.
        span_texts_by_owner = {}
        for span_text_id, index in owner_by_span_text.items():
            span_texts_by_owner.setdefault(index, []).append(span_text_id)
        if not span_texts_by_owner:
            return []

        kept_dtos = [
            dto for index, dto in enumerate(create_dtos) if index in span_texts_by_owner
        ]
        owned_span_texts = [
            span_texts_by_owner[index]
            for index in range(len(create_dtos))
            if index in span_texts_by_owner
        ]

        db_objs = [
            self.model(**jsonable_encoder(dto, exclude={"span_text_ids"}))
            for dto in kept_dtos
        ]
        db.add_all(db_objs)
        # commit() flushes pending changes, so the new rows get their ids here.
        db.commit()

        # Link only the span texts each entity owns according to the map, not
        # everything listed in its DTO; otherwise filtered-out span texts
        # would be linked anyway.
        links = [
            SpanTextEntityLinkCreate(
                linked_entity_id=db_obj.id, linked_span_text_id=span_text_id
            )
            for db_obj, owned in zip(db_objs, owned_span_texts)
            for span_text_id in owned
        ]
        crud_span_text_entity_link.create_multi(db=db, create_dtos=links)
        db.commit()

        if force:
            # Entities that were linked before but are no longer referenced by
            # any of the touched span texts are candidates for cleanup.
            current_links = (
                crud_span_text_entity_link.read_multi_span_text_and_project_id(
                    db=db, span_text_ids=span_text_ids, project_id=project_id
                )
            )
            new_entity_ids = {link.linked_entity_id for link in current_links}
            self.remove_unused_entites(
                db=db, ids=list(old_entity_ids - new_entity_ids)
            )
        return db_objs

    def read_by_project(self, db: Session, proj_id: int) -> List[EntityORM]:
        """Return all entities of the project with the given ID."""
        return db.query(self.model).filter(self.model.project_id == proj_id).all()

    def remove_multi(self, db: Session, *, ids: List[int]) -> List[EntityORM]:
        """Delete the entities with the given IDs and return the loaded rows."""
        # Load the rows first so they can be returned after the bulk delete.
        removed = db.query(EntityORM).filter(EntityORM.id.in_(ids)).all()
        # Reviewer question on this argument ("What is this?"):
        # synchronize_session=False makes the bulk DELETE skip updating the
        # objects already loaded in the session to reflect the deletion.
        db.query(EntityORM).filter(EntityORM.id.in_(ids)).delete(
            synchronize_session=False
        )
        db.commit()
        return removed

    def remove_unused_entites(self, db: Session, ids: List[int]) -> List[EntityORM]:
        """Delete those of the given entities that no span text links to.

        Returns:
            The entities that were removed (empty if all are still linked).
        """
        if not ids:
            return []
        linked_ids_result = (
            db.query(SpanTextEntityLinkORM.linked_entity_id)
            .filter(SpanTextEntityLinkORM.linked_entity_id.in_(ids))
            .distinct()
            .all()
        )
        linked_ids = {row[0] for row in linked_ids_result}
        unused_ids = list(set(ids) - linked_ids)
        if not unused_ids:
            return []
        return self.remove_multi(db=db, ids=unused_ids)

    def _collect_span_text_ids(
        self, db: Session, entity_ids: List[int], extra_span_text_ids: List[int]
    ) -> List[int]:
        """Return the span text ids of the given entities, then the extra ids."""
        from_entities = chain.from_iterable(
            [span_text.id for span_text in self.read(db=db, id=entity_id).span_texts]
            for entity_id in entity_ids
        )
        return list(from_entities) + extra_span_text_ids

    def merge(self, db: Session, entity_merge: EntityMerge) -> EntityORM:
        """Create one human-made entity owning all given span texts.

        The span texts of ``entity_merge.entity_ids`` and the explicitly
        listed ``entity_merge.spantext_ids`` are reassigned to the new entity
        (``force=True``).
        """
        all_span_texts = self._collect_span_text_ids(
            db, entity_merge.entity_ids, entity_merge.spantext_ids
        )
        new_entity = EntityCreate(
            name=entity_merge.name,
            project_id=entity_merge.project_id,
            span_text_ids=all_span_texts,
            is_human=True,
            knowledge_base_id=entity_merge.knowledge_base_id,
        )
        return self.create(db=db, create_dto=new_entity, force=True)

    def release(self, db: Session, entity_release: EntityRelease) -> List[EntityORM]:
        """Give every affected span text its own entity named after its text.

        Covers the span texts of ``entity_release.entity_ids`` plus
        ``entity_release.spantext_ids``. Afterwards the released entities that
        no longer have any link are removed.
        """
        all_span_texts = self._collect_span_text_ids(
            db, entity_release.entity_ids, entity_release.spantext_ids
        )
        new_entities = []
        for span_text_id in all_span_texts:
            span_text = crud_span_text.read(db=db, id=span_text_id)
            new_entities.append(
                EntityCreate(
                    name=span_text.text,
                    project_id=entity_release.project_id,
                    span_text_ids=[span_text_id],
                )
            )
        db_objs = self.create_multi(db=db, create_dtos=new_entities, force=True)
        self.remove_unused_entites(db=db, ids=entity_release.entity_ids)
        return db_objs


# Module-level instance of the entity CRUD class.
crud_entity = CRUDEntity(EntityORM)
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
create_multi muss mithilfe einer Hash-Map umgesetzt werden:
span_text_id -> entity.
Dann alle Entities erstellen.