Skip to content

Commit

Permalink
fix redis tokenization for icd11 ids (#2193)
Browse files Browse the repository at this point in the history
* fix redis tokenization for icd11 ids

* remove index on the default label field

* rename search vector field to be consistent with other models

* add test case

---------

Co-authored-by: Vignesh Hari <[email protected]>
  • Loading branch information
sainak and vigneshhari authored May 28, 2024
1 parent 9edfed0 commit 0e947b8
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 60 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ pyjwt = "==2.8.0"
python-slugify = "==8.0.1"
pywebpush = "==1.14.0"
redis = {extras = ["hiredis"], version = "<5.0.0"} # constraint for redis-om
redis-om = "==0.3.1"
requests = "==2.31.0"
sentry-sdk = "==1.30.0"
whitenoise = "==6.6.0"
redis-om = "==0.2.1"

[dev-packages]
black = "==24.4.2"
Expand Down
110 changes: 55 additions & 55 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion care/facility/api/viewsets/icd.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def list(self, request):

query = [ICD11.has_code == 1]
if q := request.query_params.get("query"):
query.append(ICD11.label % query_builder(q))
query.append(ICD11.vec % query_builder(q))

result = FindQuery(expressions=query, model=ICD11, limit=limit).execute(
exhaust_results=False
Expand Down
5 changes: 4 additions & 1 deletion care/facility/static_data/icd11.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ class ICD11Object(TypedDict):

class ICD11(BaseRedisModel):
id: int = Field(primary_key=True)
label: str = Field(index=True, full_text_search=True)
label: str
chapter: str
has_code: int = Field(index=True)

vec: str = Field(index=True, full_text_search=True)

def get_representation(self) -> ICD11Object:
return {
"id": self.id,
Expand All @@ -45,6 +47,7 @@ def load_icd11_diagnosis():
label=diagnosis[1],
chapter=diagnosis[2] or "",
has_code=1 if re.match(DISEASE_CODE_PATTERN, diagnosis[1]) else 0,
vec=diagnosis[1].replace(".", "\\.", 1),
).save()
Migrator().run()
print("Done")
Expand Down
3 changes: 3 additions & 0 deletions care/facility/tests/test_icd11_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def test_search_with_disease_code(self):
res = self.search_icd11("ME24.A1")
self.assertContains(res, "ME24.A1 Haemorrhage of anus and rectum")

res = self.search_icd11("CA22.Z")
self.assertContains(res, "CA22.Z Chronic obstructive pulmonary disease")

res = self.search_icd11("1A00 Cholera")
self.assertContains(res, "1A00 Cholera")

Expand Down
4 changes: 2 additions & 2 deletions care/utils/static_data/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from redis_om.model.token_escaper import TokenEscaper

# Characters escaped before a word is placed in a full-text search query.
# "." is included so that dotted ICD-11 codes such as "ME24.A1" or "CA22.Z"
# are sent as a single escaped token instead of being treated as a separator
# (presumably matching the escaped form stored in the indexed field — verify
# against the loader that populates it).
token_escaper = TokenEscaper(re.compile(r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"))


def query_builder(query: str) -> str:
    """
    Build a query for Redis full-text search from a given query string.

    The stripped input is split on whitespace from the right into at most
    four parts (``maxsplit=3``). Each part is escaped with ``token_escaper``
    and suffixed with ``*``; the parts are joined with single spaces.

    Returns the assembled query string. A blank or whitespace-only input
    produces no parts and therefore yields just ``"*"``.
    """
    # Dots are deliberately not stripped from the input: the escaper handles
    # "." itself, so a code like "CA22.Z" keeps its separator in the query.
    # Because the split runs from the right, any words beyond the last three
    # stay together in the first part, whitespace included.
    words = query.strip().rsplit(maxsplit=3)
    return f"{'* '.join(token_escaper.escape(word) for word in words)}*"

0 comments on commit 0e947b8

Please sign in to comment.