Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat 309: Updates TARS Mappings #342

Merged
merged 5 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/aind_metadata_service/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Additional response models not defined in aind-data-schema"""

from typing import Optional
from typing import List, Optional

from aind_data_schema.core.data_description import Funding
from aind_data_schema.core.procedures import ViralMaterial
from pydantic import BaseModel, Field, field_validator


Expand Down Expand Up @@ -33,3 +34,9 @@ class FundingInformation(Funding):
information from the Funding SmartSheet"""

investigators: Optional[str] = Field(default=None)


class ViralMaterialInformation(ViralMaterial):
    """Viral Material with Stock Titer from SLIMS

    Extends ViralMaterial with a single optional field, ``stock_titer``.
    """

    # NOTE(review): the docstring names SLIMS, but in this change set the
    # value is populated from the "titers" entries of a TARS viral prep lot
    # (see TarsResponseHandler.map_stock_titer) — confirm the intended source.
    # Optional list of integer titer values; None when not provided.
    stock_titer: Optional[List[int]] = Field(default=None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is the stock titer a List?

Copy link
Contributor Author

@mekhlakapoor mekhlakapoor Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In TARS, a user can technically add more than one Titer object (each having a value for "result"). In the example below, you can see the + sign on the top right to add more than one. Right now the stock_titer list is all of these titers (Preferred and not). If we only want the preferred value let me know.
image

Copy link
Contributor

@saskiad saskiad Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have a list of titers, we need to know what they map to. A single plasmid can only have one titer, so then we need a matching list for the plasmids.

It doesn’t make sense to have a “preferred” titer vs non-preferred, so that suggests to me that some of this is TARS functionality that they are just duplicating between modules. But if multiple titers is a thing for a prep lot, then we have to map plasmids differently than we do. E.g. titer and plasmids need to be linked directly.

Copy link
Contributor Author

@mekhlakapoor mekhlakapoor Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, I'm just returning the Preferred plasmid alias for plasmid_tars_alias. @dyf suggested we update the schema in a later ticket to handle the titers/plasmids list. This PR changes the tars endpoint to return a custom model dictionary.

Copy link
Contributor

@saskiad saskiad Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to do it right and have the plasmid be its own class.
But right now looking at TARS I don’t see a link between plasmids and titer, which means nobody has the information. Can we find an actual example of a prep lot with multiple plasmids? We’re doing all this work to accommodate what I understand is an uncommon (but real) use case and it would really help to have an actual example to work with rather than trying to build for hypotheticals.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I talked with Aaron. (a) no examples exist in TARS and (b) there will only be one titer because the plasmids are packed in one capsid. So make titer not a list and we’re good to go.

47 changes: 31 additions & 16 deletions src/aind_metadata_service/tars/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,24 @@ def _get_molecules_response(
response = requests.get(query, headers=headers)
return response

def _get_virus_response(self, virus_name: str) -> requests.models.Response:
    """
    Look up a virus in the TARS Viruses endpoint by alias name.

    Parameters
    ----------
    virus_name: str
      Value placed in the ``search`` query parameter; the search is
      restricted to the ``aliases.name`` field.

    Returns
    -------
    requests.models.Response
      The unmodified response of the GET request. No status check is
      performed here.
    """
    # Read the headers first, then assemble the URL.
    request_headers = self._headers
    url_parts = [
        f"{self.resource}/api/v1/Viruses",
        f"?order=1&orderBy=id",
        f"&searchFields=aliases.name",
        f"&search={virus_name}",
    ]
    # NOTE(review): virus_name is interpolated without URL-encoding — confirm
    # that alias names never contain characters such as "&" or "#".
    return requests.get("".join(url_parts), headers=request_headers)

def get_injection_materials_info(
self, prep_lot_number: str
) -> ModelResponse:
Expand All @@ -140,24 +158,21 @@ def get_injection_materials_info(
injection_materials = []

for lot in data:
viral_prep_aliases = trh.map_virus_aliases(
aliases=lot["viralPrep"]["virus"]["aliases"]
# virus tars id is the preferred virus alias
virus_tars_id = next(
(
alias["name"]
for alias in lot["viralPrep"]["virus"]["aliases"]
if alias["isPreferred"]
),
None,
)
if (
viral_prep_aliases.plasmid_name
and viral_prep_aliases.full_genome_name is None
):
# check molecular registry for full genome name
molecule_response = self._get_molecules_response(
viral_prep_aliases.plasmid_name
)
viral_prep_aliases.full_genome_name = (
trh.map_full_genome_name(
molecule_response, viral_prep_aliases.plasmid_name
)
)
# check virus registry with tars id
virus_response = self._get_virus_response(virus_tars_id)
injection_material = trh.map_lot_to_injection_material(
viral_prep_lot=lot, viral_prep_aliases=viral_prep_aliases
viral_prep_lot=lot,
virus=virus_response.json()["data"][0],
virus_tars_id=virus_tars_id,
)
injection_materials.append(injection_material)
return ModelResponse(
Expand Down
122 changes: 63 additions & 59 deletions src/aind_metadata_service/tars/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import logging
import re
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Dict, List, Optional
Expand All @@ -18,6 +17,7 @@
from pydantic import ValidationError

from aind_metadata_service.client import StatusCodes
from aind_metadata_service.models import ViralMaterialInformation
from aind_metadata_service.response_handler import ModelResponse


Expand Down Expand Up @@ -53,23 +53,6 @@ class PrepProtocols(Enum):
HGT1 = "HGT#1.0"


class VirusAliasPatterns(Enum):
"""Virus Alias Patterns"""

# TODO: add pattern for genome_name once confirmed
AIP = re.compile(r"^AiP[a-zA-Z0-9_-]+$")
AIV = re.compile(r"^AiV[a-zA-Z0-9_-]+$")


@dataclass
class ViralPrepAliases:
"""Model for mapping viral prep aliases"""

plasmid_name: Optional[str]
material_id: Optional[str]
full_genome_name: Optional[str]


class TarsResponseHandler:
"""This class will contain methods to handle the response from TARS"""

Expand Down Expand Up @@ -127,79 +110,97 @@ def _convert_datetime(date: str) -> Optional[date]:
return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").date()

@staticmethod
def map_virus_aliases(aliases: list) -> ViralPrepAliases:
"""Maps aliases to full_genome_name, material_id, viral prep id"""
plasmid_name, material_id, full_genome_name = None, None, None
for alias in aliases:
name = alias["name"]
if VirusAliasPatterns.AIP.value.match(name):
plasmid_name = name
elif VirusAliasPatterns.AIV.value.match(name):
material_id = name
else:
full_genome_name = name
viral_prep_aliases = ViralPrepAliases(
plasmid_name=plasmid_name,
material_id=material_id,
full_genome_name=full_genome_name,
def map_plasmid_name(aliases: List[Dict]) -> Optional[str]:
    """Return the name of the first alias flagged as preferred.

    Parameters
    ----------
    aliases: List[Dict]
      Alias dictionaries; only the "name" and "isPreferred" keys are read.

    Returns
    -------
    Optional[str]
      The "name" value of the first alias whose "isPreferred" value is
      truthy (None if that alias has no "name"), or None when no alias is
      preferred.
    """
    for candidate in aliases:
        if not candidate.get("isPreferred"):
            continue
        # Stop at the first preferred alias, even if its name is missing.
        return candidate.get("name")
    return None
return viral_prep_aliases

@staticmethod
def map_full_genome_name(response, plasmid_name) -> Optional[str]:
"""Maps genome name from molecular response"""
full_genome_name = None
data = response.json()["data"][0]
aliases = data["aliases"]
if len(aliases) == 2:
if aliases[0]["name"] != plasmid_name:
full_genome_name = aliases[0]["name"]
elif aliases[1]["name"] != plasmid_name:
full_genome_name = aliases[1]["name"]
return full_genome_name
def map_stock_titer(titers: Optional[List[dict]]) -> List[int]:
    """Collect the integer titer results of a viral prep lot.

    Parameters
    ----------
    titers: Optional[List[dict]]
      Titer dictionaries from a prep lot; only the "result" key is read.
      None is treated like an empty list, because a "titers" key that is
      present but null reaches this function as None.

    Returns
    -------
    List[int]
      ``int(result)`` for every titer whose "result" is not None, in input
      order. Every titer is included; no preferred-flag filtering is done.
      Empty list when there are no usable titers.

    Raises
    ------
    ValueError, TypeError
      If a non-None "result" cannot be converted by ``int``.
    """
    return [
        int(titer["result"])
        for titer in titers or []
        if titer.get("result") is not None
    ]

def map_name_and_plasmid_from_virus_response(
    self, virus: dict
) -> tuple[Optional[str], Optional[str]]:
    """Build the material name and plasmid alias from a virus record.

    Parameters
    ----------
    virus: dict
      Virus dictionary; only its "molecules" list is read. For each
      molecule the "fullName" and "aliases" keys are used.

    Returns
    -------
    tuple[Optional[str], Optional[str]]
      First item: the truthy "fullName" values joined with "; ", or None
      when there are none. Second item: the preferred alias names returned
      by ``map_plasmid_name`` for each molecule, joined with "; ", or None
      when there are none.
    """
    molecules = virus.get("molecules", [])
    full_names = [
        entry["fullName"] for entry in molecules if entry.get("fullName")
    ]
    # Molecules without a preferred alias yield None and are dropped.
    preferred_aliases = list(
        filter(
            None,
            (
                self.map_plasmid_name(entry.get("aliases", []))
                for entry in molecules
            ),
        )
    )
    joined_name = "; ".join(full_names) if full_names else None
    joined_alias = (
        "; ".join(preferred_aliases) if preferred_aliases else None
    )
    return joined_name, joined_alias

def map_lot_to_injection_material(
self, viral_prep_lot: dict, viral_prep_aliases: ViralPrepAliases
self, viral_prep_lot: dict, virus: dict, virus_tars_id: str
) -> ViralMaterial:
"""
Map prep lot dictionary to injection materials
Parameters
----------
viral_prep_lot: dict
Dictionary of raw viral prep lot data from TARS response.
viral_prep_aliases: ViralPrepAliases
Prep aliases mapped from TARS viral prep and molecular endpoints.
virus: dict
Dictionary of raw virus data from TARS response.
"""
prep_lot_number = viral_prep_lot["lot"]
prep_date = self._convert_datetime(viral_prep_lot["datePrepped"])
prep_lot_number = viral_prep_lot.get("lot")
prep_date = self._convert_datetime(
viral_prep_lot.get("datePrepped", "")
)
prep_type, prep_protocol = self._map_prep_type_and_protocol(
viral_prep_lot["viralPrep"]["viralPrepType"]["name"]
viral_prep_lot.get("viralPrep", {})
.get("viralPrepType", {})
.get("name", "")
)
name, plasmid_alias = self.map_name_and_plasmid_from_virus_response(
virus
)
try:
tars_virus_identifiers = TarsVirusIdentifiers(
virus_tars_id=viral_prep_aliases.material_id,
plasmid_tars_alias=viral_prep_aliases.plasmid_name,
virus_tars_id=virus_tars_id,
plasmid_tars_alias=plasmid_alias,
prep_lot_number=prep_lot_number,
prep_date=prep_date,
prep_type=prep_type,
prep_protocol=prep_protocol,
)
return ViralMaterial(
name=viral_prep_aliases.full_genome_name,
return ViralMaterialInformation(
name=name,
tars_identifiers=tars_virus_identifiers,
stock_titer=self.map_stock_titer(
viral_prep_lot.get("titers", [])
),
)
except ValidationError:
tars_virus_identifiers = TarsVirusIdentifiers.model_construct(
virus_tars_id=viral_prep_aliases.material_id,
plasmid_tars_alias=viral_prep_aliases.plasmid_name,
virus_tars_id=virus_tars_id,
plasmid_tars_alias=plasmid_alias,
prep_lot_number=prep_lot_number,
prep_date=prep_date,
prep_type=prep_type,
prep_protocol=prep_protocol,
)
return ViralMaterial.model_construct(
name=viral_prep_aliases.full_genome_name,
return ViralMaterialInformation.model_construct(
name=name,
tars_identifiers=tars_virus_identifiers,
stock_titer=self.map_stock_titer(
viral_prep_lot.get("titers", [])
),
)

@staticmethod
Expand Down Expand Up @@ -275,6 +276,9 @@ def integrate_injection_materials( # noqa: C901
"data"
]
try:
data.pop(
"stock_titer", None
) # Remove extra field
new_material = ViralMaterial(**data)
new_material.titer = (
injection_material.titer
Expand Down
Loading