Skip to content
This repository was archived by the owner on Oct 23, 2023. It is now read-only.

Commit a126c38

Browse files
authored
Merge pull request #64 from CSCfi/feature/different_loader
Optimise init load in DB for parsing VCF file
2 parents 9322d90 + 1136323 commit a126c38

File tree

3 files changed

+55
-23
lines changed

3 files changed

+55
-23
lines changed

beacon_api/utils/db_load.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -101,24 +101,18 @@ def _unpack(self, variant):
101101
me_type = ['dup:tandem', 'del:me', 'ins:me']
102102
# sv_type = ['dup', 'inv', 'ins', 'del', 'cnv']
103103
# supported_vt = ['snp', 'indel', 'mnp', 'dup', 'inv', 'ins', 'del']
104-
for k, v in variant.INFO:
105-
if k == 'AC':
106-
ac = self._handle_type(v, int)
107-
if k == 'AN':
108-
an = v
109-
else:
110-
an = variant.num_called*2
111-
if k == 'AF':
112-
aaf = self._handle_type(v, float)
113-
else:
114-
aaf = [float(ac_value) / float(an) for ac_value in ac]
115-
if variant.is_sv:
116-
alt = [elem.strip("<>") for elem in variant.ALT]
117-
if k == 'SVTYPE':
118-
vt = [self._rchop(e, ":"+v) if e.lower().startswith(tuple(me_type)) else v for e in alt]
119-
else:
120-
if k == 'VT':
121-
vt = [self._transform_vt(var_type.lower(), variant) for var_type in v.split(',')]
104+
ac = self._handle_type(variant.INFO.get('AC'), int) if variant.INFO.get('AC') else []
105+
an = variant.INFO.get('AN') if variant.INFO.get('AN') else variant.num_called * 2
106+
aaf = self._handle_type(variant.INFO.get('AF'), float) if variant.INFO.get('AF') else [float(ac_value) / float(an) for ac_value in ac]
107+
if variant.is_sv:
108+
alt = [elem.strip("<>") for elem in variant.ALT]
109+
if variant.INFO.get('SVTYPE'):
110+
v = variant.INFO.get('SVTYPE')
111+
vt = [self._rchop(e, ":"+v) if e.lower().startswith(tuple(me_type)) else v for e in alt]
112+
else:
113+
if variant.INFO.get('VT'):
114+
v = variant.INFO.get('VT')
115+
vt = [self._transform_vt(var_type.lower(), variant) for var_type in v.split(',')]
122116

123117
return (aaf, ac, vt, alt, an)
124118

@@ -235,6 +229,7 @@ async def insert_variants(self, dataset_id, variants):
235229
async with self._conn.transaction():
236230
LOG.info('Insert variants into the database')
237231
for variant in variants:
232+
# print(variant)
238233
# params = (frequency, count, actual variant Type)
239234
if variant.aaf > 0:
240235
params = self._unpack(variant)

tests/test_basic.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asynctest
22
from beacon_api.utils.db_load import parse_arguments, init_beacon_db, main
33
from beacon_api.conf.config import init_db_pool
4+
from beacon_api.permissions.rems import get_rems_controlled
45
from testfixtures import TempDirectory
56

67

@@ -94,6 +95,21 @@ def test_main_db(self, mock_init):
9495
main()
9596
mock_init.assert_called()
9697

98+
def test_rems_controlled(self):
    """Check that datasets from several REMS claim entries are merged without duplicates."""
    # Two claim entries that share one dataset identifier.
    first_entry = {"affiliation": "",
                   "datasets": ["EGAD01", "urn:hg:example-controlled"],
                   "source_signature": "",
                   "url_prefix": ""}
    second_entry = {"affiliation": "",
                    "datasets": ["urn:hg:example-controlled", "EGAD02",
                                 "urn:hg:example-controlled3"],
                    "source_signature": "",
                    "url_prefix": ""}
    parsed = get_rems_controlled([first_entry, second_entry])
    # Order is irrelevant; each dataset must appear exactly once.
    expected = ['EGAD01', 'urn:hg:example-controlled',
                'urn:hg:example-controlled3', 'EGAD02']
    self.assertCountEqual(parsed, expected)
112+
97113

98114
if __name__ == '__main__':
99115
asynctest.main()

tests/test_db_load.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
class Variant:
88
"""Variant Class.
99
10-
Mock this for Variant caculations.
10+
Mock this for Variant calculations.
1111
"""
1212

13-
def __init__(self, ALT, REF, INFO, call_rate, var_type, num_called):
13+
def __init__(self, ALT, REF, INF, call_rate, var_type, num_called):
1414
"""Initialize class."""
15-
self.INFO = INFO.items()
15+
self.INFO = INF
1616
self.ALT = ALT
1717
self.REF = REF
1818
self.call_rate = call_rate
@@ -21,6 +21,24 @@ def __init__(self, ALT, REF, INFO, call_rate, var_type, num_called):
2121
self.is_sv = False
2222

2323

24+
class INFO:
    """INFO Class.

    Mock this for storing VCF info.

    Only the ``get`` accessor is provided, which is how the loader's
    ``_unpack`` reads INFO fields (``variant.INFO.get('AC')`` and so on).
    """

    def __init__(self, AC, VT, AN, AF=0):
        """Initialize class.

        :param AC: value returned for the ``AC`` key
        :param VT: value returned for the ``VT`` key
        :param AN: value returned for the ``AN`` key
        :param AF: value returned for the ``AF`` key; the falsy default
                   makes ``_unpack`` derive the frequency from AC and AN
        """
        self.AC = AC
        self.VT = VT
        self.AN = AN
        self.AF = AF

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` when absent.

        Mirrors the mapping-style ``get`` contract: looking up a field the
        mock does not define (for example ``SVTYPE``) yields ``default``
        instead of raising ``AttributeError``.
        """
        return getattr(self, key, default)
40+
41+
2442
class Transaction:
2543
"""Class Transaction.
2644
@@ -234,10 +252,13 @@ async def test_unpack(self, db_mock, mock_log):
234252
"""Test database URL fetching."""
235253
db_mock.return_value = Connection()
236254
await self._db.connection()
237-
variant = Variant('TC', 'T', {'AC': (1, 2), 'VT': 'M,S,I', 'AN': 3}, 0.7, 'snp', 3)
255+
inf1 = INFO((1, 2), 'M,S,I', 3)
256+
variant = Variant('TC', 'T', inf1, 0.7, 'snp', 3)
238257
result = self._db._unpack(variant)
239258
self.assertEqual(([0.3333333333333333, 0.6666666666666666], [1, 2], ['MNP', 'SNP', 'INS'], 'TC', 3), result)
240-
variant = Variant('TC', 'T', {'AC': 1, 'VT': 'S', 'AN': 3}, 0.7, 'snp', 3)
259+
inf2 = INFO(1, 'S', 3)
260+
variant = Variant('TC', 'T', inf2, 0.7, 'snp', 3)
261+
# print(variant.INFO.get('AC'))
241262
result = self._db._unpack(variant)
242263
self.assertEqual(([0.3333333333333333], [1], ['SNP'], 'TC', 3), result)
243264

0 commit comments

Comments
 (0)