def fetch_resource_mention(query: dict, expanded: bool = True, conn: Optional[Connection] = None, engine: Optional[Engine] = None, debug: bool = False) -> Optional[list]:
    """Fetch ResourceMention(s) from the database matching the provided query.

    Rows selected from the `resource_mention` table are grouped by
    (publication_id, resource_id, version_id). Each group becomes one
    ResourceMention whose `matched_aliases` holds one MatchedAlias per row,
    whose `match_count` is the sum of the per-alias match counts, and whose
    `mean_confidence` is the unweighted mean of the per-alias mean
    confidences.

    Args:
        query (dict): Dictionary of column names and values to match for selection.
        expanded (bool, optional): Passed through to `fetch_publication` and
            `fetch_resource` when loading the associated objects.
        conn (Optional[Connection], optional): SQLAlchemy Connection object.
        engine (Optional[Engine], optional): SQLAlchemy Engine object.
        debug (bool, optional): Passed through to the underlying fetch helpers.

    Returns:
        list of ResourceMention objects, in the order their groups first
        appear in the query result, or `None` if no rows matched.
    """
    from .resource_mention import ResourceMention, MatchedAlias

    order_by = ['publication_id', 'resource_id', 'match_count']
    mention_raw = select_from_table('resource_mention', query, order_by=order_by, conn=conn, engine=engine, debug=debug)
    # Treat a missing result the same as an empty one.
    if mention_raw is None or len(mention_raw) == 0:
        return None

    # Group rows by (publication_id, resource_id, version_id). A dict keeps
    # insertion order, so groups stay in the order the query returned them
    # (a set would make the output order arbitrary).
    mentions_grouped = {}
    for row in mention_raw:
        key = (row['publication_id'], row['resource_id'], row['version_id'])
        group = mentions_grouped.get(key)
        if group is None:
            group = {
                'publication_id': row['publication_id'],
                'resource_id': row['resource_id'],
                'version_id': row['version_id'],
                'matched_aliases': [],
            }
            mentions_grouped[key] = group
        # Convert into fresh values instead of writing back into the row, so
        # read-only row mappings are not mutated.
        group['matched_aliases'].append(MatchedAlias({
            'matched_alias': row['matched_alias'],
            'match_count': int(row['match_count']),
            'mean_confidence': float(row['mean_confidence']),
        }))

    # Aggregate per-group totals from the alias objects.
    for group in mentions_grouped.values():
        aliases = group['matched_aliases']
        group['match_count'] = sum(alias.match_count for alias in aliases)
        # Unweighted mean of the per-alias means (not weighted by match_count).
        if aliases:
            group['mean_confidence'] = sum((alias.mean_confidence for alias in aliases), 0.0) / len(aliases)
        else:
            group['mean_confidence'] = 0.0

    # Build component objects, fetching each publication / resource / version
    # at most once even when it is shared by several groups.
    publications = {}
    resources = {}
    versions = {}
    mentions = []
    for group in mentions_grouped.values():
        publication_id = group['publication_id']
        if publication_id not in publications:
            publications[publication_id] = fetch_publication({'id': publication_id}, expanded=expanded, conn=conn, engine=engine, debug=debug)

        resource_id = group['resource_id']
        if resource_id not in resources:
            resources[resource_id] = fetch_resource({'id': resource_id}, expanded=expanded, conn=conn, engine=engine, debug=debug)

        version_id = group['version_id']
        if version_id not in versions:
            versions[version_id] = fetch_version({'id': version_id}, conn=conn, engine=engine, debug=debug)

        mentions.append(ResourceMention({
            'publication': publications[publication_id],
            'resource': resources[resource_id],
            'version': versions[version_id],
            # Rows are requested ordered by match_count (presumably ascending;
            # confirm against select_from_table), so reversing puts the
            # highest count first.
            'matched_aliases': group['matched_aliases'][::-1],
            'match_count': group['match_count'],
            'mean_confidence': group['mean_confidence'],
        }))
    return mentions