Source code for cocoAPI.cocoSearch

from cocoAPI.cocoBase import cocoBase
from cocoAPI import default_search_requests
import time


class cocoSearch( cocoBase ):
    """
    Class for COCONUT API search endpoints.

    Search requests are described as a list of ``[key, field, value]``
    entries, validated, converted into a JSON request body and then
    executed page by page until the last page is reached.
    """

    def __init__( self, cocoLog ):
        # inherits session, api_url
        super().__init__( cocoLog )

        # default search request bodies; `_check_search_query` looks them up by
        # name (`default_<resource>_search_req`) to learn the valid keys
        self.default_citations_search_req = default_search_requests.default_citations_search_req
        self.default_collections_search_req = default_search_requests.default_collections_search_req
        self.default_molecules_search_req = default_search_requests.default_molecules_search_req
        self.default_organisms_search_req = default_search_requests.default_organisms_search_req
        self.default_properties_search_req = default_search_requests.default_properties_search_req
        self.default_reports_search_req = default_search_requests.default_reports_search_req

    def query( self, resource_endpoint, search_query, sleep_time = 0 ):
        """
        Performs COCONUT search request and returns the collected results.

        The built request body is also stored on the instance as
        `self.search_req`.

        Parameters
        ----------
        resource_endpoint
            COCONUT resource to search
        search_query
            List of entries, where each entry has format [key, field, value]
        sleep_time
            Time (seconds) to sleep between requests to avoid rate limiting

        Returns
        -------
        list
            `data` entries accumulated over every page of the search

        Raises
        ------
        TypeError
            If `search_query` is not a list of [key, field, value] lists
        ValueError
            If the resource, a key, a field or a value is not valid
        """
        # check search query
        self._check_search_query(
            resource_endpoint = resource_endpoint,
            search_query = search_query,
        )

        # build search request
        self.search_req = self._build_search_req(
            resource_endpoint = resource_endpoint,
            search_query = search_query,
        )

        # execute search request
        return self._paginate_search_data(
            endpoint = f"{resource_endpoint}/search",
            json_body = self.search_req,
            sleep_time = sleep_time,
        )

    def _check_search_query( self, resource_endpoint, search_query ):
        """
        Performs several checks on `search_query` to ensure correct format.

        Parameters
        ----------
        resource_endpoint
            COCONUT API endpoint to search
        search_query
            List of entries, where each entry has format [key, field, value]

        Raises
        ------
        TypeError
            If `search_query` is not a list of 3-item lists
        ValueError
            If `resource_endpoint` has no default search request, if a key is
            not part of that default request, if a field is not reported by
            the resource endpoint, or if a value does not fit its key
        """
        # check `search_query` structure
        well_formed = isinstance( search_query, list ) and all(
            isinstance( entry, list ) and len( entry ) == 3
            for entry in search_query
        )
        if not well_formed:
            raise TypeError( "`search_query` must be a list of [key, field, value]" )

        # use default search request to get keys & fields; an unknown resource
        # has no such attribute and must fail with a clear message rather than
        # a "NoneType is not subscriptable" error
        attr_name = f"default_{resource_endpoint}_search_req"
        resource_search_req = getattr( self, attr_name, None )
        if resource_search_req is None:
            raise ValueError( f"unknown resource endpoint: {resource_endpoint!r}" )

        # check keys
        resource_keys = resource_search_req["search"].keys()
        if not all( key in resource_keys for key, _, _ in search_query ):
            raise ValueError( f"keys must be a one of: {resource_keys}" )

        # check fields; list needed to append None
        resource_fields = list( self._get( endpoint = resource_endpoint )["data"]["fields"] )
        resource_fields.append( None )
        if not all( field in resource_fields for _, field, _ in search_query ):
            raise ValueError( f"fields must be a one of: {resource_fields}" )

        # check values
        for key, field, value in search_query:
            # `_build_search_req` uses the key "selects" and ignores its value;
            # "select" is kept for backward compatibility with the old check
            if key in ( "select", "selects" ) and value is not None:
                raise ValueError( "for select entry, value must be None" )

            if key in ( "page", "limit" ):
                if field is not None:
                    raise ValueError( "for `page` or `limit` key, field must be None" )
                if not isinstance( value, int ):
                    raise ValueError( "for `page` or `limit` key, value must be integer" )

    def _build_search_req( self, resource_endpoint, search_query ):
        """
        Builds search request from a `search_query` list of entries, where
        each entry has format [key, field, value].

        Parameters
        ----------
        resource_endpoint
            COCONUT API endpoint to search (not used when building the body)
        search_query
            List of entries, where each entry has format [key, field, value]

        Returns
        -------
        dict
            Search request of the form `{"search": {...}}`
        """
        # init search_request
        search = {}

        for key, field, value in search_query:
            if key == "filters":
                # only equality filters are generated
                search.setdefault( "filters", [] ).append(
                    { "field": field, "operator": "=", "value": value }
                )
            elif key == "sorts":
                search.setdefault( "sorts", [] ).append(
                    { "field": field, "direction": value }
                )
            elif key == "selects":
                search.setdefault( "selects", [] ).append( { "field": field } )
            else:
                # simple key like "page", "limit"
                search[key] = value

        return { "search": search }

    def _paginate_search_data( self, endpoint, json_body, sleep_time ):
        """
        Performs pagination on the data returned from the COCONUT API search
        request.

        `json_body` is not modified: the page counter is advanced on a copy.

        Parameters
        ----------
        endpoint
            COCONUT API endpoint
        json_body
            JSON body for the search request
        sleep_time
            Time (seconds) to sleep between requests to avoid rate limiting

        Returns
        -------
        list
            `data` entries accumulated over every retrieved page

        Raises
        ------
        TypeError
            If `json_body` is not a dictionary
        """
        # checks
        if not isinstance( json_body, dict ):
            raise TypeError( "`json_body` must be a dictionary." )

        # pagination input
        # a shallow copy alone would share the nested `search` dict with the
        # caller, so that dict is copied too before the page is touched
        # create page if not present; page is below search
        request_body = dict( json_body )
        request_body["search"] = dict( request_body.get( "search", {} ) )
        request_body["search"].setdefault( "page", 1 )

        # paginate
        all_data = []
        while True:
            # progress
            curr_pg = request_body["search"]["page"]

            # request
            response = self._post( endpoint = endpoint, json_body = request_body )

            # data
            pg_data = response.get( "data", [] )
            if not pg_data:
                print( f"Warning: Empty data returned on page {curr_pg}. Pagination stopped." )
                break
            all_data.extend( pg_data )

            # update progress
            last_pg = response["last_page"]
            print( f"Retrieved page {curr_pg} of {last_pg}.", end = "\r", flush = False )

            # check progress
            if curr_pg == last_pg:
                break
            request_body["search"]["page"] = curr_pg + 1

            # sleep to avoid rate limiting
            time.sleep( sleep_time )

        # return json data
        return all_data

    def get_all_records( self, resource_endpoint, pg_limit = 25, sleep_time = 0 ):
        """
        Get all records from COCONUT API endpoint to search.

        Parameters
        ----------
        resource_endpoint
            COCONUT API endpoint to search
        pg_limit
            Number of results per page
        sleep_time
            Time (seconds) to sleep between requests to avoid rate limiting

        Returns
        -------
        list
            `data` entries accumulated over every retrieved page
        """
        # request json: no filters, start from the first page
        all_records_req = {
            "search": {
                "filters": [],
                "page": 1,
                "limit": pg_limit,
            }
        }

        # request and return data
        return self._paginate_search_data(
            endpoint = f"{resource_endpoint}/search",
            json_body = all_records_req,
            sleep_time = sleep_time,
        )