From 9814c04cc1cad41a29fc4816cc4941b836c6c7dc Mon Sep 17 00:00:00 2001 From: Natalie Diaz Date: Fri, 23 Jan 2026 12:40:41 -0800 Subject: [PATCH] Remove obsolete code for deprecated python client --- README.md | 20 + datacommons/README.md | 44 +- datacommons/__init__.py | 45 -- datacommons/core.py | 256 ----------- datacommons/examples/__init__.py | 13 - datacommons/examples/core.py | 59 --- datacommons/examples/places.py | 83 ---- datacommons/examples/query.py | 46 -- datacommons/examples/stat_vars.py | 222 --------- datacommons/key.py | 28 -- datacommons/node.py | 90 ---- datacommons/places.py | 268 ----------- datacommons/requests.py | 42 -- datacommons/setup.py | 59 --- datacommons/sparql.py | 96 ---- datacommons/stat_vars.py | 259 ----------- datacommons/test/__init__.py | 13 - datacommons/test/core_test.py | 499 --------------------- datacommons/test/node_test.py | 256 ----------- datacommons/test/places_test.py | 462 ------------------- datacommons/test/set_api_key_test.py | 34 -- datacommons/test/sparql_test.py | 116 ----- datacommons/test/stat_vars_test.py | 361 --------------- datacommons/utils.py | 137 ------ datacommons_client/README.md | 15 - datacommons_pandas/README.md | 43 +- datacommons_pandas/__init__.py | 44 -- datacommons_pandas/core.py | 1 - datacommons_pandas/df_builder.py | 321 ------------- datacommons_pandas/examples/__init__.py | 13 - datacommons_pandas/examples/df_builder.py | 137 ------ datacommons_pandas/key.py | 1 - datacommons_pandas/node.py | 1 - datacommons_pandas/places.py | 1 - datacommons_pandas/requests.py | 1 - datacommons_pandas/setup.py | 59 --- datacommons_pandas/sparql.py | 1 - datacommons_pandas/stat_vars.py | 1 - datacommons_pandas/test/__init__.py | 13 - datacommons_pandas/test/df_builder_test.py | 320 ------------- datacommons_pandas/utils.py | 1 - docs/release.md | 58 --- 42 files changed, 22 insertions(+), 4517 deletions(-) create mode 100644 README.md delete mode 100644 datacommons/__init__.py delete mode 100644 datacommons/core.py delete mode 100644 datacommons/examples/__init__.py delete mode 100644 datacommons/examples/core.py delete mode 100644 datacommons/examples/places.py delete mode 100644 datacommons/examples/query.py delete mode 100644 datacommons/examples/stat_vars.py delete mode 100644 datacommons/key.py delete mode 100644 datacommons/node.py delete mode 100644 datacommons/places.py delete mode 100644 datacommons/requests.py delete mode 100644 datacommons/setup.py delete mode 100644 datacommons/sparql.py delete mode 100644 datacommons/stat_vars.py delete mode 100644 datacommons/test/__init__.py delete mode 100644 datacommons/test/core_test.py delete mode 100644 datacommons/test/node_test.py delete mode 100644 datacommons/test/places_test.py delete mode 100644 datacommons/test/set_api_key_test.py delete mode 100644 datacommons/test/sparql_test.py delete mode 100644 datacommons/test/stat_vars_test.py delete mode 100644 datacommons/utils.py delete mode 100644 datacommons_pandas/__init__.py delete mode 120000 datacommons_pandas/core.py delete mode 100644 datacommons_pandas/df_builder.py delete mode 100644 datacommons_pandas/examples/__init__.py delete mode 100644 datacommons_pandas/examples/df_builder.py delete mode 120000 datacommons_pandas/key.py delete mode 120000 datacommons_pandas/node.py delete mode 120000 datacommons_pandas/places.py delete mode 120000 datacommons_pandas/requests.py delete mode 100644 datacommons_pandas/setup.py delete mode 120000 datacommons_pandas/sparql.py delete mode 120000 datacommons_pandas/stat_vars.py delete mode 100644 datacommons_pandas/test/__init__.py delete mode 100644 datacommons_pandas/test/df_builder_test.py delete mode 120000 datacommons_pandas/utils.py diff --git a/README.md b/README.md new file mode 100644 index 00000000..08d1f9d1 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# Data Commons Python API + +This is a Python library for accessing data in the Data Commons Graph. + +See the `datacommons-client` [README](datacommons_client/README.md) for details on installation and usage. + +## About Data Commons + +[Data Commons](https://datacommons.org/) is an open knowledge repository that +provides a unified view across multiple public data sets and statistics. You can +view what [datasets](https://datacommons.org/datasets) are currently ingested +and browse the graph using our [browser](https://datacommons.org/browser). + +## License + +Apache 2.0 + +## Support + +For questions, please send an email to `support@datacommons.org`. diff --git a/datacommons/README.md b/datacommons/README.md index 29ceb49c..218c00d4 100644 --- a/datacommons/README.md +++ b/datacommons/README.md @@ -1,43 +1 @@ -**DEPRECATED: This library is no longer maintained. Please migrate to the [datacommons_client](https://pypi.org/project/datacommons-client/) library. For help on translating your requests, see the [Migration guide](https://docs.datacommons.org/api/python/v2/migration.html).** - -# Data Commons Python API - -This is a Python library for accessing data in the Data Commons Graph. - -> See also: [Data Commons Pandas API](../datacommons_pandas/README.md). - -To get started, install this package from pip. - -```bash -pip install datacommons -``` - -Once the package is installed, import `datacommons`. - -```python -import datacommons as dc -``` - -For more detail on getting started with the API, please visit our -[API Overview](https://docs.datacommons.org/api/). - -When you are ready to use the API, you can refer to `examples` for -examples on how to use this package to perform various tasks. More tutorials and -documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)! - -## About Data Commons - -[Data Commons](https://datacommons.org/) is an open knowledge repository that -provides a unified view across multiple public data sets and statistics. You can -view what [datasets](https://datacommons.org/datasets) are currently ingested -and browse the graph using our [browser](https://datacommons.org/browser). - -## License - -Apache 2.0 - -## Support - -For general questions or issues about the API, please open an issue on our -[issues](https://github.com/google/datacommons/issues) page. For all other -questions, please send an email to `support@datacommons.org`. +**DEPRECATED: This library has been deprecated. Please migrate to the [datacommons_client](https://pypi.org/project/datacommons-client/) library. For help on translating your requests, see the [Migration guide](https://docs.datacommons.org/api/python/v2/migration.html).** diff --git a/datacommons/__init__.py b/datacommons/__init__.py deleted file mode 100644 index a156ebbd..00000000 --- a/datacommons/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# isort: skip_file - -import warnings - -warnings.warn( - "\n\n'datacommons' is deprecated and will no longer be updated.\n" - "Please migrate to the 'datacommons_client' package.\n" - "Migration guide: https://docs.datacommons.org/api/python/v2/migration.html\n" - "Contact support@datacommons.org with any questions.\n", - category=DeprecationWarning, - stacklevel=2) - -################################## IMPORTANT ################################# -# All user-facing functions in this package must be symlinked to the # -# datacommons_pandas pkg. This is so that users do not need to import both # -# libraries for pd support. Please keep the below imports in sync with the # -# __init__.py in the datacommons_pandas/ dir, and add a symlink when # -# creating a new file. # -# TODO: https://github.com/datacommonsorg/api-python/issues/149 # -############################################################################## - -# Data Commons SPARQL query support -from datacommons.sparql import query - -# Data Commons Python API -from datacommons.core import get_property_labels, get_property_values, get_triples -from datacommons.places import get_places_in, get_related_places, get_stats -from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all - -from datacommons.key import set_api_key -from datacommons.node import properties, property_values, triples diff --git a/datacommons/core.py b/datacommons/core.py deleted file mode 100644 index 5434599b..00000000 --- a/datacommons/core.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API Core. - -Provides primitive operations for working with collections of nodes. For a -collection of nodes identified by their dcids, this submodule implements the -following: - -- Getting all property labels -- Getting all property values -- Getting all triples -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict - -import datacommons.utils as utils - -# ----------------------------- WRAPPER FUNCTIONS ----------------------------- - - -def get_property_labels(dcids, out=True): - """ Returns the labels of properties defined for the given :code:`dcids`. - - Args: - dcids (:obj:`iterable` of :obj:`str`): A list of nodes identified by their - dcids. - out (:obj:`bool`, optional): Whether or not the property points away from - the given list of nodes. - - Returns: - A :obj:`dict` mapping dcids to lists of property labels. If `out` is `True`, - then property labels correspond to edges directed away from given nodes. - Otherwise, they correspond to edges directed towards the given nodes. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - To get all outgoing property labels for - `California `_ and - `Colorado `_, we can write - the following. - - >>> get_property_labels(['geoId/06', 'geoId/08']) - { - "geoId/06": [ - "containedInPlace", - "geoId", - "kmlCoordinates", - "name", - "provenance", - "typeOf" - ], - "geoId/08",: [ - "containedInPlace", - "geoId", - "kmlCoordinates", - "name", - "provenance", - "typeOf" - ] - } - - We can also get incoming property labels by setting `out=False`. - - >>> get_property_labels(['geoId/06', 'geoId/08'], out=False) - { - "geoId/06": [ - "addressRegion", - "containedInPlace", - "location", - "overlapsWith" - ], - "geoId/08",: [ - "addressRegion", - "containedInPlace", - "location", - "overlapsWith" - ] - } - """ - # Generate the GetProperty query and send the request - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels'] - payload = utils._send_request(url, req_json={'dcids': dcids}) - - # Return the results based on the orientation - results = {} - for dcid in dcids: - if out: - results[dcid] = payload[dcid]['outLabels'] - else: - results[dcid] = payload[dcid]['inLabels'] - return results - - -def get_property_values(dcids, - prop, - out=True, - value_type=None, - limit=utils._MAX_LIMIT): - """ Returns property values of given :code:`dcids` along the given property. - - Args: - dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for. - prop (:obj:`str`): The property to get property values for. - out (:obj:`bool`, optional): A flag that indicates the property is directed - away from the given nodes when set to true. - value_type (:obj:`str`, optional): A type to filter returned property values - by. - limit (:obj:`int`, optional): The maximum number of property values returned - aggregated over all given nodes. - - Returns: - Returned property values are formatted as a :obj:`dict` from a given dcid - to a list of its property values. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to get the `name` of a list of states specified by their dcid: - `geoId/06 `_, - `geoId/21 `_, and - `geoId/24 `_ - - First, let's try specifying the :code:`dcids` as a :obj:`list` of - :obj:`str`. - - >>> get_property_values(["geoId/06", "geoId/21", "geoId/24"], "name") - { - "geoId/06": ["California"], - "geoId/21": ["Kentucky"], - "geoId/24": ["Maryland"], - } - """ - # Convert the dcids field and format the request to GetPropertyValue - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - if out: - direction = 'out' - else: - direction = 'in' - - req_json = { - 'dcids': dcids, - 'property': prop, - 'limit': limit, - 'direction': direction - } - if value_type: - req_json['value_type'] = value_type - - # Send the request - url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_values'] - payload = utils._send_request(url, req_json=req_json) - - # Create the result format for when dcids is provided as a list. - unique_results = defaultdict(set) - for dcid in dcids: - # Get the list of nodes based on the direction given. - nodes = [] - if out: - if dcid in payload and 'out' in payload[dcid]: - nodes = payload[dcid]['out'] - else: - if dcid in payload and 'in' in payload[dcid]: - nodes = payload[dcid]['in'] - - # Add nodes to unique_results if it is not empty - for node in nodes: - if 'dcid' in node: - unique_results[dcid].add(node['dcid']) - elif 'value' in node: - unique_results[dcid].add(node['value']) - - # Make sure each dcid is in the results dict, and convert all sets to lists. - results = {dcid: sorted(list(unique_results[dcid])) for dcid in dcids} - - return results - - -def get_triples(dcids, limit=utils._MAX_LIMIT): - """ Returns all triples associated with the given :code:`dcids`. - - A knowledge graph can be described as a collection of `triples` which are - 3-tuples that take the form `(s, p, o)`. Here `s` and `o` are nodes in the - graph called the *subject* and *object* respectively while `p` is the property - label of a directed edge from `s` to `o` (sometimes also called the - *predicate*). - - Args: - dcids (:obj:`iterable` of :obj:`str`): A list of dcids to get triples for. - limit (:obj:`int`, optional): The maximum total number of triples to get. - - Returns: - A :obj:`dict` mapping dcids to a :obj:`list` of triples `(s, p, o)` where - `s`, `p`, and `o` are instances of :obj:`str` and either the subject - or object is the mapped dcid. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to get five triples associated with - `California `_ - - >>> get_triples(["geoId/06"], limit=5) - { - "geoId/06": [ - ("geoId/06", "name", "California"), - ("geoId/06", "typeOf", "State"), - ("geoId/06", "geoId", "06"), - ("geoId/0687056", "containedInPlace", "geoId/06"), - ("geoId/0686440", "containedInPlace", "geoId/06") - ] - } - """ - # Generate the GetTriple query and send the request. - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples'] - payload = utils._send_request(url, req_json={'dcids': dcids, 'limit': limit}) - - # Create a map from dcid to list of triples. - results = defaultdict(list) - for dcid in dcids: - # Make sure each dcid is mapped to an empty list. - results[dcid] - - # Add triples as appropriate - for t in payload[dcid]: - if 'objectId' in t: - results[dcid].append((t['subjectId'], t['predicate'], t['objectId'])) - elif 'objectValue' in t: - results[dcid].append((t['subjectId'], t['predicate'], t['objectValue'])) - return dict(results) diff --git a/datacommons/examples/__init__.py b/datacommons/examples/__init__.py deleted file mode 100644 index 7c07b241..00000000 --- a/datacommons/examples/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/datacommons/examples/core.py b/datacommons/examples/core.py deleted file mode 100644 index 3510893f..00000000 --- a/datacommons/examples/core.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API examples. - -Basic demo for get_property_labels, get_property_values, and get_triples. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datacommons as dc - - -def main(): - # Set the dcid to be that of Santa Clara County. - dcids = ['geoId/06085', 'dc/p/zsb968m3v1f97'] - - # Print all incoming and outgoing properties from Santa Clara County. - print('Property Labels for Santa Clara County') - in_labels = dc.get_property_labels(dcids) - out_labels = dc.get_property_labels(dcids, out=False) - print('> Printing properties for {}'.format(dcids)) - print('> Incoming properties: {}'.format(in_labels)) - print('> Outgoing properties: {}'.format(out_labels)) - - # Print all property values for "containedInPlace" for Santa Clara County. - print('Property Values for "containedInPlace" of Santa Clara County') - prop_vals = dc.get_property_values(dcids, - 'containedInPlace', - out=False, - value_type='City') - print('> Cities contained in {}'.format(dcids)) - for dcid in dcids: - for city_dcid in prop_vals[dcid]: - print(' - {}'.format(city_dcid)) - - # Print the first 10 triples associated with Santa Clara County - print('Triples for Santa Clara County') - triples = dc.get_triples(dcids) - for dcid in dcids: - print('> Triples for {}'.format(dcid)) - for s, p, o in triples[dcid][:5]: - print(' - ("{}", {}, "{}")'.format(s, p, o)) - - -if __name__ == '__main__': - main() diff --git a/datacommons/examples/places.py b/datacommons/examples/places.py deleted file mode 100644 index 00b2328d..00000000 --- a/datacommons/examples/places.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API examples. - -Basic demo for get_places_in -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datacommons as dc - - -def main(): - # Create a list of dcids for Santa Clara and Montgomery County. - sc, mc = 'geoId/06085', 'geoId/24031' - dcids = [sc, mc] - - # Get all CensusTracts in these two counties. - print('Get Census Tracts') - tracts = dc.get_places_in(dcids, 'CensusTract') - if sc in tracts: - print('> 10 CensusTracts in Santa Clara County') - for dcid in tracts[sc][:10]: - print(' - {}'.format(dcid)) - if mc in tracts: - print('> 10 CensusTracts in Montgomery County') - for dcid in tracts[mc][:10]: - print(' - {}'.format(dcid)) - - # Get place stats. - print('Get place stats -- all') - stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], - 'dc/0hyp6tkn18vcb', - obs_dates='all') - print(stats) - - print('Get place stats -- latest') - stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], - 'dc/0hyp6tkn18vcb') - print(stats) - - print('Get place stats -- 2014') - stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], - 'dc/0hyp6tkn18vcb', - obs_dates=['2014']) - print(stats) - - print('Get place stats -- 2014 badly formatted') - stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], - 'dc/0hyp6tkn18vcb', - obs_dates='2014') - print(stats) - - print('Get place stats -- 2015-2016') - stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], - 'dc/0hyp6tkn18vcb', - obs_dates=['2015', '2016']) - print(stats) - - # Get related places. - - -# TODO(*): Fix the related places example. -# print('Get related places') -# related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', -# 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"}) -# print(related_places) - -if __name__ == '__main__': - main() diff --git a/datacommons/examples/query.py b/datacommons/examples/query.py deleted file mode 100644 index 8be1bfe4..00000000 --- a/datacommons/examples/query.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API examples. - -Example on how to use the Client API SPARQL query wrapper. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datacommons as dc - - -def main(): - # Create a SPARQL query querying for the name of some states - query = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ?a dcid ?dcid -} -''') - print('> Issuing query.\n{}'.format(query)) - - # Iterate through all the rows in the results. - print('> Printing results.\n') - for row in dc.query(query_string=query): - print(' {}'.format(row)) - - -if __name__ == '__main__': - main() diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py deleted file mode 100644 index 47a74dfe..00000000 --- a/datacommons/examples/stat_vars.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Basic examples for StatisticalVariable-based Data Commons API functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import pprint - -import datacommons as dc - - -def main(): - param_sets = [ - { - 'place': 'geoId/06085', - 'stat_var': "Count_Person", - }, - { - 'place': 'geoId/06085', - 'stat_var': "Count_Person", - 'date': '2018', - }, - { - 'place': 'geoId/06085', - 'stat_var': "Count_Person", - 'date': '2018', - 'measurement_method': 'CensusACS5yrSurvey', - }, - { - 'place': 'geoId/06085', - 'stat_var': 'UnemploymentRate_Person', - }, - { - 'place': 'geoId/06085', - 'stat_var': 'UnemploymentRate_Person', - 'observation_period': 'P1Y', - }, - { - 'place': 'geoId/06085', - 'stat_var': 'UnemploymentRate_Person', - 'observation_period': 'P1Y', - 'measurement_method': 'BLSSeasonallyUnadjusted', - }, - { - 'place': 'nuts/HU22', - 'stat_var': 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', - }, - { - 'place': 'nuts/HU22', - 'stat_var': 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', - 'observation_period': 'P1Y', - 'unit': 'PurchasingPowerStandard' - }, - ] - - def call_str(pvs): - """Helper function to print the minimal call string.""" - s = "'{}', '{}'".format(pvs.get('place'), pvs.get('stat_var')) - if pvs.get('measurement_method'): - s += ", measurement_method='{}'".format(pvs.get('measurement_method')) - if pvs.get('observation_period'): - s += ", observation_period='{}'".format(pvs.get('observation_period')) - if pvs.get('unit'): - s += ", unit='{}'".format(pvs.get('unit')) - if pvs.get('scaling_factor'): - s += ", scaling_factor={}".format(pvs.get('scaling_factor')) - return s - - for pvs in param_sets: - print('\nget_stat_value({})'.format(call_str(pvs))) - print( - '>>> ', - dc.get_stat_value(pvs.get('place'), - pvs.get('stat_var'), - date=pvs.get('date'), - measurement_method=pvs.get('measurement_method'), - observation_period=pvs.get('observation_period'), - unit=pvs.get('unit'), - scaling_factor=pvs.get('scaling_factor'))) - for pvs in param_sets: - pvs.pop('date', None) - print('\nget_stat_series({})'.format(call_str(pvs))) - print( - '>>> ', - dc.get_stat_series(pvs.get('place'), - pvs.get('stat_var'), - measurement_method=pvs.get('measurement_method'), - observation_period=pvs.get('observation_period'), - unit=pvs.get('unit'), - scaling_factor=pvs.get('scaling_factor'))) - - pp = pprint.PrettyPrinter(indent=4) - print( - '\nget_stat_all(["geoId/06085", "country/FRA"], ["Median_Age_Person", "Count_Person"])' - ) - print('>>> ') - pp.pprint( - dc.get_stat_all(["geoId/06085", "country/FRA"], - ["Median_Age_Person", "Count_Person"])) - - print( - '\nget_stat_all(["badPlaceId", "country/FRA"], ["Median_Age_Person", "Count_Person"])' - ) - print('>>> ') - pp.pprint( - dc.get_stat_all(["badPlaceId", "country/FRA"], - ["Median_Age_Person", "Count_Person"])) - - print('\nWhen no data for get_stat_value') - pp.pprint(dc.get_stat_value('foooo', 'barrrr')) - - print('\nWhen no data for get_stat_series') - pp.pprint(dc.get_stat_series('foobarbar', 'barfoo')) - - print('\nSTRESS TEST FOR GET_STAT_ALL') - try: - dc.get_stat_all( - dc.get_places_in(['country/USA'], 'County')['country/USA'], [ - 'Count_Person', 'LandAreaSqMeter', 'PopulationDensityPerSqMeter', - 'Count_Person_BlackOrAfricanAmericanAlone', - 'PercentBlackOrAfricanAmericanAlone', 'Count_Person_Female', - 'Count_Person_Male', - 'Count_Person_AmericanIndianAndAlaskaNativeAlone', - 'Count_Person_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_AmericanIndianOrAlaskaNativeAlone', - 'Count_Person_AsianAlone', - 'Count_Person_AsianAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_HispanicOrLatino', - 'Count_Person_NativeHawaiianAndOtherPacificIslanderAlone', - 'Count_Person_NativeHawaiianAndOtherPacificIslanderAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_NativeHawaiianOrOtherPacificIslanderAlone', - 'Count_Person_SomeOtherRaceAlone', - 'Count_Person_SomeOtherRaceAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_TwoOrMoreRaces', 'Count_Person_WhiteAlone', - 'Count_Person_WhiteAloneNotHispanicOrLatino', - 'Count_Person_WhiteAloneOrInCombinationWithOneOrMoreOtherRaces', - 'Count_Person_Upto5Years', 'Count_Person_Upto18Years', - 'Count_Person_65OrMoreYears', 'Count_Person_75OrMoreYears', - 'Count_Person_ForeignBorn', - 'Count_Person_USCitizenByNaturalization', - 'Count_Person_NotAUSCitizen', 'Count_Person_Nonveteran', - 'Count_Person_Veteran', 'Count_Person_NotWorkedFullTime', - 'Count_Person_WorkedFullTime', 'Count_Person_Employed', - 'Count_Person_Unemployed', 'Count_Person_InLaborForce', - 'Count_Person_IncomeOf10000To14999USDollar', - 'Count_Person_IncomeOf15000To24999USDollar', - 'Count_Person_IncomeOf25000To34999USDollar', - 'Count_Person_IncomeOf35000To49999USDollar', - 'Count_Person_IncomeOf50000To64999USDollar', - 'Count_Person_IncomeOf65000To74999USDollar', - 'Count_Person_IncomeOf75000OrMoreUSDollar', - 'Count_Person_IncomeOfUpto9999USDollar', - 'Count_Person_EnrolledInSchool', 'Count_Person_NotEnrolledInSchool', - 'Count_Person_EnrolledInCollegeUndergraduateYears', - 'Count_Person_EnrolledInGrade1ToGrade4', - 'Count_Person_EnrolledInGrade5ToGrade8', - 'Count_Person_EnrolledInGrade9ToGrade12', - 'Count_Person_EnrolledInKindergarten', - 'Count_Person_EnrolledInNurserySchoolPreschool', - 'Count_Person_GraduateOrProfessionalSchool', - 'Count_Person_EducationalAttainment10ThGrade', - 'Count_Person_EducationalAttainment11ThGrade', - 'Count_Person_EducationalAttainment12ThGradeNoDiploma', - 'Count_Person_EducationalAttainment1StGrade', - 'Count_Person_EducationalAttainment2NdGrade', - 'Count_Person_EducationalAttainment3RdGrade', - 'Count_Person_EducationalAttainment4ThGrade', - 'Count_Person_EducationalAttainment5ThGrade', - 'Count_Person_EducationalAttainment6ThGrade', - 'Count_Person_EducationalAttainment7ThGrade', - 'Count_Person_EducationalAttainment8ThGrade', - 'Count_Person_EducationalAttainment9ThGrade', - 'Count_Person_EducationalAttainmentAssociatesDegree', - 'Count_Person_EducationalAttainmentBachelorsDegree', - 'Count_Person_EducationalAttainmentBachelorsDegreeOrHigher', - 'Count_Person_EducationalAttainmentDoctorateDegree', - 'Count_Person_EducationalAttainmentGedOrAlternativeCredential', - 'Count_Person_EducationalAttainmentKindergarten', - 'Count_Person_EducationalAttainmentMastersDegree', - 'Count_Person_EducationalAttainmentNoSchoolingCompleted', - 'Count_Person_EducationalAttainmentNurserySchool', - 'Count_Person_EducationalAttainmentPrimarySchool', - 'Count_Person_EducationalAttainmentProfessionalSchoolDegree', - 'Count_Person_EducationalAttainmentRegularHighSchoolDiploma', - 'Count_Person_EducationalAttainmentSomeCollege1OrMoreYearsNoDegree', - 'Count_Person_EducationalAttainmentSomeCollegeLessThan1Year', - 'Count_Person_Divorced', 'Count_Person_MarriedAndNotSeparated', - 'Count_Person_NeverMarried', 'Count_Person_Separated', - 'Count_Person_Widowed', 'Count_Person_NowMarried', - 'Count_Person_AbovePovertyLevelInThePast12Months', - 'Count_Person_BelowPovertyLevelInThePast12Months', - 'Percent_Person_20OrMoreYears_WithDiabetes', - 'Percent_Person_20OrMoreYears_Obesity', - 'Percent_Person_20OrMoreYears_PhysicalInactivity', - 'Percent_Person_Upto64Years_NoHealthInsurance', 'Median_Age_Person', - 'Median_Income_Person', 'Count_Death', - 'Count_Death_CertainInfectiousParasiticDiseases', - 'Count_Death_DiseasesOfBloodAndBloodFormingOrgansAndImmuneDisorders', - 'Count_Death_DiseasesOfTheRespiratorySystem' - ]) - except ValueError: - print('Stress test for get_stat_all FAILED!') - else: - print('Stress test for get_stat_all succeeded.') - - -if __name__ == '__main__': - main() diff --git a/datacommons/key.py b/datacommons/key.py deleted file mode 100644 index b10c8c51..00000000 --- a/datacommons/key.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" API key related functions. -""" - -import os - -# Environment variable for API key. -_KEY_ENV = 'DC_API_KEY' - - -def set_api_key(api_key): - os.environ[_KEY_ENV] = api_key - - -def get_api_key(): - return os.environ.get(_KEY_ENV, '') diff --git a/datacommons/node.py b/datacommons/node.py deleted file mode 100644 index 5823fe48..00000000 --- a/datacommons/node.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" API to request node information. -""" - -from typing import Dict, List - -from datacommons.requests import _post -from datacommons.utils import _get_arrow -from datacommons.utils import _get_direction - - -def properties(nodes: List[str], is_out: bool = True) -> Dict[str, List[str]]: - """Retrieves all the properties for a list of nodes. - - Note this only returns the property labels, not the values. - Args: - nodes: List of DCIDs. - is_out: Whether to return out going properties. - Returns: - A dict keyed by node DCID, with the values being a list of properties - for the queried node. - """ - resp = _post('/v2/node', {'nodes': nodes, 'property': _get_arrow(is_out)}) - result = {} - for node, item in resp.get('data', {}).items(): - properties = item.get('properties', []) - result[node] = properties - return result - - -def property_values(nodes: List[str], - property: str, - is_out: bool = True) -> Dict[str, List[str]]: - """Retrieves the property values for a list of nodes. - Args: - nodes: List of DCIDs. - property: The property label to query for. - is_out: Whether the property is out going. - Returns: - A dict keyed by node DCID, with the values being a list of values - for the queried property. - """ - resp = _post(f'/v1/bulk/property/values/{_get_direction(is_out)}', { - 'nodes': nodes, - 'property': property, - }) - result = {} - for item in resp.get('data', []): - node, values = item['node'], item.get('values', []) - result[node] = [] - for v in values: - if 'dcid' in v: - result[node].append(v['dcid']) - else: - result[node].append(v['value']) - return result - - -def triples(nodes: List[str], - is_out: bool = True) -> Dict[str, Dict[str, List[object]]]: - """Retrieves the triples for a node. - Args: - nodes: List of DCIDs. - is_out: Whether the returned property is out going for the queried - nodes. - Returns: - A two level dict keyed by node DCID, then by the arc property, with - a list of values or DCIDs. - """ - resp = _post(f'/v1/bulk/triples/{_get_direction(is_out)}', - data={'nodes': nodes}) - result = {} - for item in resp.get('data', []): - node, triples = item['node'], item.get('triples', {}) - result[node] = {} - for property, other_nodes in triples.items(): - result[node][property] = other_nodes.get('nodes', []) - return result diff --git a/datacommons/places.py b/datacommons/places.py deleted file mode 100644 index e19da4a0..00000000 --- a/datacommons/places.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API Places Module. - -Provides convenience functions for working with Places in the Data Commons -Graph. This submodule implements the ability to access :obj:`Place`'s -within a collection of nodes identified by dcid. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datacommons.utils as utils - - -def get_places_in(dcids, place_type): - """ Returns :obj:`Place`s contained in :code:`dcids` of type - :code:`place_type`. - - Args: - dcids (:obj:`iterable` of :obj:`str`): Dcids to get contained in places. - place_type (:obj:`str`): The type of places contained in the given dcids to - filter by. - - Returns: - The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given - dcid to a list of places identified by dcids of the given `place_type`. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to get all Counties contained in - `California `_. Specifying - the :code:`dcids` as a :obj:`list` result in the following. - - >>> get_places_in(["geoId/06"], "County") - { - 'geoId/06': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - 'geoId/06067', - ... - # and 53 more - ] - } - """ - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - url = utils._API_ROOT + utils._API_ENDPOINTS['get_places_in'] - payload = utils._send_request(url, - req_json={ - 'dcids': dcids, - 'place_type': place_type, - }) - - # Create the results and format it appropriately - result = utils._format_expand_payload(payload, 'place', must_exist=dcids) - return result - - -def get_stats(dcids, - stats_var, - obs_dates='latest', - measurement_method=None, - unit=None, - obs_period=None): - """ Returns :obj:`TimeSeries` for :code:`dcids` \ - based on the :code:`stats_var`. - - Args: - dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for. - stats_var (:obj:`str`): The dcid of the :obj:StatisticalVariable. - obs_dates (:obj:`str` or :obj:`iterable` of :obj:`str`): - Which observation to return. - Can be 'latest', 'all', or an iterable of dates in 'YYYY-MM-DD' format. - measurement_method (:obj:`str`): Optional, the dcid of the preferred - `measurementMethod` value. - unit (:obj:`str`): Optional, the dcid of the preferred `unit` value. - obs_period (:obj:`str`): Optional, the dcid of the preferred - `observationPeriod` value. - Returns: - A :obj:`dict` mapping the :obj:`Place` identified by the given :code:`dcid` - to its place name and the :obj:`TimeSeries` associated with the - :obj:`StatisticalVariable` identified by the given :code:`stats_var` - and filtered by :code:`obs_dates` and optional args. - See example below for more detail about how the returned :obj:`dict` is - structured. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to get the :obj:`TimeSeries` of the number of males - at least 25 years old that attended 12th grade but did not receive - a high school diploma - (`dc/0hyp6tkn18vcb `_) - in `Arkansas `_ - and `California `_. - - >>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb") - { - 'geoId/05': { - 'place_name': 'Arkansas' - 'data': { - '2011':18136, - '2012':17279, - '2013':17459, - '2014':16966, - '2015':17173, - '2016':17041, - '2017':17783, - '2018':18003 - }, - }, - 'geoId/05': { - 'place_name': 'California' - 'data': { - '2011':316667, - '2012':324116, - '2013':331853, - '2014':342818, - '2015':348979, - '2016':354806, - '2017':360645, - '2018':366331 - }, - }, - } - """ - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - url = utils._API_ROOT + utils._API_ENDPOINTS['get_stats'] - batches = -(-len(dcids) // utils._QUERY_BATCH_SIZE - ) # Ceil to get # of batches. - res = {} - for i in range(batches): - req_json = { - 'place': - dcids[i * utils._QUERY_BATCH_SIZE:(i + 1) * - utils._QUERY_BATCH_SIZE], - 'stats_var': - stats_var, - } - if measurement_method: - req_json['measurement_method'] = measurement_method - if unit: - req_json['unit'] = unit - if obs_period: - req_json['observation_period'] = obs_period - payload = utils._send_request(url, req_json) - if obs_dates == 'all': - res.update(payload) - elif obs_dates == 'latest': - for geo, stats in payload.items(): - if not stats: - continue - time_series = stats.get('data') - if not time_series: - continue - max_date = max(time_series) - max_date_stat = time_series[max_date] - time_series.clear() - time_series[max_date] = max_date_stat - res[geo] = stats - elif obs_dates: - obs_dates = set(obs_dates) - for geo, stats in payload.items(): - if not stats: - continue - time_series = stats.get('data') - if not time_series: - continue - for date in list(time_series): - if date not in obs_dates: - time_series.pop(date) - res[geo] = stats - return res - - -def get_related_places(dcids, - population_type, - measured_property, - measurement_method, - stat_type, - constraining_properties={}, - within_place='', - per_capita=False, - same_place_type=False): - """ Returns :obj:`Place`s related to :code:`dcids` for the given constraints. - - Args: - dcids (:obj:`iterable` of :obj:`str`): Dcids to get related places. - population_type (:obj:`str`): The type of statistical population. - measured_property (:obj:`str`): The measured property. - measurement_method(:obj:`str`): The measurement method for the observation. - stat_type (:obj:`str`): The statistical type for the observation. - constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional): - A map from constraining property to the value that the - :obj:`StatisticalPopulation` should be constrained by. - within_place(:obj:`str`): Optional, the DCID of the place that all the - related places are contained in. - per_capita(:obj:`bool`): Optional, whether to take into account - `PerCapita` when compute the relatedness. - same_place_type(:obj:`bool`): Optional, whether to require all the - related places under the same place type. - - Returns: - The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given - dcid to a list of related places for the given constraints. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to get all related places of - `Santa Clara county ` - Specifying the :code:`dcids` as a :obj:`list` result in the following. - - >>> get_related_places(["geoId/06"], "Person", { - "age": "Years21To64", - "gender": "Female" - }, "count", "CenusACS5yrSurvey", "measuredValue") - { - 'geoId/06085': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - ] - } - """ - dcids = filter(lambda v: v == v, dcids) # Filter out NaN values - dcids = list(dcids) - url = utils._API_ROOT + utils._API_ENDPOINTS['get_related_places'] - pvs = [] - for p in constraining_properties: - pvs.append({'property': p, 'value': constraining_properties[p]}) - req_json = { - 'dcids': dcids, - 'populationType': population_type, - 'pvs': pvs, - 'measuredProperty': measured_property, - 'statType': '', # TODO: Set to stat_type when having it in BT data. - 'measurementMethod': measurement_method, - 'withinPlace': within_place, - 'perCapita': per_capita, - 'samePlaceType': same_place_type, - } - payload = utils._send_request(url, req_json=req_json) - return payload diff --git a/datacommons/requests.py b/datacommons/requests.py deleted file mode 100644 index d6b47bf5..00000000 --- a/datacommons/requests.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Send http requests to Data Commons REST API endpoints. -""" - -from typing import Dict - -import requests - -import datacommons.key as key - -# REST API endpoint root -_API_ROOT = "https://api.datacommons.org" - - -def _post(path: str, data={}) -> Dict: - url = _API_ROOT + path - headers = {'Content-Type': 'application/json'} - api_key = key.get_api_key() - if api_key: - headers['x-api-key'] = api_key - try: - resp = requests.post(url, json=data, headers=headers) - if resp.status_code != 200: - raise Exception( - f'{resp.status_code}: {resp.reason}\n{resp.json()["message"]}') - return resp.json() - except requests.exceptions.Timeout: - raise Exception('Data request timed out, please try again.') - except requests.exceptions.RequestException as e: - raise e diff --git a/datacommons/setup.py b/datacommons/setup.py deleted file mode 100644 index e05d54b4..00000000 --- a/datacommons/setup.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Build and distribute the datacommons package to PyPI.""" -import os - -from setuptools import setup - -dir_path = os.path.dirname(os.path.realpath(__file__)) -with open(os.path.join(dir_path, 'README.md'), 'r') as fh: - long_description = fh.read() - -# Package metadata. -NAME = 'datacommons' -DESCRIPTION = 'A library to access Data Commons Python API.' -URL = 'https://github.com/datacommonsorg/api-python' -EMAIL = 'support@datacommons.org' -AUTHOR = 'datacommons.org' -REQUIRES_PYTHON = '>=3.7' -VERSION = '1.4.4' -REQUIRED = ['six', 'requests'] -PACKAGES = ['datacommons'] - -setup( - name=NAME, - version=VERSION, - description=DESCRIPTION, - long_description=long_description, - long_description_content_type='text/markdown', - author=AUTHOR, - author_email=EMAIL, - maintainer=AUTHOR, - maintainer_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - packages=PACKAGES, - install_requires=REQUIRED, - include_package_data=True, - license='Apache 2.0', - classifiers=[ - 'Development Status :: 7 - Inactive', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Topic :: Software Development', - ], -) diff --git a/datacommons/sparql.py b/datacommons/sparql.py deleted file mode 100644 index b3fab9c5..00000000 --- a/datacommons/sparql.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API Query Module. - -Implements functions for sending graph queries to the Data Commons Graph. -""" - -from datacommons.requests import _post - - -def query(query_string, select=None): - """ Returns the results of executing a SPARQL query on the Data Commons graph. - - Args: - query_string (:obj:`str`): The SPARQL query string. - select (:obj:`func` accepting a row of the query result): A function that - selects rows to be returned by :code:`query`. This function accepts a row - on the results of executing :code:`query_string` and returns True if and - only if the row is to be returned by :code:`query`. The row passed in as - an argument is represented as a :obj:`dict` that maps a query variable in - :code:`query_string` to its value in the given row. - - Returns: - A table, represented as a :obj:`list` of rows, resulting from executing the - given SPARQL query. Each row is a :obj:`dict` mapping query variable to its - value in the row. If `select` is not `None`, then a row is included in the - returned :obj:`list` if and only if `select` returns :obj:`True` for that - row. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to query for the name associated with three states identified - by their dcids - `California `_, - `Kentucky `_, and - `Maryland `_. - - >>> query_str = ''' - ... SELECT ?name ?dcid - ... WHERE { - ... ?a typeOf Place . - ... ?a name ?name . - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ... ?a dcid ?dcid - ... } - ... ''' - >>> result = query(query_str) - >>> for r in result: - ... print(r) - {"?name": "Maryland", "?dcid": "geoId/24"} - {"?name": "Kentucky", "?dcid": "geoId/21"} - {"?name": "California", "?dcid": "geoId/06"} - - Optionally, we can specify which rows are returned by setting :code:`select` - like so. The following returns all rows where the name is "Maryland". - - >>> selector = lambda row: row['?name'] == 'Maryland' - >>> result = query(query_str, select=selector) - >>> for r in result: - ... print(r) - {"?name": "Maryland", "?dcid": "geoId/24"} - """ - resp = _post('/query', {'sparql': query_string}) - # Iterate through the query results - header = resp.get('header') - if header is None: - raise ValueError('Ill-formatted response: does not contain a header.') - result_rows = [] - for row in resp.get('rows', []): - # Construct the map from query variable to cell value. - row_map = {} - for idx, cell in enumerate(row.get('cells', [])): - if idx > len(header): - raise ValueError('Query error: unexpected cell {}'.format(cell)) - if 'value' not in cell: - raise ValueError('Query error: cell missing value {}'.format(cell)) - cell_var = header[idx] - row_map[cell_var] = cell['value'] - # Add the row to the result rows if it is selected - if select is None or select(row_map): - result_rows.append(row_map) - return result_rows diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py deleted file mode 100644 index 938d4dcc..00000000 --- a/datacommons/stat_vars.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Data Commons Python API Stat Module. - -Provides functions for getting data on StatisticalVariables from Data Commons Graph. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -import six - -import datacommons.utils as utils - -# stat_var specific batch size. -_STAT_BATCH_SIZE = 2000 - - -def get_stat_value(place, - stat_var, - date=None, - measurement_method=None, - observation_period=None, - unit=None, - scaling_factor=None): - """Returns a value for `place` based on the `stat_var`. - - Args: - place (`str`): The dcid of Place to query for. - stat_var (`str`): The dcid of the StatisticalVariable. - date (`str`): Optional, the preferred date of observation - in ISO 8601 format. If not specified, returns the latest observation. - measurement_method (`str`): Optional, the dcid of the preferred - `measurementMethod` value. - observation_period (`str`): Optional, the preferred - `observationPeriod` value. - unit (`str`): Optional, the dcid of the preferred `unit` value. - scaling_factor (`int`): Optional, the preferred `scalingFactor` value. - Returns: - A `float` the value of `stat_var` for `place`, filtered - by optional args. If no data, returns nan. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - >>> get_stat_value("geoId/05", "Count_Person") - 366331 - """ - url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value'] - url += '?place={}&stat_var={}'.format(place, stat_var) - if date: - url += '&date={}'.format(date) - if measurement_method: - url += '&measurement_method={}'.format(measurement_method) - if observation_period: - url += '&observation_period={}'.format(observation_period) - if unit: - url += '&unit={}'.format(unit) - if scaling_factor: - url += '&scaling_factor={}'.format(scaling_factor) - - try: - res_json = utils._send_request(url, post=False, use_payload=False) - except ValueError: - return float('nan') - if 'value' not in res_json: - return float('nan') - return res_json['value'] - - -def get_stat_series(place, - stat_var, - measurement_method=None, - observation_period=None, - unit=None, - scaling_factor=None): - """Returns a `dict` mapping dates to value of `stat_var` for `place`. - - Args: - place (`str`): The dcid of Place to query for. - stat_var (`str`): The dcid of the StatisticalVariable. - measurement_method (`str`): Optional, the dcid of the preferred - `measurementMethod` value. - observation_period (`str`): Optional, the preferred - `observationPeriod` value. - unit (`str`): Optional, the dcid of the preferred `unit` value. - scaling_factor (`int`): Optional, the preferred `scalingFactor` value. - Returns: - A `dict` mapping dates to value of `stat_var` for `place`, - representing a time series that satisfies all input parameters. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - >>> get_stat_series("geoId/05", "Count_Person") - {"1962":17072000,"2009":36887615,"1929":5531000,"1930":5711000} - """ - url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_series'] - url += '?place={}&stat_var={}'.format(place, stat_var) - if measurement_method: - url += '&measurement_method={}'.format(measurement_method) - if observation_period: - url += '&observation_period={}'.format(observation_period) - if unit: - url += '&unit={}'.format(unit) - if scaling_factor: - url += '&scaling_factor={}'.format(scaling_factor) - - try: - res_json = utils._send_request(url, post=False, use_payload=False) - except ValueError: - return {} - - if 'series' not in res_json: - return {} - return res_json['series'] - - -def get_stat_all(places, stat_vars): - """Returns a nested `dict` of all time series for `places` and `stat_vars`. - - Args: - places (`Iterable` of `str`): The dcids of Places to query for. - stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables. - Returns: - A nested `dict` mapping Places to StatisticalVariables and all available - time series for each Place and StatisticalVariable pair. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"]) - { - "geoId/05": { - "Count_Person": { - "sourceSeries": [ - { - "val": { - "2010": 1633, - "2011": 1509, - "2012": 1581, - }, - "observationPeriod": "P1Y", - "importName": "Wikidata", - "provenanceDomain": "wikidata.org" - }, - { - "val": { - "2010": 1333, - "2011": 1309, - "2012": 131, - }, - "observationPeriod": "P1Y", - "importName": "CensusPEPSurvey", - "provenanceDomain": "census.gov" - } - ], - } - }, - "Count_Person_Male": { - "sourceSeries": [ - { - "val": { - "2010": 1633, - "2011": 1509, - "2012": 1581, - }, - "observationPeriod": "P1Y", - "importName": "CensusPEPSurvey", - "provenanceDomain": "census.gov" - } - ], - } - }, - "geoId/02": { - "Count_Person": {}, - "Count_Person_Male": { - "sourceSeries": [ - { - "val": { - "2010": 13, - "2011": 13, - "2012": 322, - }, - "observationPeriod": "P1Y", - "importName": "CensusPEPSurvey", - "provenanceDomain": "census.gov" - } - ] - } - } - } - """ - url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all'] - # Cast iterable-like to list. - places = list(places) - stat_vars = list(stat_vars) - - # Aiming for _STAT_BATCH_SIZE entries total. - # _STAT_BATCH_SIZE = num places x num stat_vars, so aim for - # _STAT_BATCH_SIZE/len(stat_vars) places per batch. - places_per_batch = _STAT_BATCH_SIZE // len(stat_vars) - # Get number of batches via an arithmetic ceiling trick: - # 11//10 rounds down to 1. - # -11//10 rounds down to -2. - # We can divide with, then remove the negative to get the ceiling. - batches = -(-len(places) // places_per_batch) - res = {} - for i in range(batches): - req_json = { - 'stat_vars': stat_vars, - 'places': places[i * places_per_batch:(i + 1) * places_per_batch] - } - # Send the request - res_json = utils._send_request(url, req_json=req_json, use_payload=False) - if 'placeData' not in res_json: - # The REST API spec will always return a dictionary under - # placeData, even if no places exist or have no - # data. If no Places are provided, REST will return an - # error, which will have been caught and passed on in - # _send_request. - raise ValueError("Unexpected response from REST stat/all API.") - - # Unnest the REST response for keys that have single-element values. - place_statvar_series = collections.defaultdict(dict) - for place_dcid, place in res_json['placeData'].items(): - stat_var_data = place.get('statVarData') - if not stat_var_data: - # The REST API spec will always return a dictionary under - # statVarData, even if no StatVars exist or have no - # data. If no StatVars are provided, REST will return an - # error, which will have been caught and passed on in - # _send_request. - raise ValueError("Unexpected response from REST stat/all API.") - for stat_var_dcid, stat_var in stat_var_data.items(): - place_statvar_series[place_dcid][stat_var_dcid] = stat_var - res.update(dict(place_statvar_series)) - - return res diff --git a/datacommons/test/__init__.py b/datacommons/test/__init__.py deleted file mode 100644 index 7c07b241..00000000 --- a/datacommons/test/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/datacommons/test/core_test.py b/datacommons/test/core_test.py deleted file mode 100644 index ce29ca8f..00000000 --- a/datacommons/test/core_test.py +++ /dev/null @@ -1,499 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for core methods in the Data Commons Python API. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -import json -import unittest - -import six.moves.urllib as urllib - -import datacommons as dc -import datacommons.utils as utils - - -def request_mock(*args, **kwargs): - """ A mock urlopen in the urllib package. """ - - # Create the mock response object. - class MockResponse: - - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - # Get the request data - req = args[0] - data = json.loads(req.data) - - # Mock responses for urlopen requests to get_property_labels. - if req.get_full_url( - ) == utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']: - if data['dcids'] == ['geoId/0649670']: - # Response for sending a single dcid to get_property_labels - out_arcs = ['containedInPlace', 'name', 'geoId', 'typeOf'] - res_json = json.dumps( - {'geoId/0649670': { - 'inLabels': [], - 'outLabels': out_arcs - }}) - return MockResponse(json.dumps({'payload': res_json})) - elif data['dcids'] == ['State', 'County', 'City']: - # Response for sending multiple dcids to get_property_labels - in_arcs = ['typeOf'] - out_arcs = ['name', 'provenance', 'subClassOf', 'typeOf', 'url'] - res_json = json.dumps({ - 'City': { - 'inLabels': in_arcs, - 'outLabels': out_arcs - }, - 'County': { - 'inLabels': in_arcs, - 'outLabels': out_arcs - }, - 'State': { - 'inLabels': in_arcs, - 'outLabels': out_arcs - } - }) - return MockResponse(json.dumps({'payload': res_json})) - elif data['dcids'] == ['dc/MadDcid']: - # Response for sending a dcid that doesn't exist to get_property_labels - res_json = json.dumps({'dc/MadDcid': {'inLabels': [], 'outLabels': []}}) - return MockResponse(json.dumps({'payload': res_json})) - elif data['dcids'] == []: - # Response for sending no dcids to get_property_labels - res_json = json.dumps({}) - return MockResponse(json.dumps({'payload': res_json})) - - # Mock responses for urlopen requests to get_property_values - if req.get_full_url( - ) == utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']: - if data['dcids'] == ['geoId/06085', 'geoId/24031']\ - and data['property'] == 'containedInPlace'\ - and data['value_type'] == 'Town': - # Response for sending a request for getting Towns containedInPlace of - # Santa Clara County and Montgomery County. - res_json = json.dumps({ - 'geoId/06085': { - 'in': [{ - 'dcid': 'geoId/0644112', - 'name': 'Los Gatos', - 'provenanceId': 'dc/sm3m2w3', - 'types': ['City', 'Town'] - }, { - 'dcid': 'geoId/0643294', - 'name': 'Los Altos Hills', - 'provenanceId': 'dc/sm3m2w3', - 'types': ['City', 'Town'] - }], - 'out': [] - }, - 'geoId/24031': { - 'in': [{ - 'dcid': 'geoId/2462850', - 'name': 'Poolesville', - 'provenanceId': 'dc/sm3m2w3', - 'types': ['City', 'Town'] - },], - 'out': [] - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['geoId/06085', 'geoId/24031']\ - and data['property'] == 'name': - # Response for sending a request for the name of multiple dcids. - res_json = json.dumps({ - 'geoId/06085': { - 'in': [], - 'out': [{ - 'value': 'Santa Clara County', - 'provenanceId': 'dc/sm3m2w3', - },] - }, - 'geoId/24031': { - 'in': [], - 'out': [{ - 'value': 'Montgomery County', - 'provenanceId': 'dc/sm3m2w3', - },] - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['dc/p/1234'] and data['property'] == 'name': - # Response for sending a request for the name with no data - res_json = json.dumps({'dc/p/1234': {}}) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['geoId/06085', 'geoId/24031']\ - and data['property'] == 'madProperty': - # Response for sending a request with a property that does not exist. - res_json = json.dumps({ - 'geoId/06085': { - 'in': [], - 'out': [] - }, - 'geoId/24031': { - 'in': [], - 'out': [] - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['geoId/06085', 'dc/MadDcid']\ - and data['property'] == 'containedInPlace': - # Response for sending a request with a single dcid that does not exist. - res_json = json.dumps({ - 'geoId/06085': { - 'in': [{ - 'dcid': 'geoId/0644112', - 'name': 'Los Gatos', - 'provenanceId': 'dc/sm3m2w3', - 'types': ['City', 'Town'] - },], - 'out': [] - }, - 'dc/MadDcid': { - 'in': [], - 'out': [] - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid']: - # Response for sending a request where both dcids do not exist. - res_json = json.dumps({ - 'dc/MadDcid': { - 'in': [], - 'out': [] - }, - 'dc/MadderDcid': { - 'in': [], - 'out': [] - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == [] and data['property'] == 'containedInPlace': - # Response for sending a request where no dcids are given. - res_json = json.dumps({}) - return MockResponse(json.dumps({'payload': res_json})) - - # Mock responses for urlopen requests to get_triples - if req.get_full_url( - ) == utils._API_ROOT + utils._API_ENDPOINTS['get_triples']: - if data['dcids'] == ['geoId/06085', 'geoId/24031']: - # Response for sending a request with two valid dcids. - res_json = json.dumps({ - 'geoId/06085': [ - { - "subjectId": "geoId/06085", - "predicate": "name", - "objectValue": "Santa Clara County" - }, - { - "subjectId": "geoId/0649670", - "subjectName": "Mountain View", - "subjectTypes": ["City"], - "predicate": "containedInPlace", - "objectId": "geoId/06085", - "objectName": "Santa Clara County" - }, - { - "subjectId": "geoId/06085", - "predicate": "containedInPlace", - "objectId": "geoId/06", - "objectName": "California" - }, - ], - 'geoId/24031': [ - { - "subjectId": "geoId/24031", - "predicate": "name", - "objectValue": "Montgomery County" - }, - { - "subjectId": "geoId/2467675", - "subjectName": "Rockville", - "subjectTypes": ["City"], - "predicate": "containedInPlace", - "objectId": "geoId/24031", - "objectName": "Montgomery County" - }, - { - "subjectId": "geoId/24031", - "predicate": "containedInPlace", - "objectId": "geoId/24", - "objectName": "Maryland" - }, - ] - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['geoId/06085', 'dc/MadDcid']: - # Response for sending a request where one dcid does not exist. - res_json = json.dumps({ - 'geoId/06085': [ - { - "subjectId": "geoId/06085", - "predicate": "name", - "objectValue": "Santa Clara County" - }, - { - "subjectId": "geoId/0649670", - "subjectName": "Mountain View", - "subjectTypes": ["City"], - "predicate": "containedInPlace", - "objectId": "geoId/06085", - "objectName": "Santa Clara County" - }, - { - "subjectId": "geoId/06085", - "predicate": "containedInPlace", - "objectId": "geoId/06", - "objectName": "California" - }, - ], - 'dc/MadDcid': [] - }) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid']: - # Response for sending a request where both dcids do not exist. - res_json = json.dumps({'dc/MadDcid': [], 'dc/MadderDcid': []}) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == []: - # Response for sending a request where no dcids are given. - res_json = json.dumps({}) - return MockResponse(json.dumps({'payload': res_json})) - - # Otherwise, return an empty response and a 404. - return urllib.error.HTTPError(None, 404, None, None, None) - - -class TestGetPropertyLabels(unittest.TestCase): - """ Unit tests for get_property_labels. """ - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_single_dcid(self, urlopen_mock): - """ Calling get_property_labels with a single dcid returns a valid - result. - """ - # Test for outgoing property labels - out_props = dc.get_property_labels(['geoId/0649670']) - self.assertDictEqual( - out_props, - {'geoId/0649670': ["containedInPlace", "name", "geoId", "typeOf"]}) - - # Test with out=False - in_props = dc.get_property_labels(['geoId/0649670'], out=False) - self.assertDictEqual(in_props, {'geoId/0649670': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self, urlopen_mock): - """ Calling get_property_labels returns valid results with multiple - dcids. - """ - dcids = ['State', 'County', 'City'] - expected_in = ["typeOf"] - expected_out = ["name", "provenance", "subClassOf", "typeOf", "url"] - - # Test for outgoing property labels - out_props = dc.get_property_labels(dcids) - self.assertDictEqual(out_props, { - 'State': expected_out, - 'County': expected_out, - 'City': expected_out, - }) - - # Test for incoming property labels - in_props = dc.get_property_labels(dcids, out=False) - self.assertDictEqual(in_props, { - 'State': expected_in, - 'County': expected_in, - 'City': expected_in, - }) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen_mock): - """ Calling get_property_labels with dcids that do not exist returns empty - results. - """ - # Test for outgoing property labels - out_props = dc.get_property_labels(['dc/MadDcid']) - self.assertDictEqual(out_props, {'dc/MadDcid': []}) - - # Test for incoming property labels - in_props = dc.get_property_labels(['dc/MadDcid'], out=False) - self.assertDictEqual(in_props, {'dc/MadDcid': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self, urlopen_mock): - """ Calling get_property_labels with no dcids returns empty results. """ - - # Test for outgoing property labels - out_props = dc.get_property_labels([]) - self.assertDictEqual(out_props, {}) - - # Test for incoming property labels - in_props = dc.get_property_labels([], out=False) - self.assertDictEqual(in_props, {}) - - -class TestGetPropertyValues(unittest.TestCase): - """ Unit tests for get_property_values. """ - - # --------------------------- STANDARD UNIT TESTS --------------------------- - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self, urlopen_mock): - """ Calling get_property_values with multiple dcids returns valid - results. - """ - dcids = ['geoId/06085', 'geoId/24031'] - - # Get the containedInPlace Towns for Santa Clara and Montgomery County. - towns = dc.get_property_values(dcids, - 'containedInPlace', - out=False, - value_type='Town') - self.assertDictEqual( - towns, { - 'geoId/06085': ['geoId/0643294', 'geoId/0644112'], - 'geoId/24031': ['geoId/2462850'] - }) - - dcids = ['geoId/06085', 'geoId/24031', float('nan')] - # Handle NaN values - towns = dc.get_property_values(dcids, - 'containedInPlace', - out=False, - value_type='Town') - self.assertDictEqual( - towns, { - 'geoId/06085': ['geoId/0643294', 'geoId/0644112'], - 'geoId/24031': ['geoId/2462850'] - }) - - # Get the name of Santa Clara and Montgomery County. - names = dc.get_property_values(dcids, 'name') - self.assertDictEqual(names, { - 'geoId/06085': ['Santa Clara County'], - 'geoId/24031': ['Montgomery County'] - }) - - # Return empty result when there is no data. - names = dc.get_property_values(['dc/p/1234'], 'name') - self.assertDictEqual(names, {'dc/p/1234': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen_mock): - """ Calling get_property_values with dcids that do not exist returns empty - results. - """ - bad_dcids_1 = ['geoId/06085', 'dc/MadDcid'] - bad_dcids_2 = ['dc/MadDcid', 'dc/MadderDcid'] - - # Get entities containedInPlace of Santa Clara County and a dcid that does - # not exist. - contained_1 = dc.get_property_values(bad_dcids_1, - 'containedInPlace', - out=False) - self.assertDictEqual(contained_1, { - 'geoId/06085': ['geoId/0644112'], - 'dc/MadDcid': [] - }) - - # Get entities containedInPlace for two dcids that do not exist. - contained_2 = dc.get_property_values(bad_dcids_2, 'containedInPlace') - self.assertDictEqual(contained_2, {'dc/MadDcid': [], 'dc/MadderDcid': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_property(self, urlopen_mock): - """ Calling get_property_values with a property that does not exist returns - empty results. - """ - # Get propery values for a property that does not exist. - prop_vals = dc.get_property_values(['geoId/06085', 'geoId/24031'], - 'madProperty') - self.assertDictEqual(prop_vals, {'geoId/06085': [], 'geoId/24031': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self, urlopen_mock): - """ Calling get_property_values with no dcids returns empty results. """ - # Get property values with an empty list of dcids. - prop_vals = dc.get_property_values([], 'containedInPlace') - self.assertDictEqual(prop_vals, {}) - - -class TestGetTriples(unittest.TestCase): - """ Unit tests for get_triples. """ - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self, urlopen_mock): - """ Calling get_triples with proper dcids returns valid results. """ - # Call get_triples - triples = dc.get_triples(['geoId/06085', 'geoId/24031']) - self.assertDictEqual( - triples, { - 'geoId/06085': [ - ('geoId/06085', 'name', 'Santa Clara County'), - ('geoId/0649670', 'containedInPlace', 'geoId/06085'), - ('geoId/06085', 'containedInPlace', 'geoId/06'), - ], - 'geoId/24031': [ - ('geoId/24031', 'name', 'Montgomery County'), - ('geoId/2467675', 'containedInPlace', 'geoId/24031'), - ('geoId/24031', 'containedInPlace', 'geoId/24'), - ] - }) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen_mock): - """ Calling get_triples with dcids that do not exist returns empty - results. - """ - # Call get_triples where one dcid does not exist - triples_1 = dc.get_triples(['geoId/06085', 'dc/MadDcid']) - self.assertDictEqual( - triples_1, { - 'geoId/06085': [ - ('geoId/06085', 'name', 'Santa Clara County'), - ('geoId/0649670', 'containedInPlace', 'geoId/06085'), - ('geoId/06085', 'containedInPlace', 'geoId/06'), - ], - 'dc/MadDcid': [] - }) - - # Call get_triples where both dcids do not exist - triples_1 = dc.get_triples(['dc/MadDcid', 'dc/MadderDcid']) - self.assertDictEqual(triples_1, {'dc/MadDcid': [], 'dc/MadderDcid': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self, urlopen_mock): - """ Calling get_triples with no dcids returns empty results. """ - # Call get_triples with no dcids - triples_1 = dc.get_triples([]) - self.assertDictEqual(triples_1, {}) - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/test/node_test.py b/datacommons/test/node_test.py deleted file mode 100644 index f2e13d72..00000000 --- a/datacommons/test/node_test.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from unittest.mock import patch - -import datacommons - - -class TestProperties(unittest.TestCase): - - @patch("datacommons.node._post") - def test_with_data(self, _post): - - def side_effect(path, data): - if path == "/v2/node" and data == { - "nodes": ["City", "Count_Person", "foo"], - "property": "->" - }: - return { - "data": { - "City": { - "properties": [ - "name", "provenance", "subClassOf", "typeOf" - ] - }, - "Count_Person": { - "properties": [ - "description", "measuredProperty", "memberOf", "name", - "populationType", "provenance", "statType", "typeOf" - ] - }, - "foo": {} - } - } - - _post.side_effect = side_effect - response = datacommons.properties(["City", "Count_Person", "foo"]) - assert response == { - "City": ["name", "provenance", "subClassOf", "typeOf"], - "Count_Person": [ - "description", "measuredProperty", "memberOf", "name", - "populationType", "provenance", "statType", "typeOf" - ], - "foo": [] - } - - @patch("datacommons.node._post") - def test_with_direction(self, _post): - - def side_effect(path, data): - if path == "/v2/node" and data == { - "nodes": ["City", "Count_Person", "foo"], - "property": "<-" - }: - return { - "data": { - "City": { - "properties": [ - "placeType", "rangeIncludes", "schoolLocationType", - "typeOf" - ] - }, - "Count_Person": { - "properties": [ - "measurementDenominator", "outputProperty", - "relevantVariable" - ] - }, - "foo": {} - } - } - - _post.side_effect = side_effect - response = datacommons.properties(["City", "Count_Person", "foo"], - is_out=False) - assert response == { - "City": ["placeType", "rangeIncludes", "schoolLocationType", "typeOf"], - "Count_Person": [ - "measurementDenominator", "outputProperty", "relevantVariable" - ], - "foo": [] - } - - -class TestPropertyValues(unittest.TestCase): - - @patch("datacommons.node._post") - def test_with_data(self, _post): - - def side_effect(path, data): - print(path) - if path == "/v1/bulk/property/values/out" and data == { - "nodes": ["geoId/06"], - "property": "name", - }: - return { - "data": [{ - "node": - "geoId/06", - "values": [{ - "provenanceId": "dc/5n63hr1", - "value": "California" - }] - }] - } - - _post.side_effect = side_effect - response = datacommons.property_values(["geoId/06"], "name") - assert response == {"geoId/06": ["California"]} - - @patch("datacommons.node._post") - def test_multiple_values(self, _post): - - def side_effect(path, data): - print(path) - if path == "/v1/bulk/property/values/out" and data == { - "nodes": ["geoId/06"], - "property": "geoOverlaps", - }: - return { - "data": [{ - "node": - "geoId/06", - "values": [{ - "provenanceId": "dc/5n63hr1", - "value": "geoId/05" - }, { - "provenanceId": "dc/5n63hr1", - "value": "geoId/07" - }] - }] - } - - _post.side_effect = side_effect - response = datacommons.property_values(["geoId/06"], "geoOverlaps") - assert response == {"geoId/06": ["geoId/05", "geoId/07"]} - - -class TestTriples(unittest.TestCase): - - @patch("datacommons.node._post") - def test_with_data(self, _post): - - def side_effect(path, data): - print(path) - if path == "/v1/bulk/triples/out" and data == { - "nodes": ["Class"], - }: - return { - "data": [{ - "node": "Class", - "triples": { - "typeOf": { - "nodes": [{ - "name": "Class", - "types": ["Class"], - "dcid": "Class", - "provenanceId": "dc/5l5zxr1" - }, { - "name": "Class", - "types": ["Class"], - "dcid": "Class", - "provenanceId": "dc/5l5zxr1" - }] - }, - "isPartOf": { - "nodes": [{ - "provenanceId": "dc/5l5zxr1", - "value": "http://meta.schema.org" - }] - }, - "name": { - "nodes": [{ - "provenanceId": "dc/5l5zxr1", - "value": "Class" - }] - }, - "provenance": { - "nodes": [{ - "name": "BaseSchema", - "types": ["Provenance"], - "dcid": "dc/5l5zxr1", - "provenanceId": "dc/5l5zxr1" - }] - }, - "sameAs": { - "nodes": [{ - "provenanceId": "dc/5l5zxr1", - "value": "http://www.w3.org/2000/01/rdf-schema" - }] - }, - "subClassOf": { - "nodes": [{ - "name": "Intangible", - "types": ["Class"], - "dcid": "Intangible", - "provenanceId": "dc/5l5zxr1" - }] - } - } - }] - } - - _post.side_effect = side_effect - response = datacommons.triples(["Class"]) - assert response == { - "Class": { - 'isPartOf': [{ - 'provenanceId': 'dc/5l5zxr1', - 'value': 'http://meta.schema.org' - }], - 'name': [{ - 'provenanceId': 'dc/5l5zxr1', - 'value': 'Class' - }], - 'provenance': [{ - 'dcid': 'dc/5l5zxr1', - 'name': 'BaseSchema', - 'provenanceId': 'dc/5l5zxr1', - 'types': ['Provenance'] - }], - 'sameAs': [{ - 'provenanceId': 'dc/5l5zxr1', - 'value': 'http://www.w3.org/2000/01/rdf-schema' - }], - 'subClassOf': [{ - 'dcid': 'Intangible', - 'name': 'Intangible', - 'provenanceId': 'dc/5l5zxr1', - 'types': ['Class'] - }], - 'typeOf': [{ - 'dcid': 'Class', - 'name': 'Class', - 'provenanceId': 'dc/5l5zxr1', - 'types': ['Class'] - }, { - 'dcid': 'Class', - 'name': 'Class', - 'provenanceId': 'dc/5l5zxr1', - 'types': ['Class'] - }] - }, - } diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py deleted file mode 100644 index c563a433..00000000 --- a/datacommons/test/places_test.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for Place methods in the Data Commons Python API. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -import json -import unittest - -import six.moves.urllib as urllib - -import datacommons as dc -import datacommons.utils as utils - - -def request_mock(*args, **kwargs): - """ A mock urlopen requests sent in the requests package. """ - - # Create the mock response object. - class MockResponse: - - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - req = args[0] - data = json.loads(req.data) - - # Mock responses for urlopen requests to get_places_in. - if req.get_full_url( - ) == utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']: - if (data['dcids'] == ['geoId/06085', 'geoId/24031'] and - data['place_type'] == 'City'): - # Response returned when querying for multiple valid dcids. - res_json = json.dumps([ - { - 'dcid': 'geoId/06085', - 'place': 'geoId/0649670', - }, - { - 'dcid': 'geoId/24031', - 'place': 'geoId/2467675', - }, - { - 'dcid': 'geoId/24031', - 'place': 'geoId/2476650', - }, - ]) - return MockResponse(json.dumps({'payload': res_json})) - if (data['dcids'] == ['geoId/06085', 'dc/MadDcid'] and - data['place_type'] == 'City'): - # Response returned when querying for a dcid that does not exist. - res_json = json.dumps([ - { - 'dcid': 'geoId/06085', - 'place': 'geoId/0649670', - }, - ]) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid']\ - and data['place_type'] == 'City': - # Response returned when both given dcids do not exist. - res_json = json.dumps([]) - return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == [] and data['place_type'] == 'City': - res_json = json.dumps([]) - # Response returned when no dcids are given. - return MockResponse(json.dumps({'payload': res_json})) - - # Mock responses for urlopen requests to get_stats. - if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: - if (data['place'] == ['geoId/05', 'geoId/06'] and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - # Response returned when querying for multiple valid dcids. - res_json = json.dumps({ - 'geoId/05': { - 'data': { - '2011': 18136, - '2012': 17279, - '2013': 17459, - '2014': 16966, - '2015': 17173, - '2016': 17041, - '2017': 17783, - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': { - '2011': 316667, - '2012': 324116, - '2013': 331853, - '2014': 342818, - '2015': 348979, - '2016': 354806, - '2017': 360645, - '2018': 366331 - }, - 'place_name': 'California' - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if (data['place'] == ['geoId/00'] and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - # No data for the request - res_json = json.dumps({'geoId/00': None}) - return MockResponse(json.dumps({'payload': res_json})) - if ((data['place'] == ['geoId/05', 'dc/MadDcid'] or - data['place'] == ['geoId/05']) and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - # Response ignores dcid that does not exist. - res_json = json.dumps({ - 'geoId/05': { - 'data': { - '2011': 18136, - '2012': 17279, - '2013': 17459, - '2014': 16966, - '2015': 17173, - '2016': 17041, - '2017': 17783, - '2018': 18003 - }, - 'place_name': 'Arkansas' - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if (data['place'] == ['geoId/06'] and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - res_json = json.dumps({ - 'geoId/06': { - 'data': { - '2011': 316667, - '2012': 324116, - '2013': 331853, - '2014': 342818, - '2015': 348979, - '2016': 354806, - '2017': 360645, - '2018': 366331 - }, - 'place_name': 'California' - } - }) - return MockResponse(json.dumps({'payload': res_json})) - if (data['place'] == ['dc/MadDcid', 'dc/MadderDcid'] and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - # Response returned when both given dcids do not exist. - res_json = json.dumps({}) - return MockResponse(json.dumps({'payload': res_json})) - if data['place'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb': - res_json = json.dumps({}) - # Response returned when no dcids are given. - return MockResponse(json.dumps({'payload': res_json})) - if (data['place'] == ['geoId/48'] and - data['stats_var'] == 'dc/0hyp6tkn18vcb'): - if (data.get('measurement_method') == 'MM1' and - data.get('unit') == 'Inch' and - data.get('observation_period') == 'P1Y'): - res_json = json.dumps({ - 'geoId/48': { - 'data': { - '2015': 1, - '2016': 1, - }, - 'place_name': 'Texas' - } - }) - elif data.get('measurement_method') == 'MM1': - res_json = json.dumps({ - 'geoId/48': { - 'data': { - '2015': 2, - '2016': 2, - }, - 'place_name': 'Texas' - } - }) - else: - res_json = json.dumps({ - 'geoId/48': { - 'data': { - '2015': 3, - '2016': 3, - }, - 'place_name': 'Texas' - } - }) - - return MockResponse(json.dumps({'payload': res_json})) - - # Otherwise, return an empty response and a 404. - return urllib.error.HTTPError(None, 404, None, None, None) - - -class TestGetPlacesIn(unittest.TestCase): - """ Unit stests for get_places_in. """ - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self, urlopen): - """ Calling get_places_in with proper dcids returns valid results. """ - # Call get_places_in - places = dc.get_places_in(['geoId/06085', 'geoId/24031'], 'City') - self.assertDictEqual( - places, { - 'geoId/06085': ['geoId/0649670'], - 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] - }) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen): - """ Calling get_places_in with dcids that do not exist returns empty - results. - """ - # Call get_places_in with one dcid that does not exist - bad_dcids_1 = dc.get_places_in(['geoId/06085', 'dc/MadDcid'], 'City') - self.assertDictEqual(bad_dcids_1, { - 'geoId/06085': ['geoId/0649670'], - 'dc/MadDcid': [] - }) - - # Call get_places_in when both dcids do not exist - bad_dcids_2 = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City') - self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': [], 'dc/MadderDcid': []}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self, urlopen): - """ Calling get_places_in with no dcids returns empty results. """ - # Call get_places_in with no dcids. - bad_dcids = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City') - self.assertDictEqual(bad_dcids, {'dc/MadDcid': [], 'dc/MadderDcid': []}) - - -class TestGetStats(unittest.TestCase): - """ Unit stests for get_stats. """ - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self, urlopen): - """ Calling get_stats with proper dcids returns valid results. """ - # Call get_stats - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb', 'all') - self.assertDictEqual( - stats, { - 'geoId/05': { - 'data': { - '2011': 18136, - '2012': 17279, - '2013': 17459, - '2014': 16966, - '2015': 17173, - '2016': 17041, - '2017': 17783, - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': { - '2011': 316667, - '2012': 324116, - '2013': 331853, - '2014': 342818, - '2015': 348979, - '2016': 354806, - '2017': 360645, - '2018': 366331 - }, - 'place_name': 'California' - } - }) - - # Call get_stats for latest obs - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb', 'latest') - self.assertDictEqual( - stats, { - 'geoId/05': { - 'data': { - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': { - '2018': 366331 - }, - 'place_name': 'California' - } - }) - - # Call get_stats for specific obs - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb', - ['2013', '2018']) - self.assertDictEqual( - stats, { - 'geoId/05': { - 'data': { - '2013': 17459, - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': { - '2013': 331853, - '2018': 366331 - }, - 'place_name': 'California' - } - }) - - # Call get_stats -- dates must be in interable - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb', '2018') - self.assertDictEqual( - stats, { - 'geoId/05': { - 'data': {}, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': {}, - 'place_name': 'California' - } - }) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_opt_args(self, urlopen): - """ Calling get_stats with mmethod, unit, and obs period returns specific data. - """ - # Set the API key - dc.set_api_key('TEST-API-KEY') - - # Call get_stats with all optional args - stats = dc.get_stats(['geoId/48'], 'dc/0hyp6tkn18vcb', 'latest', 'MM1', - 'Inch', 'P1Y') - self.assertDictEqual( - stats, {'geoId/48': { - 'data': { - '2016': 1 - }, - 'place_name': 'Texas' - }}) - - # Call get_stats with mmethod specified - stats = dc.get_stats(['geoId/48'], 'dc/0hyp6tkn18vcb', 'latest', 'MM1') - self.assertDictEqual( - stats, {'geoId/48': { - 'data': { - '2016': 2 - }, - 'place_name': 'Texas' - }}) - - # Call get_stats without optional args - stats = dc.get_stats(['geoId/48'], 'dc/0hyp6tkn18vcb', 'latest') - self.assertDictEqual( - stats, {'geoId/48': { - 'data': { - '2016': 3 - }, - 'place_name': 'Texas' - }}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen): - """ Calling get_stats with dcids that do not exist returns empty - results. - """ - # Call get_stats with one dcid that does not exist - bad_dcids_1 = dc.get_stats(['geoId/05', 'dc/MadDcid'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual( - bad_dcids_1, - {'geoId/05': { - 'data': { - '2018': 18003 - }, - 'place_name': 'Arkansas' - }}) - - # Call get_stats when both dcids do not exist - bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], - 'dc/0hyp6tkn18vcb') - self.assertDictEqual({}, bad_dcids_2) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self, urlopen): - """ Calling get_stats with no dcids returns empty results. """ - # Call get_stats with no dcids. - no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') - self.assertDictEqual({}, no_dcids) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_data(self, urlopen): - """ Calling get_stats with for None data. """ - # Call get_stats with no dcids. - result = dc.get_stats(['geoId/00'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual({}, result) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_batch_request(self, mock_urlopen): - """ Make multiple calls to REST API when number of geos exceeds the batch size. """ - save_batch_size = dc.utils._QUERY_BATCH_SIZE - dc.utils._QUERY_BATCH_SIZE = 1 - - self.assertEqual(0, mock_urlopen.call_count) - stats = dc.get_stats(['geoId/05'], 'dc/0hyp6tkn18vcb', 'latest') - self.assertDictEqual(stats, { - 'geoId/05': { - 'data': { - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - }) - self.assertEqual(1, mock_urlopen.call_count) - - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb', 'latest') - self.assertDictEqual( - stats, { - 'geoId/05': { - 'data': { - '2018': 18003 - }, - 'place_name': 'Arkansas' - }, - 'geoId/06': { - 'data': { - '2018': 366331 - }, - 'place_name': 'California' - } - }) - self.assertEqual(3, mock_urlopen.call_count) - - dc.utils._QUERY_BATCH_SIZE = save_batch_size - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/test/set_api_key_test.py b/datacommons/test/set_api_key_test.py deleted file mode 100644 index b2c7e748..00000000 --- a/datacommons/test/set_api_key_test.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests setting the API Key. -""" -import unittest - -import datacommons.key as key - -_KEY = "test-api-key" - - -class TestApiKey(unittest.TestCase): - """Unit test for setting or not setting the API Key.""" - - def test_set_api_key(self): - key.set_api_key(_KEY) - self.assertEqual(key.get_api_key(), _KEY) - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/test/sparql_test.py b/datacommons/test/sparql_test.py deleted file mode 100644 index 39daed2b..00000000 --- a/datacommons/test/sparql_test.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2022 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for the SPARQL query wrapper. -""" - -import unittest -from unittest.mock import patch - -import datacommons - -_QUERY1 = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ?a dcid ?dcid -} -''') - -_QUERY2 = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/DNE") . - ?a dcid ?dcid -} -''') - - -def _post_mock(path, data): - """ A mock function for _post. """ - if path == "/query" and data['sparql'] == _QUERY1: - return { - 'header': ['?name', '?dcid'], - 'rows': [{ - 'cells': [{ - 'value': 'California' - }, { - 'value': 'geoId/06' - }] - }, { - 'cells': [{ - 'value': 'Kentucky' - }, { - 'value': 'geoId/21' - }] - }, { - 'cells': [{ - 'value': 'Maryland' - }, { - 'value': 'geoId/24' - }] - }] - } - if path == "/query" and data['sparql'] == _QUERY2: - return { - 'header': ['?name', '?dcid'], - } - - # Otherwise, return an empty response and a 404. - return Exception('mock exception') - - -class TestQuery(unittest.TestCase): - """ Unit tests for the Query object. """ - - @patch('datacommons.sparql._post') - def test_rows(self, _post): - """ Sending a valid query returns the correct response. """ - _post.side_effect = _post_mock - # Create the SPARQL query - selector = lambda row: row['?name'] != 'California' - # Issue the query - results = datacommons.query(_QUERY1) - selected_results = datacommons.query(_QUERY2, select=selector) - # Execute the query and iterate through the results. - for idx, row in enumerate(results): - if idx == 0: - self.assertDictEqual(row, {'?name': 'California', '?dcid': 'geoId/06'}) - if idx == 1: - self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) - if idx == 2: - self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) - - # Verify that the select function works. - for idx, row in enumerate(selected_results): - if idx == 0: - self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) - if idx == 1: - self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) - - @patch('datacommons.sparql._post') - def test_no_rows(self, _post): - """ Handles row-less response. """ - _post.side_effect = _post_mock - # Issue the query - self.assertEqual(datacommons.query(_QUERY2), []) - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py deleted file mode 100644 index db5ecc80..00000000 --- a/datacommons/test/stat_vars_test.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for StatVar methods in the Data Commons Python API. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -import json -import math -import unittest - -import six.moves.urllib as urllib - -import datacommons as dc -import datacommons.utils as utils - -# Reusable parts of REST API /stat/all response. -CA_COUNT_PERSON = { - "isDcAggregate": - "true", - "sourceSeries": [{ - "val": { - "1990": 23640, - "1991": 24100, - "1993": 25090, - }, - "observationPeriod": "P1Y", - "importName": "WorldDevelopmentIndicators", - "provenanceDomain": "worldbank.org" - }, { - "val": { - "1790": 3929214, - "1800": 5308483, - "1810": 7239881, - }, - "measurementMethod": "WikidataPopulation", - "importName": "WikidataPopulation", - "provenanceDomain": "wikidata.org" - }, { - "val": { - "1890": 28360, - "1891": 24910, - "1892": 25070, - }, - "measurementMethod": "OECDRegionalStatistics", - "observationPeriod": "P1Y", - "importName": "OECDRegionalDemography", - "provenanceDomain": "oecd.org" - }] -} - -CA_COUNT_PERSON_MALE = { - "sourceSeries": [{ - "val": { - "1990": 12000, - "1991": 14000, - "1992": 14000, - }, - "measurementMethod": "WikidataPopulation", - "importName": "WikidataPopulation", - "provenanceDomain": "wikidata.org" - },] -} - -HU22_COUNT_PERSON = { - "sourceSeries": [{ - "val": { - "1990": 2360, - "1991": 2410, - "1992": 2500, - }, - "measurementMethod": "OECDRegionalStatistics", - "observationPeriod": "P1Y", - "importName": "OECDRegionalDemography", - "provenanceDomain": "oecd.org" - }] -} - -HU22_COUNT_PERSON_MALE = { - "sourceSeries": [{ - "val": { - "1990": 1360, - "1991": 1410, - "1992": 1500, - }, - "measurementMethod": "OECDRegionalStatistics", - "observationPeriod": "P1Y", - "importName": "OECDRegionalDemography", - "provenanceDomain": "oecd.org" - }] -} - -CA_MEDIAN_AGE_PERSON = { - "sourceSeries": [{ - "val": { - "1990": 12, - "1991": 24, - "1992": 24, - }, - "measurementMethod": "WikidataPopulation", - "importName": "WikidataPopulation", - "provenanceDomain": "wikidata.org" - }] -} - - -def request_mock(*args, **kwargs): - """A mock urlopen requests sent in the requests package.""" - - # Create the mock response object. - class MockResponse: - - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - req = args[0] - - stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value'] - stat_series_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ - 'get_stat_series'] - stat_all_url_base = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all'] - - # Mock responses for urlopen requests to get_stat_value. - if req.get_full_url( - ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': - # Response returned when querying with basic args. - return MockResponse(json.dumps({"value": 123})) - if req.get_full_url( - ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010': - # Response returned when querying with observationDate. - return MockResponse(json.dumps({"value": 133})) - if (req.get_full_url() == stat_value_url_base + - '?place=geoId/06&stat_var=Count_Person&' + - 'date=2010&measurement_method=CensusPEPSurvey&' + - 'observation_period=P1Y&unit=RealPeople&scaling_factor=100'): - # Response returned when querying with above optional params. - return MockResponse(json.dumps({"value": 103})) - - # Mock responses for urlopen requests to get_stat_series. - if req.get_full_url( - ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': - # Response returned when querying with basic args. - return MockResponse(json.dumps({"series": {"2000": 1, "2001": 2}})) - if (req.get_full_url() == stat_series_url_base + - '?place=geoId/06&stat_var=Count_Person&' + - 'measurement_method=CensusPEPSurvey&observation_period=P1Y&' + - 'unit=RealPeople&scaling_factor=100'): - - # Response returned when querying with above optional params. - return MockResponse(json.dumps({"series": {"2000": 3, "2001": 42}})) - if (req.get_full_url() == stat_series_url_base + - '?place=geoId/06&stat_var=Count_Person&' + 'measurement_method=DNE'): - - # Response returned when data not available for optional parameters. - # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE - return MockResponse(json.dumps({"series": {}})) - - # Mock responses for urlopen requests to get_stat_all. - if req.get_full_url() == stat_all_url_base: - data = json.loads(req.data) - if (data['places'] == ['geoId/06', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person', 'Count_Person_Male']): - # Response returned when querying with above params. - # Response with data for all Place+StatVar combos. - full_resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - "Count_Person_Male": CA_COUNT_PERSON_MALE, - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - "Count_Person_Male": HU22_COUNT_PERSON_MALE - } - } - } - } - return MockResponse(json.dumps(full_resp)) - - if (data['places'] == ['geoId/06', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person', 'Median_Age_Person']): - # Response returned when querying with above params. - # Median Age missing for HU22. - resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - "Median_Age_Person": CA_MEDIAN_AGE_PERSON - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - "Median_Age_Person": {} - } - } - } - } - return MockResponse(json.dumps(resp)) - - if (data['places'] == ['badPlaceId', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person', 'badStatVarId']): - # Response returned when querying with above params. - # Bad DCIDs for place or statvar. - resp = { - "placeData": { - "badPlaceId": { - "statVarData": { - "Count_Person": {}, - "badStatVarId": {} - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - "badStatVarId": {} - } - } - } - } - return MockResponse(json.dumps(resp)) - - # Otherwise, return an empty response and a 404. - return urllib.error.HTTPError(None, 404, None, None, None) - - -class TestGetStatValue(unittest.TestCase): - """Unit tests for get_stat_value.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling get_stat_value with minimal and proper args.""" - # Call get_stat_value - - self.assertEqual(dc.get_stat_value('geoId/06', 'Count_Person'), 123) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_opt_args(self, urlopen): - """Calling get_stat_value with optional args returns specific data.""" - # Call get_stat_value for specific obs - self.assertEqual(dc.get_stat_value('geoId/06', 'Count_Person', '2010'), 133) - - # Call get_stat_value with all optional args - stat = dc.get_stat_value('geoId/06', 'Count_Person', '2010', - 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100) - self.assertEqual(stat, 103) - - # Call get_stat_series with bogus required args - stat = dc.get_stat_value('foofoo', 'barrbar') - self.assertTrue(math.isnan(stat)) - - -class TestGetStatSeries(unittest.TestCase): - """Unit tests for get_stat_series.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling get_stat_value with minimal and proper args.""" - # Call get_stat_series - stats = dc.get_stat_series('geoId/06', 'Count_Person') - self.assertEqual(stats, {"2000": 1, "2001": 2}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_opt_args(self, urlopen): - """Calling get_stat_value with optional args returns specific data.""" - - # Call get_stat_series with all optional args - stats = dc.get_stat_series('geoId/06', 'Count_Person', 'CensusPEPSurvey', - 'P1Y', 'RealPeople', 100) - self.assertEqual(stats, {"2000": 3, "2001": 42}) - - # Call get_stat_series with bogus required args - stats = dc.get_stat_series('foofoofoo', 'barfoobar') - self.assertEqual(stats, {}) - - # Call get_stat_series with non-satisfiable optional args - stats = dc.get_stat_series('geoId/06', 'Count_Person', 'DNE') - self.assertEqual(stats, {}) - - -class TestGetStatAll(unittest.TestCase): - """Unit tests for get_stat_all.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling get_stat_all with proper args.""" - # Expecting at least one TS per Place+StatVar - stats = dc.get_stat_all(['geoId/06', 'nuts/HU22'], - ['Count_Person', 'Count_Person_Male']) - exp = { - "geoId/06": { - "Count_Person": CA_COUNT_PERSON, - "Count_Person_Male": CA_COUNT_PERSON_MALE, - }, - "nuts/HU22": { - "Count_Person": HU22_COUNT_PERSON, - "Count_Person_Male": HU22_COUNT_PERSON_MALE - } - } - self.assertDictEqual(stats, exp) - # Expecting proper handling of no TS for Place+StatVar combo - stats = dc.get_stat_all(['geoId/06', 'nuts/HU22'], - ['Count_Person', 'Median_Age_Person']) - exp = { - "geoId/06": { - "Count_Person": CA_COUNT_PERSON, - "Median_Age_Person": CA_MEDIAN_AGE_PERSON - }, - "nuts/HU22": { - "Count_Person": HU22_COUNT_PERSON, - "Median_Age_Person": {} - } - } - self.assertDictEqual(stats, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self, urlopen): - stats = dc.get_stat_all(['badPlaceId', 'nuts/HU22'], - ['Count_Person', 'badStatVarId']) - exp = { - "badPlaceId": { - "Count_Person": {}, - "badStatVarId": {} - }, - "nuts/HU22": { - "Count_Person": HU22_COUNT_PERSON, - "badStatVarId": {} - } - } - self.assertDictEqual(stats, exp) - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py deleted file mode 100644 index 6b644bb0..00000000 --- a/datacommons/utils.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Utilities Library. - -Various functions that can aid in the extension of the Data Commons API. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import base64 -from collections import defaultdict -import json -import os -import zlib - -import six.moves.urllib.error -import six.moves.urllib.request - -# --------------------------------- CONSTANTS --------------------------------- - -# REST API endpoint root -_API_ROOT = "https://api.datacommons.org" - -# REST API endpoint paths -_API_ENDPOINTS = { - 'query': '/query', - 'get_property_labels': '/node/property-labels', - 'get_property_values': '/node/property-values', - 'get_triples': '/node/triples', - 'get_places_in': '/node/places-in', - 'get_related_places': '/node/related-places', - 'get_stats': '/bulk/stats', - 'get_stat_value': '/stat/value', - 'get_stat_series': '/stat/series', - 'get_stat_all': '/stat/all', -} - -# The default value to limit to -_MAX_LIMIT = 100 - -# Batch size for heavyweight queries. -_QUERY_BATCH_SIZE = 500 - -# Environment variable names used by the package -_ENV_VAR_API_KEY = 'DC_API_KEY' - -# ------------------------- INTERNAL HELPER FUNCTIONS ------------------------- - - -def _send_request(req_url, - req_json={}, - compress=False, - post=True, - use_payload=True): - """ Sends a POST/GET request to req_url with req_json, default to POST. - - Returns: - The payload returned by sending the POST/GET request formatted as a dict. - """ - headers = {'Content-Type': 'application/json'} - - # Pass along API key if provided - if os.environ.get(_ENV_VAR_API_KEY): - headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] - - # Send the request and verify the request succeeded - if post: - req = six.moves.urllib.request.Request( - req_url, data=json.dumps(req_json).encode('utf-8'), headers=headers) - else: - req = six.moves.urllib.request.Request(req_url, headers=headers) - try: - res = six.moves.urllib.request.urlopen(req) - except six.moves.urllib.error.HTTPError as e: - raise ValueError( - 'Response error: An HTTP {} code was returned by the REST API. ' - 'Printing response\n\n{}'.format(e.code, e.read())) - if isinstance(res, six.moves.urllib.error.HTTPError): - raise ValueError( - 'Response error: An HTTP {} code was returned by the REST API. ' - 'Printing response\n\n{}'.format(res.code, res.reason)) - # Get the JSON - res_json = json.loads(res.read()) - if not use_payload: - return res_json - if 'payload' not in res_json: - raise ValueError('Response error: Payload not found. Printing response\n\n' - '{}'.format(res.text)) - - # If the payload is compressed, decompress and decode it - payload = res_json['payload'] - if compress: - payload = zlib.decompress(base64.b64decode(payload), zlib.MAX_WBITS | 32) - return json.loads(payload) - - -def _format_expand_payload(payload, new_key, must_exist=[]): - """ Formats expand type payloads into dicts from dcids to lists of values. """ - # Create the results dictionary from payload - results = defaultdict(set) - for entry in payload: - if 'dcid' in entry and new_key in entry: - dcid = entry['dcid'] - results[dcid].add(entry[new_key]) - - # Ensure all dcids in must_exist have some entry in results. - for dcid in must_exist: - results[dcid] - return {k: sorted(list(v)) for k, v in results.items()} - - -def _get_direction(out: bool): - return "out" if out else "in" - - -def _get_arrow(out: bool): - """Returns the arrow syntax for an arc direction. - - Args: - out: Whether the arc direction is out. - Returns: - The corresponding arrow syntax. - """ - return "->" if out else "<-" diff --git a/datacommons_client/README.md b/datacommons_client/README.md index 7b2e8741..f4563663 100644 --- a/datacommons_client/README.md +++ b/datacommons_client/README.md @@ -22,18 +22,3 @@ import datacommons_client as dc ``` For more detail on getting started with the API, please visit . - -## About Data Commons - -[Data Commons](https://datacommons.org/) is an open knowledge repository that -provides a unified view across multiple public data sets and statistics. You can -view what [datasets](https://datacommons.org/datasets) are currently ingested -and browse the graph using our [browser](https://datacommons.org/browser). - -## License - -Apache 2.0 - -## Support - -For questions, please send an email to `support@datacommons.org`. diff --git a/datacommons_pandas/README.md b/datacommons_pandas/README.md index 437af89f..218c00d4 100644 --- a/datacommons_pandas/README.md +++ b/datacommons_pandas/README.md @@ -1,42 +1 @@ -**DEPRECATED: This library is no longer maintained. Please migrate to the [datacommons_client](https://pypi.org/project/datacommons-client/) library. For help on translating your requests, see the [Migration guide](https://docs.datacommons.org/api/python/v2/migration.html).** - -# Data Commons Pandas API - -This is a Python library for creating pandas objects with data in the -Data Commons Graph. - -To get started, install this package from pip. - -```bash -pip install datacommons_pandas -``` - -Once the package is installed, import `datacommons_pandas`. - -```python -import datacommons_pandas as dcpd -``` - -For more detail on getting started with the API, please visit our -[API Overview](https://docs.datacommons.org/api/pandas/). - -When you are ready to use the API, you can refer to `examples` for -examples on how to use this package to perform various tasks. More tutorials and -documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)! - -## About Data Commons - -[Data Commons](https://datacommons.org/) is an open knowledge repository that -provides a unified view across multiple public data sets and statistics. You can -view what [datasets](https://datacommons.org/datasets) are currently ingested -and browse the graph using our [browser](https://datacommons.org/browser). - -## License - -Apache 2.0 - -## Support - -For general questions or issues about the API, please open an issue on our -[issues](https://github.com/datacommonsorg/api-python/issues) page. For all other -questions, please send an email to `support@datacommons.org`. +**DEPRECATED: This library has been deprecated. Please migrate to the [datacommons_client](https://pypi.org/project/datacommons-client/) library. For help on translating your requests, see the [Migration guide](https://docs.datacommons.org/api/python/v2/migration.html).** diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py deleted file mode 100644 index c593ca80..00000000 --- a/datacommons_pandas/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# isort: skip_file - -import warnings - -warnings.warn( - "\n\n'datacommons_pandas' is deprecated and will no longer be updated.\n" - "Please migrate to the 'datacommons_client' package.\n" - "Migration guide: https://docs.datacommons.org/api/python/v2/migration.html\n" - "Contact support@datacommons.org with any questions.\n", - category=DeprecationWarning, - stacklevel=2) - -from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_multivariate_dataframe - -################################ SYMLINK FILES ################################ -# We include symlinks to all user-facing functions from the datacommons pkg. # -# This is so that users do not need to import both libraries for pd support. # -# Please keep the below in sync with the __init__.py in the datacommons/ dir # -# TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149 # -##############################################@################################ -# Data Commons SPARQL query support -from datacommons_pandas.sparql import query - -# Data Commons Python API -from datacommons_pandas.core import get_property_labels, get_property_values, get_triples -from datacommons_pandas.places import get_places_in, get_related_places, get_stats -from datacommons_pandas.stat_vars import get_stat_value, get_stat_series, get_stat_all - -from datacommons_pandas.key import set_api_key -from datacommons_pandas.node import properties, property_values, triples diff --git a/datacommons_pandas/core.py b/datacommons_pandas/core.py deleted file mode 120000 index 15f455cf..00000000 --- a/datacommons_pandas/core.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/core.py \ No newline at end of file diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py deleted file mode 100644 index ad2ebef6..00000000 --- a/datacommons_pandas/df_builder.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Data Commons Pandas API DataFrame Builder Module. - -Provides functions for building pandas DataFrames using the Data Commons Graph. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -import pandas as pd -import six - -import datacommons_pandas.stat_vars as dc - - -def build_time_series(place, - stat_var, - measurement_method=None, - observation_period=None, - unit=None, - scaling_factor=None): - """Constructs a pandas Series with `dates` as the index and corresponding `stat_var` statistics as values. - - Args: - place (`str`): The dcid of Place to query for. - stat_var (`str`): The dcid of the StatisticalVariable. - measurement_method (`str`): Optional, the dcid of the preferred - `measurementMethod` value. - observation_period (`str`): Optional, the preferred - `observationPeriod` value. - unit (`str`): Optional, the dcid of the preferred `unit` value. - scaling_factor (`int`): Optional, the preferred `scalingFactor` value. - Returns: - A pandas Series with Place IDs as the index and observed statistics as - values, representing a sorted time series satisfying all optional args. - """ - result_dict = dc.get_stat_series(place, stat_var, measurement_method, - observation_period, unit, scaling_factor) - - # Explicit dtype to avoid warning thrown by pd.Series({}) - if not result_dict: - return pd.Series(result_dict, dtype=object) - else: - return pd.Series(result_dict).sort_index() - - -def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True): - """Groups the result of `get_stat_all` by StatVarObservation options for time series or multivariates. - - Note that this function does not preserve `(place, stat_var)` pairs that - yield no data `from get_stat_all`. In the extreme case that there is no - data for any pairs, raise a ValueError instead of returning an empty dict. - - Args: - places (`str` or `iterable` of `str`): The dcids of Places to query for. - stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables. - keep_series (`boolean`): if True, output time series grouped by - StatVarObservation options; if False, output latest statistics grouped - by StatVarObservation options. - Returns: - A nested dict mapping each StatisticalVariable in `stat_vars` to its - StatVarObservation options. In turn, each StatVarObservation option - maps to a list of rows, one per place, with the place id and stat data. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed, or if there is no data for any (Place, StatisticalVariables) - pair. - """ - if keep_series: - if len(stat_vars) != 1: - raise ValueError( - 'When `keep_series` is set, only one StatisticalVariable for `stat_vars` is allowed.' - ) - res = collections.defaultdict(list) - else: - res = collections.defaultdict(lambda: collections.defaultdict(list)) - - stat_all = dc.get_stat_all(places, stat_vars) - for place, place_data in stat_all.items(): - if not place_data: - continue - for stat_var, stat_var_data in place_data.items(): - if not stat_var_data: - continue - for source_series in stat_var_data['sourceSeries']: - series = source_series['val'] - # Convert dict of SVO options into nested tuple (hashable key). - obs_options = (('measurementMethod', - source_series.get('measurementMethod')), - ('observationPeriod', - source_series.get('observationPeriod')), - ('unit', source_series.get('unit')), - ('scalingFactor', source_series.get('scalingFactor'))) - if keep_series: - res[obs_options].append(dict({'place': place}, **series)) - else: - date = max(series) - res[stat_var][obs_options].append({ - 'place': place, - 'date': date, - 'val': series[date] - }) - if not res: - raise ValueError( - 'No data for any of specified Places and StatisticalVariables.') - if keep_series: - return dict(res) - else: - return {k: dict(v) for k, v in res.items()} - - -def _time_series_pd_input(places, stat_var): - """Returns a `list` of `dict` per element of `places` based on the `stat_var`. - - Data Commons will pick a set of StatVarObservation options that covers the - maximum number of queried places. Among ties, Data Commons selects an option - set with the latest Observation. - - Args: - places (`str` or `iterable` of `str`): The dcids of Places to query for. - stat_var (`str`): The dcid of the StatisticalVariable. - Returns: - A `list` of `dict`, one per element of `places`. Each `dict` consists of - the time series and place identifier. - - Examples: - >>> _time_series_pd_input(["geoId/29", "geoId/33"], "Count_Person") - [ - {'2020-03-07': 20, '2020-03-08': 40, 'place': 'geoId/29'}, - {'2020-08-21': 428, '2020-08-22': 429, 'place': 'geoId/33'} - ] - """ - - rows_dict = _group_stat_all_by_obs_options(places, [stat_var], - keep_series=True) - most_geos = [] - max_geo_count_so_far = 0 - latest_date = [] - latest_date_so_far = '' - for options, rows in rows_dict.items(): - current_geos = len(rows) - if current_geos > max_geo_count_so_far: - max_geo_count_so_far = current_geos - most_geos = [options] - # Reset tiebreaker stats. Recompute after this if-else block. - latest_date = [] - latest_date_so_far = '' - elif current_geos == max_geo_count_so_far: - most_geos.append(options) - else: - # Do not compute tiebreaker stats if no change to most_geos. - # Skip to top of the for loop. - continue - - for row in rows: - dates = set(row.keys()) - dates.remove('place') - row_max_date = max(dates) - if row_max_date > latest_date_so_far: - latest_date_so_far = row_max_date - latest_date = [options] - elif row_max_date == latest_date_so_far: - latest_date.append(options) - for options in most_geos: - if options in latest_date: - return rows_dict[options] - - -def build_time_series_dataframe(places, stat_var, desc_col=False): - """Constructs a pandas DataFrame with `places` as the index and dates of the time series as the columns. - - To ensure statistics are comparable across all Places, when multiple - StatVarObservations options are available for Place and StatVar combos, - Data Commons selects the StatVarObservation options that covers the most - Places, and breaks ties using the StatVarObservation options that yield - the latest Observation for any Place. - - Args: - places (`str` or `iterable` of `str`): The dcids of Places to query for. - stat_var (`str`): The dcid of the StatisticalVariable. - desc_col: Whether to order columns in descending order. - Returns: - A pandas DataFrame with Place IDs as the index, and sorted dates as columns. - """ - try: - if isinstance(places, six.string_types): - places = [places] - else: - places = list(places) - assert all(isinstance(place, six.string_types) for place in places) - except: - raise ValueError( - 'Parameter `places` must be a string object or list-like object of string.' - ) - if not isinstance(stat_var, six.string_types): - raise ValueError('Parameter `stat_var` must be a string.') - - df = pd.DataFrame.from_records(_time_series_pd_input(places, stat_var)) - df.set_index('place', inplace=True) - df.sort_index(inplace=True) - return df[sorted(df.columns, reverse=desc_col)] - - -def _multivariate_pd_input(places, stat_vars): - """Returns a `list` of `dict` per element of `places` based on the `stat_var`. - - Data Commons will pick a set of StatVarObservation options that covers the - maximum number of queried places. Among ties, Data Commons selects an option - set with the latest Observation. - - Args: - places (`str` or `iterable` of `str`): The dcids of Places to query for. - stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables. - Returns: - A `list` of `dict`, one per element of `places`. Each `dict` consists of - the time series and place identifier. - - Examples: - >>> _multivariate_pd_input(["geoId/29", "geoId/33"], - ["Count_Person", "Median_Income_Person"]) - [ - {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'}, - {'Count_Person': 428, 'Median_Income_Person': 429, 'place': 'geoId/33'} - ] - """ - - rows_dict = _group_stat_all_by_obs_options(places, - stat_vars, - keep_series=False) - place2cov = collections.defaultdict(dict) # {geo: {var1: 3, var2: 33}} - - for stat_var, candidates_dict in rows_dict.items(): - selected_rows = None - most_geos = [] - max_geo_count_so_far = 0 - latest_date = [] - latest_date_so_far = '' - for options, rows in candidates_dict.items(): - current_geos = len(rows) - if current_geos > max_geo_count_so_far: - max_geo_count_so_far = current_geos - most_geos = [options] - # Reset tiebreaker stats. Recompute after this if-else block. - latest_date = [] - latest_date_so_far = '' - elif current_geos == max_geo_count_so_far: - most_geos.append(options) - else: - # Do not compute tiebreaker stats if not in most_geos. - continue - - for row in rows: - row_date = row['date'] - if row_date > latest_date_so_far: - latest_date_so_far = row_date - latest_date = [options] - elif row_date == latest_date_so_far: - latest_date.append(options) - for options in most_geos: - if options in latest_date: - selected_rows = candidates_dict[options] - - for row in selected_rows: - place2cov[row['place']][stat_var] = row['val'] - return [ - dict({'place': place}, **multivariates) - for place, multivariates in place2cov.items() - ] - - -def build_multivariate_dataframe(places, stat_vars): - """Constructs a pandas DataFrame with `places` as the index and `stat_vars` as the columns. - - To ensure statistics are comparable across all Places, when multiple - StatVarObservations options are available for Place and StatVar combos, - Data Commons selects the StatVarObservation options that covers the most - Places, and breaks ties using the StatVarObservation options that yield - the latest Observation for any Place. - - Args: - places (`str` or `iterable` of `str`): The dcids of Places to query for. - stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables. - Returns: - A pandas DataFrame with Place IDs as the index and `stat_vars` as columns. - """ - try: - if isinstance(places, six.string_types): - places = [places] - else: - places = list(places) - assert all(isinstance(place, six.string_types) for place in places) - if isinstance(stat_vars, six.string_types): - stat_vars = [stat_vars] - else: - stat_vars = list(stat_vars) - assert all( - isinstance(stat_var, six.string_types) for stat_var in stat_vars) - except: - raise ValueError( - 'Parameter `places` and `stat_vars` must be string object or list-like object.' - ) - df = pd.DataFrame.from_records(_multivariate_pd_input(places, stat_vars)) - df.set_index('place', inplace=True) - df.sort_index(inplace=True) - return df diff --git a/datacommons_pandas/examples/__init__.py b/datacommons_pandas/examples/__init__.py deleted file mode 100644 index 2c79033c..00000000 --- a/datacommons_pandas/examples/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/datacommons_pandas/examples/df_builder.py b/datacommons_pandas/examples/df_builder.py deleted file mode 100644 index ea09ea1e..00000000 --- a/datacommons_pandas/examples/df_builder.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Basic examples for building pandas objects using the Data Commons Pandas API.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datacommons_pandas as dcpd - - -def build_time_series_example(): - - print(""" -# Build a pd.Series of time series for one variable and one place. -$ dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent') -{}""".format(dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent'))) - - print(""" -# Build a pd.Series of time series for one variable and one place and optional args. -$ dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey') -{}""".format( - dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey'))) - - -def build_time_series_dataframe_example(): - - def demonstrate_build_time_series_dataframe(intro_str, - places, - stat_var, - desc_col=False): - arg_str = "{}, '{}'".format(places, stat_var) - if desc_col: - arg_str += ", desc_col=True" - print(""" - # {} - $ dcpd.build_time_series_dataframe({}) - {}""".format(intro_str, arg_str, - dcpd.build_time_series_dataframe(places, stat_var, desc_col))) - - build_time_series_dataframe_params = [{ - 'intro_str': - 'Build a DataFrame of time series for one variable in multiple places.', - 'places': ['geoId/33', 'geoId/29', 'country/USA'], - 'stat_var': - 'Median_Income_Person' - }, { - 'intro_str': - 'Build a DataFrame of time series with columns sorted in descending order.', - 'places': ['country/USA'], - 'stat_var': - 'Median_Income_Person', - 'desc_col': - True - }] - - for param_set in build_time_series_dataframe_params: - demonstrate_build_time_series_dataframe(**param_set) - - -def build_multivariate_dataframe_example(): - - def demonstrate_build_multivariate_dataframe(intro_str, places, stat_vars): - print(""" - # {} - $ dcpd.build_multivariate_dataframe({}, {}) - {}""".format(intro_str, places, stat_vars, - dcpd.build_multivariate_dataframe(places, stat_vars))) - - build_multivariate_dataframe_params = [{ - 'intro_str': - 'Build a DataFrame of latest observations for multiple variables in multiple places.', - 'places': ['geoId/06', 'country/FRA'], - 'stat_vars': ['Median_Age_Person', 'Count_Person', 'Count_Household'] - }] - - for param_set in build_multivariate_dataframe_params: - demonstrate_build_multivariate_dataframe(**param_set) - - -def expect_err_examples(): - - print("\n\nExpect 6 errors, starting HERE:") - try: - dcpd.build_time_series_dataframe(['geoId/33'], - ['Median_Income_Person', 'Count_Person']) - except ValueError as e: - print("Successfully errored on: ", e) - try: - dcpd.build_time_series_dataframe(24, ['Median_Income_Person']) - except ValueError as e: - print("Successfully errored on: ", e) - try: - dcpd.build_multivariate_dataframe([3], - ['Median_Income_Person', 'Count_Person']) - except ValueError as e: - print("Successfully errored on: ", e) - try: - dcpd.build_multivariate_dataframe('country/USA', True) - except ValueError as e: - print("Successfully errored on: ", e) - # If the following two do not error due to the addition of - # Median_Income_Person statistics for NUTS geos, then please - # replace either the places or the StatVar. - try: - dcpd.build_time_series_dataframe(['nuts/HU2', 'nuts/HU22'], - 'Median_Income_Person') - except ValueError as e: - print("Successfully errored on: ", e) - try: - dcpd.build_multivariate_dataframe(['nuts/HU2', 'nuts/HU22'], - ['Median_Income_Person']) - except ValueError as e: - print("Successfully errored on: ", e) - print("until HERE.") - - -def main(): - build_time_series_example() - build_time_series_dataframe_example() - build_multivariate_dataframe_example() - expect_err_examples() - - -if __name__ == '__main__': - main() diff --git a/datacommons_pandas/key.py b/datacommons_pandas/key.py deleted file mode 120000 index 7d56dd70..00000000 --- a/datacommons_pandas/key.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/key.py \ No newline at end of file diff --git a/datacommons_pandas/node.py b/datacommons_pandas/node.py deleted file mode 120000 index 67c10b6f..00000000 --- a/datacommons_pandas/node.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/node.py \ No newline at end of file diff --git a/datacommons_pandas/places.py b/datacommons_pandas/places.py deleted file mode 120000 index 7206307a..00000000 --- a/datacommons_pandas/places.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/places.py \ No newline at end of file diff --git a/datacommons_pandas/requests.py b/datacommons_pandas/requests.py deleted file mode 120000 index 9bee6da0..00000000 --- a/datacommons_pandas/requests.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/requests.py \ No newline at end of file diff --git a/datacommons_pandas/setup.py b/datacommons_pandas/setup.py deleted file mode 100644 index 0d7329f8..00000000 --- a/datacommons_pandas/setup.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Build and distribute the datacommons_pandas package to PyPI.""" -import os - -from setuptools import setup - -dir_path = os.path.dirname(os.path.realpath(__file__)) -with open(os.path.join(dir_path, 'README.md'), 'r') as fh: - long_description = fh.read() - -# Package metadata. -NAME = 'datacommons_pandas' -DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.' -URL = 'https://github.com/datacommonsorg/api-python' -EMAIL = 'support@datacommons.org' -AUTHOR = 'datacommons.org' -REQUIRES_PYTHON = '>=3.7' -VERSION = '0.0.4' -REQUIRED = ['pandas', 'six', 'requests'] -PACKAGES = ['datacommons_pandas'] - -setup( - name=NAME, - version=VERSION, - description=DESCRIPTION, - long_description=long_description, - long_description_content_type='text/markdown', - author=AUTHOR, - author_email=EMAIL, - maintainer=AUTHOR, - maintainer_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - packages=PACKAGES, - install_requires=REQUIRED, - include_package_data=True, - license='Apache 2.0', - classifiers=[ - 'Development Status :: 7 - Inactive', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Topic :: Software Development', - ], -) diff --git a/datacommons_pandas/sparql.py b/datacommons_pandas/sparql.py deleted file mode 120000 index f0d921b2..00000000 --- a/datacommons_pandas/sparql.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/sparql.py \ No newline at end of file diff --git a/datacommons_pandas/stat_vars.py b/datacommons_pandas/stat_vars.py deleted file mode 120000 index ab7359b6..00000000 --- a/datacommons_pandas/stat_vars.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/stat_vars.py \ No newline at end of file diff --git a/datacommons_pandas/test/__init__.py b/datacommons_pandas/test/__init__.py deleted file mode 100644 index 2c79033c..00000000 --- a/datacommons_pandas/test/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/datacommons_pandas/test/df_builder_test.py b/datacommons_pandas/test/df_builder_test.py deleted file mode 100644 index 52ebcf57..00000000 --- a/datacommons_pandas/test/df_builder_test.py +++ /dev/null @@ -1,320 +0,0 @@ -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for StatVar methods in the Data Commons Pandas API. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -import json -import unittest - -import pandas as pd -import six -import six.moves.urllib as urllib - -import datacommons_pandas.df_builder as dcpd -import datacommons_pandas.utils as utils - -# Reusable parts of REST API /stat/all response. -CA_COUNT_PERSON = { - "isDcAggregate": - "true", - "sourceSeries": [{ - "val": { - "1990": 23640, - "1991": 24100, - "1993": 25090, - }, - "observationPeriod": "P1Y", - "importName": "WorldDevelopmentIndicators", - "provenanceDomain": "worldbank.org" - }, { - "val": { - "1790": 3929214, - "1800": 5308483, - "1810": 7239881, - }, - "measurementMethod": "WikidataPopulation", - "importName": "WikidataPopulation", - "provenanceDomain": "wikidata.org" - }, { - "val": { - "1890": 28360, - "1891": 24910, - "1892": 25070, - }, - "measurementMethod": "OECDRegionalStatistics", - "observationPeriod": "P1Y", - "importName": "OECDRegionalDemography", - "provenanceDomain": "oecd.org" - }] -} - -HU22_COUNT_PERSON = { - "sourceSeries": [{ - "val": { - "1990": 2360, - "1991": 2410, - "1992": 2500, - }, - "measurementMethod": "OECDRegionalStatistics", - "observationPeriod": "P1Y", - "importName": "OECDRegionalDemography", - "provenanceDomain": "oecd.org" - }] -} - -CA_MEDIAN_AGE_PERSON = { - "sourceSeries": [{ - "val": { - "1990": 12, - "1991": 24, - "1992": 24, - }, - "measurementMethod": "WikidataPopulation", - "importName": "WikidataPopulation", - "provenanceDomain": "wikidata.org" - }] -} - - -def request_mock(*args, **kwargs): - """A mock urlopen requests sent in the requests package.""" - - # Create the mock response object. - class MockResponse: - - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - req = args[0] - - stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value'] - stat_series_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ - 'get_stat_series'] - stat_all_url_base = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all'] - - # Mock responses for urlopen requests to get_stat_series. - if req.get_full_url( - ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': - # Response returned when querying with basic args. - return MockResponse(json.dumps({"series": {"2000": 1, "2001": 2}})) - if (req.get_full_url() == stat_series_url_base + - '?place=geoId/06&stat_var=Count_Person&' + - 'measurement_method=CensusPEPSurvey&observation_period=P1Y&' + - 'unit=RealPeople&scaling_factor=100'): - - # Response returned when querying with above optional params. - return MockResponse(json.dumps({"series": {"2000": 3, "2001": 42}})) - if (req.get_full_url() == stat_series_url_base + - '?place=geoId/06&stat_var=Count_Person&' + 'measurement_method=DNE'): - - # Response returned when data not available for optional parameters. - # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE - return MockResponse(json.dumps({"series": {}})) - - # Mock responses for urlopen requests to get_stat_all. - if req.get_full_url() == stat_all_url_base: - data = json.loads(req.data) - - if (data['places'] == ['geoId/06', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person', 'Median_Age_Person']): - # Response returned when querying with above params. - # Median Age missing for HU22. - resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - "Median_Age_Person": CA_MEDIAN_AGE_PERSON - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - "Median_Age_Person": {} - } - } - } - } - return MockResponse(json.dumps(resp)) - - if (data['places'] == ['geoId/06', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person']): - # Response returned when querying with above params. - resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - } - } - } - } - return MockResponse(json.dumps(resp)) - - if (data['places'] == ['geoId/06'] and - data['stat_vars'] == ['Count_Person']): - # Response returned when querying with above params. - resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - } - } - } - } - return MockResponse(json.dumps(resp)) - - if (data['places'] == ['geoId/06', 'nuts/HU22'] and - data['stat_vars'] == ['Count_Person', 'Median_Age_Person']): - # Response returned when querying with above params. - # Median Age missing for HU22. - resp = { - "placeData": { - "geoId/06": { - "statVarData": { - "Count_Person": CA_COUNT_PERSON, - "Median_Age_Person": CA_MEDIAN_AGE_PERSON - } - }, - "nuts/HU22": { - "statVarData": { - "Count_Person": HU22_COUNT_PERSON, - "Median_Age_Person": {} - } - } - } - } - return MockResponse(json.dumps(resp)) - # Otherwise, return an empty response and a 404. - return urllib.error.HTTPError(None, 404, None, None, None) - - -class TestBuildTimeSeries(unittest.TestCase): - """Unit tests for build_time_series.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling build_time_series with basic args.""" - series = dcpd.build_time_series('geoId/06', 'Count_Person') - exp = pd.Series({"2000": 1, "2001": 2}) - - self.assertCountEqual(series, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_multi_option(self, urlopen): - """Calling build_time_series with basic args.""" - series = dcpd.build_time_series('geoId/06', 'Count_Person', - 'CensusPEPSurvey', 'P1Y', 'RealPeople', - '100') - exp = pd.Series({"2000": 3, "2001": 42}) - - self.assertCountEqual(series, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_data(self, urlopen): - """Error if there is no data.""" - series = dcpd.build_time_series('geoId/06', 'Count_Person', 'DNE') - exp = pd.Series({}, dtype=object) - - self.assertCountEqual(series, exp) - - -class TestPdTimeSeries(unittest.TestCase): - """Unit tests for _time_series_pd_input.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling _time_series_pd_input with proper args.""" - rows = dcpd._time_series_pd_input(['geoId/06', 'nuts/HU22'], 'Count_Person') - exp = [{ - "1890": 28360, - "1891": 24910, - "1892": 25070, - "place": "geoId/06" - }, { - "1991": 2410, - "1990": 2360, - "1992": 2500, - "place": "nuts/HU22" - }] - six.assertCountEqual(self, rows, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_one_place(self, urlopen): - """Calling _time_series_pd_input with single place.""" - rows = dcpd._time_series_pd_input(['geoId/06'], 'Count_Person') - exp = [{"1990": 23640, "1991": 24100, "1993": 25090, "place": "geoId/06"}] - self.assertEqual(rows, exp) - - -class TestPdMultivariates(unittest.TestCase): - """Unit tests for _multivariate_pd_input.""" - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_basic(self, urlopen): - """Calling _multivariate_pd_input with proper args.""" - rows = dcpd._multivariate_pd_input(['geoId/06', 'nuts/HU22'], - ['Count_Person', 'Median_Age_Person']) - exp = [{ - "place": "geoId/06", - "Median_Age_Person": 24, - "Count_Person": 25070 - }, { - "place": "nuts/HU22", - "Count_Person": 2500 - }] - six.assertCountEqual(self, rows, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_one_each(self, urlopen): - """Calling _multivariate_pd_input with single place and var.""" - rows = dcpd._multivariate_pd_input(['geoId/06'], ['Count_Person']) - exp = [{"place": "geoId/06", "Count_Person": 25090}] - self.assertEqual(rows, exp) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_data(self, urlopen): - """Error if there is no data.""" - with self.assertRaises(ValueError): - dcpd._group_stat_all_by_obs_options(['FOO/100'], - ['Count_Person', 'Median_Age_Person']) - with self.assertRaises(ValueError): - dcpd._time_series_pd_input(['FOO/100', 'BAR/200'], ['Count_Person']) - with self.assertRaises(ValueError): - dcpd._multivariate_pd_input(['FOO/100', 'BAR/200'], - ['Count_Person', 'Median_Age_Person']) - - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons_pandas/utils.py b/datacommons_pandas/utils.py deleted file mode 120000 index 06c545f5..00000000 --- a/datacommons_pandas/utils.py +++ /dev/null @@ -1 +0,0 @@ -../datacommons/utils.py \ No newline at end of file diff --git a/docs/release.md b/docs/release.md index f435ed87..96425cb4 100644 --- a/docs/release.md +++ b/docs/release.md @@ -30,61 +30,3 @@ hatch run release:pypi ```bash hatch run release:tag ``` - ---- - -## Releasing the legacy packages - - -Note: Always release `datacommons_pandas` when `datacommons` is released. - -**If this is your first time releasing to PyPI**, please review the PyPI guide -starting from the -[setup -section](https://packaging.python.org/tutorials/packaging-projects/#creating-setup-py). - -## Prepare release tools - -```bash -python3 -m venv .env -source .env/bin/activate -python3 -m pip install --upgrade setuptools wheel -python3 -m pip install --upgrade twine -``` - -## Release to Test PyPI - -1. In [datacommons/setup.py](../datacommons/setup.py) and [datacommons_pandas/setup.py](../datacommons_pandas/setup.py): - - - Append "-USERNAME" to the package "NAME". For example, - `NAME = 'foo_package-janedoe123'`. - - Increment the "VERSION" codes to something that has not been used in your - test project. This will not affect the production PyPI versioning. - -1. In the repo root directly, build the dists and release to TestPyPI: - - ```bash - rm dist/* - python3 datacommons/setup.py sdist bdist_wheel - python3 datacommons_pandas/setup.py sdist bdist_wheel - python3 -m twine upload --repository testpypi dist/* - ``` - -## Release to Production PyPI - -1. In [datacommons/setup.py](../datacommons/setup.py) and - [datacommons_pandas/setup.py](../datacommons_pandas/setup.py): - - - Revert the package name to `datacommons` and `datacommons_pandas` - - Update and double check "VERSION" - -1. Update [datacommons/CHANGELOG.md](../datacommons/CHANGELOG.md) and [datacommons_pandas/CHANGELOG.md](../datacommons_pandas/CHANGELOG.md) - -1. Build the dists and release to PyPI: - - ```bash - rm dist/* - python3 datacommons/setup.py sdist bdist_wheel - python3 datacommons_pandas/setup.py sdist bdist_wheel - python3 -m twine upload dist/* - ```