Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*.class
.env
.vscode
**__pycache__**
**node_modules**
dpv.textproto
pop_obs_spec_common.textproto
92 changes: 92 additions & 0 deletions dpv/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# To generate the Python protobuf module (stat_config_pb2.py), run:
# protoc -I=./ --python_out=./ ./stat_config.proto

from google.protobuf import text_format
import stat_config_pb2
import logging
import sys
import collections

import util


# Send log records at INFO level and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)


def analyze(prop_list, pt_to_info_list, existing_keys):
    """Build PopObsSpec candidates for groups that pin a property to one value.

    For every "<pop type>^<mprop>" group, the distinct values of each property
    in ``prop_list.props`` are collected across the group's infos. A property
    with exactly one distinct value becomes a ``dpv`` entry of the spec; every
    other property becomes a ``cprop``. Groups without any single-valued
    property are skipped, as are groups whose ``householdType`` is pinned to
    ``"Houseless"``.

    Args:
        prop_list: Object exposing ``props``, an iterable of property names.
        pt_to_info_list: Mapping from "<pop type>^<mprop>" strings to lists of
            info objects; each info exposes ``pv``, a mapping indexed by the
            names in ``prop_list.props``.
        existing_keys: Container of already-known spec keys (tested with
            ``in``).

    Returns:
        A list of ``stat_config_pb2.PopObsSpec`` messages that have at least
        one key (per ``util.compute_pop_obs_spec_keys``) missing from
        ``existing_keys``. Each spec appears at most once.
    """
    props = prop_list.props

    result = []
    for pt_mprop, info_list in pt_to_info_list.items():
        pt, mprop = pt_mprop.split("^")
        # Distinct values every property takes within this group.
        pv_mapping = {
            prop: {info.pv[prop] for info in info_list} for prop in props
        }
        # Only groups with at least one single-valued property are DPV cases.
        if not any(len(vset) == 1 for vset in pv_mapping.values()):
            continue

        spec = stat_config_pb2.PopObsSpec(pop_type=pt)
        spec.obs_props.append(stat_config_pb2.ObsProp(mprop=mprop))
        valid = True
        for p, vset in pv_mapping.items():
            if len(vset) != 1:
                spec.cprop.append(p)
                continue
            v = next(iter(vset))
            if p == "householdType" and v == "Houseless":
                # This pinned value disqualifies the whole group.
                valid = False
                break
            spec.dpv.append(stat_config_pb2.PopObsSpec.PV(prop=p, val=v))
        if not valid:
            continue

        keys = util.compute_pop_obs_spec_keys(spec)
        # Append once, even when several of the spec's keys are new; appending
        # per missing key would duplicate the spec in the output.
        if any(key not in existing_keys for key in keys):
            result.append(spec)
    return result



def run():
    """Write newly discovered DPV specs to ``dpv.textproto``.

    Prints every existing spec key, groups the infos returned by
    ``util.read_data()`` first by their property list and then by
    "<pop type>^<mprop>", runs ``analyze`` on each property-list group in
    sorted order, and writes the collected specs as a text-format
    ``PopObsSpecList`` to ``dpv.textproto`` in the current directory.
    """
    spec_list = []
    existing_keys = util.get_existing_dpv_spec_key()
    for key in existing_keys:
        print(key)
    data = util.read_data()
    # property list -> "<pop type>^<mprop>" -> infos sharing both.
    prop_list_2_stat_var = collections.defaultdict(
        lambda: collections.defaultdict(list))
    for info in data.values():
        prop_list = util.PropList(info.pv.keys())
        prop_list_2_stat_var[prop_list][info.pt + "^" + info.mprop].append(info)
    for prop_list in sorted(prop_list_2_stat_var):
        specs = analyze(prop_list, prop_list_2_stat_var[prop_list], existing_keys)
        spec_list.extend(specs)

    result = stat_config_pb2.PopObsSpecList(spec=spec_list)
    # Serialize before opening so a failure does not leave a truncated file,
    # and let the context manager close the handle even if the write raises.
    text = text_format.MessageToString(result)
    with open("dpv.textproto", "w") as f:
        f.write(text)

# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    run()
2 changes: 2 additions & 0 deletions dpv/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
requests
protobuf
57 changes: 57 additions & 0 deletions dpv/stat_config.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package datacommons.proto;

// This proto contains the specification of a StatVar observation spec.
// It contains the stat_type, measurement property, measurement denominator,
// measurement qualifier, scaling factor, and name of the spec if shown as a
// leaf node on the PV tree.
message ObsProp {
  // Measurement property.
  optional string mprop = 2;
  // Measurement qualifier.
  optional string mqual = 3;
  // Measurement denominator.
  optional string mdenom = 4;
  // Field numbers 1, 6 and 7 are reserved and must not be reused.
  reserved 1, 6, 7;
}

// This proto contains the specification of a PopObs instance. It contains
// the intended population type, the constraining properties, the hidden
// (dependent) property-value pairs, and a list of ObsProps.
message PopObsSpec {
  // A property-value pair
  message PV {
    // Property name.
    optional string prop = 1;
    // Value of that property.
    optional string val = 2;
  }

  // Population type.
  optional string pop_type = 1;
  // Constraining properties of StatisticalPopulation.
  repeated string cprop = 4;
  // A list of dependent property-value pairs that a client does not indicate
  // but that need to be added.
  // For example, property "income" needs additional pv of "age=Years15Onwards"
  repeated PV dpv = 5;
  // Verticals of this spec.
  repeated string vertical = 7;
  // Observation properties attached to this spec.
  repeated ObsProp obs_props = 8;
  // Field number 6 is reserved and must not be reused.
  reserved 6;
}

// All published Pop/Obs spec. Stored in a textproto.
message PopObsSpecList {
  // The individual PopObsSpec entries.
  repeated PopObsSpec spec = 1;
}
Loading