Invalid exception in query builder
PGSync version: 2.2.0 Postgres version: 14 Elasticsearch version: 7.17 Redis version: 5.0.7 Python version: 3.8
Problem Description: We have a companies table and all aggregation related tables. Foreign keys for these child tables are user defined.
Example of the schema
{
"database": "postgres",
"index": "company",
"plugins": [],
"nodes": {
"table": "companies_active",
"schema": "public",
"columns": ["id", "name", "address", "city_id", "logo", "website"],
"children": [
{
"table": "es_number_of_employees_per_company",
"columns": ["count"],
"label": "numberOfEmployees",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
}
]
}
}
]```
1. Ran bootstrap -t -c company.json
2. Ran bootstrap -c company.json
3. Ran pgsync -c company.json
Error Message (if any):
pgsync -c company.json
- companies_active
|- es_number_of_employees_per_company
|- es_avg_total_work_experience_per_company
|- es_avg_work_experience_per_company
|- es_total_number_of_previous_companies_per_current_company
|- es_avg_duration_of_previous_jobs_per_company
|- es_number_of_previous_companies_per_profile_avg
|- es_avg_previous_job_titles_per_company
|- es_avg_work_experience_per_job_title_per_company
|- es_resume_profile_job_titles_per_company_agg
|- es_resume_profiles_based_in_per_company
|- es_avg_total_work_experience_agg
|- es_avg_work_experience_per_profile_per_company_years_agg
|- es_duration_of_previous_jobs_grouped_by_years_agg
|- es_avg_total_work_experience_per_job_title_per_company
|- es_avg_work_experience_per_job_title_per_gender_per_company
|- es_avg_previous_work_experience_per_job_title_per_company
|- es_resume_profiles_based_in_per_company_agg
|- es_resume_profile_previous_job_titles_per_previous_company
- cities
- countries
2022-03-01 16:42:35.991:ERROR:pgsync.sync: Exception invalid expression
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
self.query_builder.build_queries(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
self._non_through(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
_keys = self._get_child_keys(node, self._json_build_object(params))
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
raise RuntimeError("invalid expression")
RuntimeError: invalid expression
2022-03-01 16:42:35.993:ERROR:pgsync.elastichelper: Exception invalid expression
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 105, in bulk
self._bulk(
File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 171, in _bulk
for _ in helpers.parallel_bulk(
File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 472, in parallel_bulk
for result in pool.imap(
File "/usr/lib/python3.8/multiprocessing/pool.py", line 868, in next
raise value
File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 144, in _helper_reraises_exception
raise ex
File "/usr/lib/python3.8/multiprocessing/pool.py", line 388, in _guarded_task_generation
for i, x in enumerate(iterable):
File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 155, in _chunk_actions
for action, data in actions:
File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
self.query_builder.build_queries(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
self._non_through(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
_keys = self._get_child_keys(node, self._json_build_object(params))
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
raise RuntimeError("invalid expression")
RuntimeError: invalid expression
0:00:08.483692 (8.48 sec)
Traceback (most recent call last):
File "/usr/local/bin/pgsync", line 7, in <module>
sync.main()
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1128, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 1232, in main
sync.pull()
File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 1073, in pull
self.es.bulk(self.index, self.sync(txmin=txmin, txmax=txmax))
File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 105, in bulk
self._bulk(
File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 171, in _bulk
for _ in helpers.parallel_bulk(
File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 472, in parallel_bulk
for result in pool.imap(
File "/usr/lib/python3.8/multiprocessing/pool.py", line 868, in next
raise value
File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 144, in _helper_reraises_exception
raise ex
File "/usr/lib/python3.8/multiprocessing/pool.py", line 388, in _guarded_task_generation
for i, x in enumerate(iterable):
File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 155, in _chunk_actions
for action, data in actions:
File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
self.query_builder.build_queries(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
self._non_through(node)
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
_keys = self._get_child_keys(node, self._json_build_object(params))
File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
raise RuntimeError("invalid expression")
RuntimeError: invalid expression
Can you provide more details on your schema, please? e.g the actual schema.json you are using. There is a discrepancy between the schema.json and the output above. Also, can you highlight which fields are json or jsonb. do you have any custom field types?
@toluaina Sure, here it is. So basically all of the columns in aggregations are string or some number, can be int or float depending on the aggregation that was in in the table. Sorry but for some reason its going outside of the code block and is not formatted properly but you will see the gist of it
` { "database": "postgres", "index": "company", "plugins": [], "nodes": { "table": "companies_active", "schema": "public", "columns": ["id", "name", "address", "city_id", "logo", "website"], "children": [ { "table": "number_of_employees_per_company", "columns": ["count"],
"label": "numberOfEmployees",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_total_work_experience_per_company",
"columns": ["avg_work_experience_in_days"],
"label": "avgTotalWorkExperience",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_work_experience_per_company",
"columns": ["avg_work_experience_in_days"],
"label": "avgWorkExperience",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "total_number_of_previous_companies_per_current_company",
"columns": ["count"],
"label": "totalNumberOfPreviousCompanies",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_duration_of_previous_jobs_per_company",
"columns": ["avg_duration"],
"label": "averageDurationOfPreviousJobs",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "number_of_previous_companies_per_profile_avg",
"columns": ["avg"],
"label": "averageNumberOfPreviousCompanies",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_previous_job_titles_per_company",
"columns": ["avg", "job_title"],
"label": "averagePreviousJobTitles",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_work_experience_per_job_title_per_company",
"columns": [
"job_title_name",
"avg_experience_in_days",
"cnt",
"percentage",
"years"
],
"label": "averageWorkExperiencePerJobTitle",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "resume_profile_job_titles_per_company_agg",
"columns": ["job_title_name", "count", "percentage"],
"label": "jobTitles",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "resume_profiles_based_in_per_company",
"columns": ["name", "iso2", "cnt", "job_title_name"],
"label": "jobTitlesBasedIn",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_total_work_experience_per_profile_per_company_years_agg",
"columns": ["years", "count", "percentage"],
"label": "averageTotalWorkExperience",
"relationship": {
"variant": "object",
"type": "one_to_many",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_work_experience_per_profile_per_company_years_agg",
"columns": ["years", "count", "percentage"],
"label": "averageWorkExperience",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "duration_of_previous_jobs_grouped_by_years_agg",
"columns": ["years", "count", "percentage"],
"label": "previousEmploymentDuration",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "avg_work_experience_per_job_title_per_gender_per_company",
"columns": [
"job_title_name",
"gender",
"avg_experience_in_days",
"cnt",
"percentage"
],
"label": "averageWorkExperiencePerJobTitlePerGender",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["companies_id"],
"parent": ["id"]
}
}
},
{
"table": "resume_profile_previous_job_titles_per_previous_company",
"columns": [
"previous_company_name",
"previous_job_title_name",
"count",
"percentage"
],
"label": "previousJobTitlesPerCompany",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["current_company_id"],
"parent": ["id"]
}
}
},
{
"table": "cities",
"columns": ["id", "name", "name_ascii"],
"transform": {
"mapping": {
"id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"normalizer": "lowercase"
},
"search_as_you_type": {
"type": "search_as_you_type"
}
}
},
"name_ascii": { "type": "keyword" }
}
},
"label": "city",
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["id"],
"parent": ["city_id"]
}
},
"children": [
{
"table": "countries",
"schema": "public",
"columns": ["id", "name", "iso2", "iso3"],
"label": "country",
"transform": {
"mapping": {
"id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"normalizer": "lowercase"
},
"search_as_you_type": {
"type": "search_as_you_type"
}
}
},
"iso2": { "type": "keyword" },
"iso3": { "type": "keyword" }
}
},
"relationship": {
"variant": "object",
"type": "one_to_one",
"foreign_key": {
"child": ["id"],
"parent": ["country_id"]
}
}
}
]
}
]
}
}
`
I had this similar issue and found out it was due to primary key missing in a table in postgres ( and that table was one of the children of root table in schema.json). Maybe this helps.