pgsync icon indicating copy to clipboard operation
pgsync copied to clipboard

Invalid exception in query builder

Open asovanek opened this issue 3 years ago • 3 comments

PGSync version: 2.2.0 Postgres version: 14 Elasticsearch version: 7.17 Redis version: 5.0.7 Python version: 3.8

Problem Description: We have a companies table and all aggregation related tables. Foreign keys for these child tables are user defined.

Example of the schema

	{
		"database": "postgres",
		"index": "company",
		"plugins": [],
		"nodes": {
			"table": "companies_active",
			"schema": "public",
			"columns": ["id", "name", "address", "city_id", "logo", "website"],
			"children": [
				{
					"table": "es_number_of_employees_per_company",
					"columns": ["count"],

					"label": "numberOfEmployees",
					"relationship": {
						"variant": "object",
						"type": "one_to_one",
						"foreign_key": {
							"child": ["companies_id"],
							"parent": ["id"]
						}
					}
				}
			]
		}
	}
]```

1. Ran bootstrap -t -c company.json
2. Ran bootstrap -c company.json
3. Ran pgsync -c company.json



Error Message (if any): 

pgsync -c company.json 
 - companies_active
   |- es_number_of_employees_per_company
   |- es_avg_total_work_experience_per_company
   |- es_avg_work_experience_per_company
   |- es_total_number_of_previous_companies_per_current_company
   |- es_avg_duration_of_previous_jobs_per_company
   |- es_number_of_previous_companies_per_profile_avg
   |- es_avg_previous_job_titles_per_company
   |- es_avg_work_experience_per_job_title_per_company
   |- es_resume_profile_job_titles_per_company_agg
   |- es_resume_profiles_based_in_per_company
   |- es_avg_total_work_experience_agg
   |- es_avg_work_experience_per_profile_per_company_years_agg
   |- es_duration_of_previous_jobs_grouped_by_years_agg
   |- es_avg_total_work_experience_per_job_title_per_company
   |- es_avg_work_experience_per_job_title_per_gender_per_company
   |- es_avg_previous_work_experience_per_job_title_per_company
   |- es_resume_profiles_based_in_per_company_agg
   |- es_resume_profile_previous_job_titles_per_previous_company
    - cities
       - countries
2022-03-01 16:42:35.991:ERROR:pgsync.sync: Exception invalid expression
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
    self.query_builder.build_queries(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
    self._non_through(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
    _keys = self._get_child_keys(node, self._json_build_object(params))
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
    raise RuntimeError("invalid expression")
RuntimeError: invalid expression
2022-03-01 16:42:35.993:ERROR:pgsync.elastichelper: Exception invalid expression
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 105, in bulk
    self._bulk(
  File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 171, in _bulk
    for _ in helpers.parallel_bulk(
  File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 472, in parallel_bulk
    for result in pool.imap(
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 868, in next
    raise value
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 144, in _helper_reraises_exception
    raise ex
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 388, in _guarded_task_generation
    for i, x in enumerate(iterable):
  File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 155, in _chunk_actions
    for action, data in actions:
  File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
    self.query_builder.build_queries(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
    self._non_through(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
    _keys = self._get_child_keys(node, self._json_build_object(params))
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
    raise RuntimeError("invalid expression")
RuntimeError: invalid expression
 0:00:08.483692 (8.48 sec)
Traceback (most recent call last):
  File "/usr/local/bin/pgsync", line 7, in <module>
    sync.main()
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1053, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1395, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 754, in invoke
    return __callback(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 1232, in main
    sync.pull()
  File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 1073, in pull
    self.es.bulk(self.index, self.sync(txmin=txmin, txmax=txmax))
  File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 105, in bulk
    self._bulk(
  File "/usr/local/lib/python3.8/dist-packages/pgsync/elastichelper.py", line 171, in _bulk
    for _ in helpers.parallel_bulk(
  File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 472, in parallel_bulk
    for result in pool.imap(
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 868, in next
    raise value
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 144, in _helper_reraises_exception
    raise ex
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 388, in _guarded_task_generation
    for i, x in enumerate(iterable):
  File "/usr/local/lib/python3.8/dist-packages/elasticsearch/helpers/actions.py", line 155, in _chunk_actions
    for action, data in actions:
  File "/usr/local/lib/python3.8/dist-packages/pgsync/sync.py", line 888, in sync
    self.query_builder.build_queries(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 740, in build_queries
    self._non_through(node)
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 662, in _non_through
    _keys = self._get_child_keys(node, self._json_build_object(params))
  File "/usr/local/lib/python3.8/dist-packages/pgsync/querybuilder.py", line 53, in _json_build_object
    raise RuntimeError("invalid expression")
RuntimeError: invalid expression

asovanek avatar Mar 01 '22 15:03 asovanek

Can you provide more details on your schema, please? e.g the actual schema.json you are using. There is a discrepancy between the schema.json and the output above. Also, can you highlight which fields are json or jsonb. do you have any custom field types?

toluaina avatar Mar 03 '22 22:03 toluaina

@toluaina Sure, here it is. So basically all of the columns in aggregations are string or some number, can be int or float depending on the aggregation that was in in the table. Sorry but for some reason its going outside of the code block and is not formatted properly but you will see the gist of it

` { "database": "postgres", "index": "company", "plugins": [], "nodes": { "table": "companies_active", "schema": "public", "columns": ["id", "name", "address", "city_id", "logo", "website"], "children": [ { "table": "number_of_employees_per_company", "columns": ["count"],

				"label": "numberOfEmployees",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_total_work_experience_per_company",
				"columns": ["avg_work_experience_in_days"],
				"label": "avgTotalWorkExperience",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_work_experience_per_company",
				"columns": ["avg_work_experience_in_days"],
				"label": "avgWorkExperience",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},
			{
				"table": "total_number_of_previous_companies_per_current_company",
				"columns": ["count"],

				"label": "totalNumberOfPreviousCompanies",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_duration_of_previous_jobs_per_company",
				"columns": ["avg_duration"],
				"label": "averageDurationOfPreviousJobs",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "number_of_previous_companies_per_profile_avg",
				"columns": ["avg"],
				"label": "averageNumberOfPreviousCompanies",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_previous_job_titles_per_company",
				"columns": ["avg", "job_title"],
				"label": "averagePreviousJobTitles",
				"relationship": {
					"variant": "object",
					"type": "one_to_many",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_work_experience_per_job_title_per_company",
				"columns": [
					"job_title_name",
					"avg_experience_in_days",
					"cnt",
					"percentage",
					"years"
				],
				"label": "averageWorkExperiencePerJobTitle",
				"relationship": {
					"variant": "object",
					"type": "one_to_many",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "resume_profile_job_titles_per_company_agg",
				"columns": ["job_title_name", "count", "percentage"],

				"label": "jobTitles",
				"relationship": {
					"variant": "object",
					"type": "one_to_many",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "resume_profiles_based_in_per_company",
				"columns": ["name", "iso2", "cnt", "job_title_name"],

				"label": "jobTitlesBasedIn",
				"relationship": {
					"variant": "object",
					"type": "one_to_many",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},
			{
				"table": "avg_total_work_experience_per_profile_per_company_years_agg",
				"columns": ["years", "count", "percentage"],

				"label": "averageTotalWorkExperience",
				"relationship": {
					"variant": "object",
					"type": "one_to_many",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "avg_work_experience_per_profile_per_company_years_agg",
				"columns": ["years", "count", "percentage"],

				"label": "averageWorkExperience",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "duration_of_previous_jobs_grouped_by_years_agg",
				"columns": ["years", "count", "percentage"],

				"label": "previousEmploymentDuration",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},
			{
				"table": "avg_work_experience_per_job_title_per_gender_per_company",
				"columns": [
					"job_title_name",
					"gender",
					"avg_experience_in_days",
					"cnt",
					"percentage"
				],

				"label": "averageWorkExperiencePerJobTitlePerGender",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["companies_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "resume_profile_previous_job_titles_per_previous_company",
				"columns": [
					"previous_company_name",
					"previous_job_title_name",
					"count",
					"percentage"
				],

				"label": "previousJobTitlesPerCompany",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["current_company_id"],
						"parent": ["id"]
					}
				}
			},

			{
				"table": "cities",
				"columns": ["id", "name", "name_ascii"],
				"transform": {
					"mapping": {
						"id": {
							"type": "long"
						},
						"name": {
							"type": "text",
							"fields": {
								"keyword": {
									"type": "keyword",
									"normalizer": "lowercase"
								},
								"search_as_you_type": {
									"type": "search_as_you_type"
								}
							}
						},
						"name_ascii": { "type": "keyword" }
					}
				},
				"label": "city",
				"relationship": {
					"variant": "object",
					"type": "one_to_one",
					"foreign_key": {
						"child": ["id"],
						"parent": ["city_id"]
					}
				},
				"children": [
					{
						"table": "countries",
						"schema": "public",
						"columns": ["id", "name", "iso2", "iso3"],
						"label": "country",
						"transform": {
							"mapping": {
								"id": {
									"type": "long"
								},
								"name": {
									"type": "text",
									"fields": {
										"keyword": {
											"type": "keyword",
											"normalizer": "lowercase"
										},
										"search_as_you_type": {
											"type": "search_as_you_type"
										}
									}
								},
								"iso2": { "type": "keyword" },
								"iso3": { "type": "keyword" }
							}
						},
						"relationship": {
							"variant": "object",
							"type": "one_to_one",
							"foreign_key": {
								"child": ["id"],
								"parent": ["country_id"]
							}
						}
					}
				]
			}
		]
	}
}

`

asovanek avatar Mar 09 '22 09:03 asovanek

I had this similar issue and found out it was due to primary key missing in a table in postgres ( and that table was one of the children of root table in schema.json). Maybe this helps.

chintal31 avatar Apr 06 '22 09:04 chintal31