I'm using pyhive
to connect to Hive on GCP Dataproc. Queries sometimes succeed and sometimes fail. Here is one of the failures; it happened while executing:
DROP TABLE IF EXISTS db_name.tb_name
The client side got the following traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/thrift/transport/TSocket.py", line 154, in write
plus = self.handle.send(buff)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
...
...
self._cursor.execute(sql, *args, **kwargs) # type: ignore
File "/usr/local/lib/python3.7/site-packages/pyhive/hive.py", line 356, in execute
self._reset_state()
File "/usr/local/lib/python3.7/site-packages/pyhive/hive.py", line 266, in _reset_state
response = self._connection.client.CloseOperation(request)
File "/usr/local/lib/python3.7/site-packages/TCLIService/TCLIService.py", line 651, in CloseOperation
self.send_CloseOperation(req)
File "/usr/local/lib/python3.7/site-packages/TCLIService/TCLIService.py", line 660, in send_CloseOperation
self._oprot.trans.flush()
File "/usr/local/lib/python3.7/site-packages/thrift_sasl/__init__.py", line 143, in flush
self._flushPlain(buffer)
File "/usr/local/lib/python3.7/site-packages/thrift_sasl/__init__.py", line 166, in _flushPlain
self._trans.write(struct.pack(">I", len(buffer)) + buffer)
File "/usr/local/lib/python3.7/site-packages/thrift/transport/TSocket.py", line 161, in write
raise TTransportException(message="unexpected exception", inner=e)
thrift.transport.TTransport.TTransportException: unexpected exception
The server-side log contains a lot of entries like this one:
{
"protoPayload": {
"@type": "type.googleapis.com/google.cloud.audit.AuditLog",
"status": {
"code": 7,
"message": "PERMISSION_DENIED",
"details": [
{
"@type": "type.googleapis.com/google.rpc.PreconditionFailure",
"violations": [
{
"type": "VPC_SERVICE_CONTROLS",
"description": "enVxsuY7TYqavK8rl_fwvgr3yn4-9bneHjDTtQFvton7C03R3sHaXA"
}
]
}
]
},
"authenticationInfo": {},
"requestMetadata": {
"callerIp": "73.252.204.93",
"requestAttributes": {},
"destinationAttributes": {}
},
"serviceName": "dataproc.googleapis.com",
"methodName": "google.cloud.dataproc.v1beta2.ClusterController.ListClusters",
"resourceName": "projects/xxx",
"metadata": {
"securityPolicyInfo": {
"servicePerimeterName": "accessPolicies/xxx/servicePerimeters/Production_Security_Zone",
"organizationId": "xxx"
},
"violationReason": "NO_MATCHING_ACCESS_LEVEL",
"ingressViolations": [
{
"servicePerimeter": "accessPolicies/xxx/servicePerimeters/Production_Security_Zone",
"targetResource": "xxx"
}
],
"@type": "type.googleapis.com/google.cloud.audit.VpcServiceControlAuditMetadata",
"resourceNames": [
"xxx"
],
"vpcServiceControlsUniqueId": "xxx",
"accessLevels": [
"no_matching_definitions"
]
}
},
"insertId": "xxx",
"resource": {
"type": "audited_resource",
"labels": {
"method": "google.cloud.dataproc.v1beta2.ClusterController.ListClusters",
"project_id": "xxx",
"service": "dataproc.googleapis.com"
}
},
"timestamp": "2021-03-13T08:09:35.213190134Z",
"severity": "ERROR",
"logName": "projects/xxx/logs/cloudaudit.googleapis.com%2Fpolicy",
"receiveTimestamp": "2021-03-13T08:09:35.968888182Z"
}
I'm connecting from a Mac laptop behind a VPN. The relevant installed package versions are:
sasl3==0.2.11
thrift==0.13.0
thrift-sasl==0.4.2
pyhive==0.6.3
Any pointers on how to fix this? It seems to be a server-side permission setting. Thanks!