hudi icon indicating copy to clipboard operation
hudi copied to clipboard

Exception org.apache.hudi.exception.HoodieIOException: Could not read commit details

Open XuQianJin-Stars opened this issue 3 years ago • 2 comments

org.apache.hudi.exception.HoodieIOException: Could not read commit details from hdfs://XXXXXX/.hoodie/20220719053423274.deltacommit
	at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:763)
	at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.getInstantDetails(HoodieActiveTimeline.java:264)
	at org.apache.hudi.common.table.timeline.HoodieDefaultTimeline.getInstantDetails(HoodieDefaultTimeline.java:372)
	at org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getCommitMetadata(HoodieInputFormatUtils.java:511)
	at org.apache.hudi.sink.partitioner.profile.WriteProfiles.getCommitMetadata(WriteProfiles.java:194)
	at org.apache.hudi.source.IncrementalInputSplits.lambda$inputSplits$71(IncrementalInputSplits.java:183)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566)
	at org.apache.hudi.source.IncrementalInputSplits.inputSplits(IncrementalInputSplits.java:183)
	at org.apache.hudi.source.StreamReadMonitoringFunction.monitorDirAndForwardSplits(StreamReadMonitoringFunction.java:196)
	at org.apache.hudi.source.StreamReadMonitoringFunction.run(StreamReadMonitoringFunction.java:169)
	at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:110)
	at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:66)
	at org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:269)
Caused by: java.io.FileNotFoundException: File does not exist: /XXXXXX.hoodie/20220719053423274.deltacommit
	at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
	at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
	at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1995)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:797)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:290)
	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:529)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1039)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
	at java.base/java.security.AccessController.doPrivileged(Native Method)
	at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:2034)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:3047)

	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
	at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
	at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1015)
	at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1002)
	at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:991)
	at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1229)
	at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:350)
	at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:346)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:358)
	at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:811)
	at org.apache.hudi.common.fs.HoodieWrapperFileSystem.open(HoodieWrapperFileSystem.java:460)
	at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:760)
	... 18 more
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): File does not exist: /user/tdw/warehouse/csig_billing_rt_ods.db/ods_fee_center_tfee_7_9_ri/.hoodie/20220719053423274.deltacommit
	at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
	at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
	at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1995)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:797)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:290)
	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:529)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1039)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
	at java.base/java.security.AccessController.doPrivileged(Native Method)
	at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:2034)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:3047)

	at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1559)
	at org.apache.hadoop.ipc.Client.call(Client.java:1505)
	at org.apache.hadoop.ipc.Client.call(Client.java:1412)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
	at com.sun.proxy.$Proxy10.getBlockLocations(Unknown Source)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:293)
	at sun.reflect.GeneratedMethodAccessor38.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:411)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:163)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:155)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:346)
	at com.sun.proxy.$Proxy11.getBlockLocations(Unknown Source)
	at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1013)
	... 28 more

XuQianJin-Stars avatar Jul 19 '22 09:07 XuQianJin-Stars

@XuQianJin-Stars Can you elaborate more on the setup and environment and steps to reproduce?

rmahindra123 avatar Jul 20 '22 05:07 rmahindra123

also met this error today. Caused by: org.apache.hudi.exception.HoodieIOException: Could not read commit details from hdfs://xxx/.hoodie/20220720100959953.compaction.requested at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:624) at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readCompactionPlanAsBytes(HoodieActiveTimeline.java:266) at org.apache.hudi.common.util.CompactionUtils.getCompactionPlan(CompactionUtils.java:148) at org.apache.hudi.sink.compact.CompactionCommitSink.lambda$commitIfNecessary$1(CompactionCommitSink.java:116) at java.util.HashMap.computeIfAbsent(HashMap.java:1127) at org.apache.hudi.sink.compact.CompactionCommitSink.commitIfNecessary(CompactionCommitSink.java:114) at org.apache.hudi.sink.compact.CompactionCommitSink.invoke(CompactionCommitSink.java:103) at org.apache.hudi.sink.compact.CompactionCommitSink.invoke(CompactionCommitSink.java:54) at org.apache.flink.streaming.api.operators.StreamSink.processElement(StreamSink.java:54) at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:205) at org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.processElement(AbstractStreamTaskNetworkInput.java:134) at org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.emitNext(AbstractStreamTaskNetworkInput.java:105) at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:66) at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:423) at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:204) at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:684) at org.apache.flink.streaming.runtime.tasks.S

fengjian428 avatar Jul 20 '22 15:07 fengjian428

@XuQianJin-Stars : gentle reminder to respond to above's request/questions.

nsivabalan avatar Aug 16 '22 07:08 nsivabalan

@XuQianJin-Stars @fengjian428 : did you folks inspect the .hoodie folder. by any chance the delta commit or commit file of interest seen in stacktrace is 0 byte file? does the error goes away if you restart the pipeline? or its just stuck. if you can share the contents of .hoodie, we can dig in to see whats happening.

nsivabalan avatar Aug 28 '22 00:08 nsivabalan

closed as discussed offline, this issue is not re-occurring now.

xushiyan avatar Oct 30 '22 14:10 xushiyan