Gene Data Processing 31: avocado, running avocado from avocado-cli, problem 3: variant calling cannot find RecordGroupSample (null)


The input read file is:

 val fqFile = "hs38DHSE1L100F1.sam"
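The line above only gives the file path; for completeness, here is a minimal sketch of how such a SAM file is typically loaded into AlignmentRecord objects with ADAM. The SparkContext setup and the variable name cleanedReads are assumptions inferred from the output below, not the post's exact pipeline, and depending on the ADAM version loadAlignments returns either a plain RDD[AlignmentRecord] or a wrapping AlignmentRecordRDD.

 import org.apache.spark.{SparkConf, SparkContext}
 import org.bdgenomics.adam.rdd.ADAMContext._   // adds loadAlignments and friends to SparkContext

 val conf = new SparkConf().setAppName("avocado-recordgroup-demo").setMaster("local[*]")
 val sc = new SparkContext(conf)

 val fqFile = "hs38DHSE1L100F1.sam"
 // Parse the SAM file into bdg-formats AlignmentRecord objects.
 val cleanedReads = sc.loadAlignments(fqFile)
 println("cleanedReads.count: " + cleanedReads.count())
 println(cleanedReads.first())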

Loading result:

cleanedReads.count:1{"readNum": 0, "contig": {"contigName": "chrUn_KN707963v1_decoy", "contigLength": 62955, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 357}, "start": 44526, "oldPosition": null, "end": 44596, "mapq": 0, "readName": "chrUn_KN707963v1_decoy_44034_44596_0:0:0_3:0:0_0", "sequence": "GTGAGATACATTTCCACTGATCAAACACGTGATGTAACCCTTGTCATGGTTCTGCTTACAGGGGATTTGT", "qual": "2222222222222222222222222222222222222222222222222222222222222222222222", "cigar": "70M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "4T22G18A23", "origQual": null, "attributes": "XT:A:R\tXO:i:0\tXM:i:3\tNM:i:3\tXG:i:0\tX1:i:3\tX0:i:3", "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
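Note that every recordGroup* field in the record above, including recordGroupSample, is null, which usually means the SAM file carries no @RG header line (or the reads are not tagged with one). A quick way to confirm this before running avocado, as a sketch assuming cleanedReads is an RDD[AlignmentRecord] (this check is not from the original post):

 // Count reads whose record group sample is missing; if this is non-zero,
 // avocado's read exploration will dereference a null when it asks for the sample.
 val missingSample = cleanedReads.filter(r => r.getRecordGroupSample == null).count()
 println(s"reads without recordGroupSample: $missingSample")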

Variant calling then fails with an error around getRecordGroupSample:

2016-05-28 15:47:18 ERROR Executor:96 - Exception in task 0.0 in stage 16.0 (TID 14)
java.lang.NullPointerException
    at org.bdgenomics.avocado.discovery.ReadExplorer$$anonfun$readToObservations$1.apply(ReadExplorer.scala:54)
    at org.bdgenomics.avocado.discovery.ReadExplorer$$anonfun$readToObservations$1.apply(ReadExplorer.scala:46)
    at org.apache.spark.rdd.Timer.time(Timer.scala:57)
    at org.bdgenomics.avocado.discovery.ReadExplorer.readToObservations(ReadExplorer.scala:46)
    at org.bdgenomics.avocado.discovery.ReadExplorer$$anonfun$discover$1$$anonfun$apply$3.apply(ReadExplorer.scala:177)
    at org.bdgenomics.avocado.discovery.ReadExplorer$$anonfun$discover$1$$anonfun$apply$3.apply(ReadExplorer.scala:177)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:207)
    at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:62)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
2016-05-28 15:47:18 WARN  TaskSetManager:71 - Lost task 0.0 in stage 16.0 (TID 14, localhost): java.lang.NullPointerException
    (same stack trace as above)
2016-05-28 15:47:18 ERROR TaskSetManager:75 - Task 0 in stage 16.0 failed 1 times; aborting job
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 16.0 failed 1 times, most recent failure: Lost task 0.0 in stage 16.0 (TID 14, localhost): java.lang.NullPointerException
    (same stack trace as above)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
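The trace bottoms out in ReadExplorer.readToObservations (ReadExplorer.scala:54), which reads the record group sample from each AlignmentRecord; because recordGroupSample is null for every read, the NullPointerException follows. One possible workaround, shown as a sketch rather than avocado's own code, is to fill in the record group fields before handing the reads to avocado; the names "sample1" and "rg1" below are made up and should be replaced with your actual sample and read group.

 // Rebuild each read with a non-null record group sample (and name) so that
 // getRecordGroupSample no longer returns null inside avocado.
 import org.bdgenomics.formats.avro.AlignmentRecord

 val readsWithSample = cleanedReads.map { r =>
   AlignmentRecord.newBuilder(r)
     .setRecordGroupSample("sample1")
     .setRecordGroupName("rg1")
     .build()
 }

The cleaner upstream fix is to make the aligner write an @RG header with an SM field in the first place, for example via bwa's read-group option or Picard AddOrReplaceReadGroups, so the loaded records already carry a sample name.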