Hadoop CDH4 (4.1.0) 读文件源码分析

来源:互联网 发布:java前端 编辑:程序博客网 时间:2024/05/16 08:27







/**
 * Builds an input stream for {@code src} and finishes by calling
 * {@code openInfo()}.
 *
 * @param dfsClient      client whose socket cache and configuration values
 *                       (prefetch size, time window, cached-connection retry
 *                       count) are copied into this stream
 * @param src            path of the file to read
 * @param buffersize     buffer size stored on this stream
 * @param verifyChecksum whether checksum verification is requested
 * @throws IOException             propagated from {@code openInfo()}
 * @throws UnresolvedLinkException propagated from {@code openInfo()}
 */
DFSInputStream(DFSClient dfsClient, String src, int buffersize, boolean verifyChecksum
               ) throws IOException, UnresolvedLinkException {
  // Caller-supplied settings.
  this.dfsClient = dfsClient;
  this.src = src;
  this.buffersize = buffersize;
  this.verifyChecksum = verifyChecksum;

  // State borrowed from the client and its configuration.
  this.socketCache = dfsClient.socketCache;
  this.prefetchSize = dfsClient.getConf().prefetchSize;
  this.timeWindow = dfsClient.getConf().timeWindow;
  this.nCachedConnRetry = dfsClient.getConf().nCachedConnRetry;

  // Must run last: it relies on the fields assigned above.
  openInfo();
}


// Ask the client for the block locations of src, starting at offset 0 and
// covering prefetchSize bytes.
final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0, prefetchSize);
if (DFSClient.LOG.isDebugEnabled()) {
  DFSClient.LOG.debug("newInfo = " + newInfo);
}
if (newInfo == null) {
  throw new IOException("Cannot open filename " + src);
}

// When block information was already loaded, the blocks common to the old
// and the new list must be pairwise equal; otherwise the file has changed.
if (locatedBlocks != null) {
  final Iterator<LocatedBlock> known = locatedBlocks.getLocatedBlocks().iterator();
  final Iterator<LocatedBlock> fetched = newInfo.getLocatedBlocks().iterator();
  while (known.hasNext() && fetched.hasNext()) {
    final boolean sameBlock = known.next().getBlock().equals(fetched.next().getBlock());
    if (!sameBlock) {
      throw new IOException("Blocklist for " + src + " has changed!");
    }
  }
}
locatedBlocks = newInfo;

// If the last block is not complete, obtain its current length via
// readBlockLength and record it on the block.
long lastBlockBeingWrittenLength = 0;
final LocatedBlock last =
    locatedBlocks.isLastBlockComplete() ? null : locatedBlocks.getLastLocatedBlock();
if (last != null) {
  if (last.getLocations().length == 0) {
    // No location reported for the last block: signal that with -1.
    return -1;
  }
  lastBlockBeingWrittenLength = readBlockLength(last);
  last.getBlock().setNumBytes(lastBlockBeingWrittenLength);
}

currentNode = null;
return lastBlockBeingWrittenLength;






/**
 * Array-based read entry point: wraps {@code buf} in a
 * {@code ByteArrayStrategy} and hands {@code off} and {@code len} to
 * {@code readWithStrategy}, returning whatever that call returns.
 *
 * @throws IOException propagated from {@code readWithStrategy}
 */
public synchronized int read(final byte buf[], int off, int len) throws IOException {
  final ReaderStrategy intoArray = new ByteArrayStrategy(buf);
  final int outcome = readWithStrategy(intoArray, off, len);
  return outcome;
}



try {          // currentNode can be left as null if previous read had a checksum          // error on the same block. See HDFS-3067          if (pos > blockEnd || currentNode == null) {            currentNode = blockSeekTo(pos);          }          int realLen = (int) Math.min(len, (blockEnd - pos + 1L));          int result = readBuffer(strategy, off, realLen, corruptedBlockMap);                    if (result >= 0) {            pos += result;          } else {            // got a EOS from reader though we expect more data on it.            throw new IOException("Unexpected EOS from the reader");          }          if (dfsClient.stats != null && result != -1) {            dfsClient.stats.incrementBytesRead(result);          }          return result;








public int findBlock(long offset) {    // create fake block of size 0 as a key    LocatedBlock key = new LocatedBlock(        new ExtendedBlock(), new DatanodeInfo[0], 0L, false);    key.setStartOffset(offset);    key.getBlock().setNumBytes(1);    Comparator<LocatedBlock> comp =       new Comparator<LocatedBlock>() {        // Returns 0 iff a is inside b or b is inside a        @Override        public int compare(LocatedBlock a, LocatedBlock b) {          long aBeg = a.getStartOffset();          long bBeg = b.getStartOffset();          long aEnd = aBeg + a.getBlockSize();          long bEnd = bBeg + b.getBlockSize();          if(aBeg <= bBeg && bEnd <= aEnd               || bBeg <= aBeg && aEnd <= bEnd)            return 0; // one of the blocks is inside the other          if(aBeg < bBeg)            return -1; // a's left bound is to the left of the b's          return 1;        }      };    return Collections.binarySearch(blocks, key, comp);  }




/**
 * Returns the first element of {@code nodes} that is not a key of
 * {@code deadNodes}.
 *
 * @param nodes     candidate datanodes, in preference order; may be null
 * @param deadNodes map whose keys are the nodes to skip
 * @return the first candidate that is not in {@code deadNodes}
 * @throws IOException if {@code nodes} is null or every candidate is a key
 *                     of {@code deadNodes}
 */
static DatanodeInfo bestNode(DatanodeInfo nodes[],
                             AbstractMap<DatanodeInfo, DatanodeInfo> deadNodes)
                             throws IOException {
  if (nodes != null) {
    for (DatanodeInfo candidate : nodes) {
      if (!deadNodes.containsKey(candidate)) {
        return candidate;
      }
    }
  }
  throw new IOException("No live nodes contain current block");
}



private DNAddrPair chooseDataNode(LocatedBlock block)    throws IOException {    while (true) {      DatanodeInfo[] nodes = block.getLocations();      try {        DatanodeInfo chosenNode = bestNode(nodes, deadNodes);        final String dnAddr =            chosenNode.getXferAddr(dfsClient.connectToDnViaHostname());        if (DFSClient.LOG.isDebugEnabled()) {          DFSClient.LOG.debug("Connecting to datanode " + dnAddr);        }        InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);        return new DNAddrPair(chosenNode, targetAddr);      } catch (IOException ie) {        String blockInfo = block.getBlock() + " file=" + src;        if (failures >= dfsClient.getMaxBlockAcquireFailures()) {          throw new BlockMissingException(src, "Could not obtain block: " + blockInfo,                                          block.getStartOffset());        }                if (nodes == null || nodes.length == 0) {          DFSClient.LOG.info("No node available for block: " + blockInfo);        }        DFSClient.LOG.info("Could not obtain block " + block.getBlock()            + " from any node: " + ie            + ". Will get new block locations from namenode and retry...");        try {          // Introducing a random factor to the wait time before another retry.          // The wait time is dependent on # of failures and a random factor.          // At the first time of getting a BlockMissingException, the wait time          // is a random number between 0..3000 ms. If the first retry          // still fails, we will wait 3000 ms grace period before the 2nd retry.          // Also at the second retry, the waiting window is expanded to 6000 ms          // alleviating the request rate from the server. Similarly the 3rd retry          // will wait 6000ms grace period before retry and the waiting window is          // expanded to 9000ms.           
double waitTime = timeWindow * failures +       // grace period for the last round of attempt            timeWindow * (failures + 1) * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure          DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec.");          Thread.sleep((long)waitTime);        } catch (InterruptedException iex) {        }        deadNodes.clear(); //2nd option is to remove only nodes[blockId]        openInfo();        block = getBlockAt(block.getStartOffset(), false);        failures++;        continue;    }    }  } 




// Can't local read a block under construction, see HDFS-2757    if (dfsClient.shouldTryShortCircuitRead(dnAddr) &&        !blockUnderConstruction()) {      return DFSClient.getLocalBlockReader(dfsClient.conf, src, block,          blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset,          dfsClient.connectToDnViaHostname());    }


// Allow retry since there is no way of knowing whether the cached socket    // is good until we actually use it.    for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) {      SocketAndStreams sockAndStreams = null;      // Don't use the cache on the last attempt - it's possible that there      // are arbitrarily many unusable sockets in the cache, but we don't      // want to fail the read.      if (retries < nCachedConnRetry) {        sockAndStreams = socketCache.get(dnAddr);      }      Socket sock;      if (sockAndStreams == null) {        fromCache = false;        sock = dfsClient.socketFactory.createSocket();                // TCP_NODELAY is crucial here because of bad interactions between        // Nagle's Algorithm and Delayed ACKs. With connection keepalive        // between the client and DN, the conversation looks like:        //   1. Client -> DN: Read block X        //   2. DN -> Client: data for block X        //   3. Client -> DN: Status OK (successful read)        //   4. Client -> DN: Read block Y        // The fact that step #3 and #4 are both in the client->DN direction        // triggers Nagling. If the DN is using delayed ACKs, this results        // in a delay of 40ms or more.        //        // TCP_NODELAY disables nagling and thus avoids this performance        // disaster.        
sock.setTcpNoDelay(true);        NetUtils.connect(sock, dnAddr,            dfsClient.getRandomLocalInterfaceAddr(),            dfsClient.getConf().socketTimeout);        sock.setSoTimeout(dfsClient.getConf().socketTimeout);      } else {        sock = sockAndStreams.sock;      }      try {        // The OP_READ_BLOCK request is sent as we make the BlockReader        BlockReader reader =            BlockReaderFactory.newBlockReader(dfsClient.getConf(),                                       sock, file, block,                                       blockToken,                                       startOffset, len,                                       bufferSize, verifyChecksum,                                       clientName,                                       dfsClient.getDataEncryptionKey(),                                       sockAndStreams == null ? null : sockAndStreams.ioStreams);        return reader;      } catch (IOException ex) {        // Our socket is no good.        DFSClient.LOG.debug("Error making BlockReader. Closing stale " + sock, ex);        if (sockAndStreams != null) {          sockAndStreams.close();        } else {          sock.close();        }        err = ex;      }




 public synchronized int read(byte[] buf, int off, int len)                                throws IOException {    if (curDataSlice == null || curDataSlice.remaining() == 0 && bytesNeededToFinish > 0) {      readNextPacket();    }    if (curDataSlice.remaining() == 0) {      // we're at EOF now      return -1;    }        int nRead = Math.min(curDataSlice.remaining(), len);    curDataSlice.get(buf, off, nRead);        return nRead;


//Read packet headers.    packetReceiver.receiveNextPacket(in);    PacketHeader curHeader = packetReceiver.getHeader();    curDataSlice = packetReceiver.getDataSlice();    assert curDataSlice.capacity() == curHeader.getDataLen();        if (LOG.isTraceEnabled()) {      LOG.trace("DFSClient readNextPacket got header " + curHeader);    }    // Sanity check the lengths    if (!curHeader.sanityCheck(lastSeqNo)) {         throw new IOException("BlockReader: error in packet header " +                               curHeader);    }        if (curHeader.getDataLen() > 0) {      int chunks = 1 + (curHeader.getDataLen() - 1) / bytesPerChecksum;      int checksumsLen = chunks * checksumSize;      assert packetReceiver.getChecksumSlice().capacity() == checksumsLen :        "checksum slice capacity=" + packetReceiver.getChecksumSlice().capacity() +           " checksumsLen=" + checksumsLen;            lastSeqNo = curHeader.getSeqno();      if (verifyChecksum && curDataSlice.remaining() > 0) {        // N.B.: the checksum error offset reported here is actually        // relative to the start of the block, not the start of the file.        // This is slightly misleading, but preserves the behavior from        // the older BlockReader.        checksum.verifyChunkedSums(curDataSlice,            packetReceiver.getChecksumSlice(),            filename, curHeader.getOffsetInBlock());      }      bytesNeededToFinish -= curHeader.getDataLen();    }            // First packet will include some data prior to the first byte    // the user requested. Skip it.    
if (curHeader.getOffsetInBlock() < startOffset) {      int newPos = (int) (startOffset - curHeader.getOffsetInBlock());      curDataSlice.position(newPos);    }    // If we've now satisfied the whole client read, read one last packet    // header, which should be empty    if (bytesNeededToFinish <= 0) {      readTrailingEmptyPacket();      if (verifyChecksum) {        sendReadResult(Status.CHECKSUM_OK);      } else {        sendReadResult(Status.SUCCESS);      }    }








