// blob: 9fe10f1f86a8601487e461117f763d292750d550 [file] [log] [blame]
/*
* Copyright 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.android.exoplayer2.audio;
import androidx.annotation.IntDef;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.ParserException;
import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.ParsableBitArray;
import java.lang.annotation.Documented;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
/** Utility methods for handling AAC audio streams. */
public final class AacUtil {
/** Tag passed to {@link Log} for messages emitted by this class. */
private static final String TAG = "AacUtil";
/**
 * Immutable description of the sample format of an AAC audio stream.
 *
 * <p>The constructor is private, so instances can only be created by the enclosing class.
 */
public static final class Config {

  /** Sample rate of the audio, in Hertz. */
  public final int sampleRateHz;

  /** Number of audio channels. */
  public final int channelCount;

  /** Codecs string describing the audio, in RFC 6381 form. */
  public final String codecs;

  /**
   * Creates a configuration from already-parsed values.
   *
   * @param sampleRateHz The sample rate in Hertz.
   * @param channelCount The number of channels.
   * @param codecs The RFC 6381 codecs string.
   */
  private Config(int sampleRateHz, int channelCount, String codecs) {
    this.codecs = codecs;
    this.channelCount = channelCount;
    this.sampleRateHz = sampleRateHz;
  }
}
// The audio sample count constants below assume that frameLengthFlag is 0. In this class the flag
// is read by parseGaSpecificConfig, which logs a warning when it is set.
/**
* Number of raw audio samples that are produced per channel when decoding an AAC LC access unit.
*/
public static final int AAC_LC_AUDIO_SAMPLE_COUNT = 1024;
/**
* Number of raw audio samples that are produced per channel when decoding an AAC XHE access unit.
* Defined as the same value as {@link #AAC_LC_AUDIO_SAMPLE_COUNT}.
*/
public static final int AAC_XHE_AUDIO_SAMPLE_COUNT = AAC_LC_AUDIO_SAMPLE_COUNT;
/**
* Number of raw audio samples that are produced per channel when decoding an AAC HE access unit.
*/
public static final int AAC_HE_AUDIO_SAMPLE_COUNT = 2048;
/**
* Number of raw audio samples that are produced per channel when decoding an AAC LD access unit.
*/
public static final int AAC_LD_AUDIO_SAMPLE_COUNT = 512;
// Maximum bitrates for AAC profiles from the Fraunhofer FDK AAC encoder documentation:
// https://cs.android.com/android/platform/superproject/+/android-9.0.0_r8:external/aac/libAACenc/include/aacenc_lib.h;l=718
// Each constant is written as (kbit/s) * 1000 / 8, converting a bitrate in kbit/s to bytes per
// second.
/** Maximum rate for an AAC LC audio stream, in bytes per second (800 kbit/s). */
public static final int AAC_LC_MAX_RATE_BYTES_PER_SECOND = 800 * 1000 / 8;
/** Maximum rate for an AAC HE V1 audio stream, in bytes per second (128 kbit/s). */
public static final int AAC_HE_V1_MAX_RATE_BYTES_PER_SECOND = 128 * 1000 / 8;
/** Maximum rate for an AAC HE V2 audio stream, in bytes per second (56 kbit/s). */
public static final int AAC_HE_V2_MAX_RATE_BYTES_PER_SECOND = 56 * 1000 / 8;
/**
* Maximum rate for an AAC XHE audio stream, in bytes per second (2048 kbit/s).
*
* <p>Fraunhofer documentation says "500 kbit/s and above" for stereo, so we use a rate generously
* above the 500 kbit/s level.
*/
public static final int AAC_XHE_MAX_RATE_BYTES_PER_SECOND = 2048 * 1000 / 8;
/**
* Maximum rate for an AAC ELD audio stream, in bytes per second (64 kbit/s).
*
* <p>Fraunhofer documentation shows AAC-ELD as useful for up to ~ 64 kbit/s so we use this value.
*/
public static final int AAC_ELD_MAX_RATE_BYTES_PER_SECOND = 64 * 1000 / 8;
/**
* Value of the 4-bit frequency index signalling that the sampling frequency is not a table index
* but follows in the bitstream as an explicit 24-bit value.
*/
private static final int AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY = 0xF;
/** Sampling rates in Hertz, indexed by the 4-bit frequency index read from the bitstream. */
private static final int[] AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE =
new int[] {
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350
};
/**
* Placeholder stored in the channel count table for channel configurations that have no channel
* count; parsing such a configuration is reported as a malformed container.
*/
private static final int AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID = -1;
/**
* Channel counts, indexed by the 4-bit channel configuration read from the bitstream.
*
* <p>In the channel configurations below, &lt;A&gt; indicates a single channel element; (A, B)
* indicates a channel pair element; and [A] indicates a low-frequency effects element. The
* speaker mapping short forms used are:
*
* <ul>
* <li>FC: front center
* <li>BC: back center
* <li>FL/FR: front left/right
* <li>FCL/FCR: front center left/right
* <li>FTL/FTR: front top left/right
* <li>SL/SR: back surround left/right
* <li>BL/BR: back left/right
* <li>LFE: low frequency effects
* </ul>
*/
private static final int[] AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE =
new int[] {
0,
1, /* mono: <FC> */
2, /* stereo: (FL, FR) */
3, /* 3.0: <FC>, (FL, FR) */
4, /* 4.0: <FC>, (FL, FR), <BC> */
5, /* 5.0 back: <FC>, (FL, FR), (SL, SR) */
6, /* 5.1 back: <FC>, (FL, FR), (SL, SR), [LFE] */
8, /* 7.1 wide back: <FC>, (FCL, FCR), (FL, FR), (SL, SR), [LFE] */
AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
7, /* 6.1: <FC>, (FL, FR), (SL, SR), <BC>, [LFE] */
8, /* 7.1: <FC>, (FL, FR), (SL, SR), (BL, BR), [LFE] */
AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
8, /* 7.1 top: <FC>, (FL, FR), (SL, SR), [LFE], (FTL, FTR) */
AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID
};
/**
* Prefix for the RFC 6381 codecs string for AAC formats. To form a full codecs string, suffix the
* decimal AudioObjectType (for example, the prefix followed by {@code 2} for AAC LC).
*/
private static final String CODECS_STRING_PREFIX = "mp4a.40.";
/** Audio object type for the Advanced Audio Coding Low-Complexity (AAC LC) profile. */
public static final int AUDIO_OBJECT_TYPE_AAC_LC = 2;
/** Audio object type for Spectral Band Replication (SBR). */
public static final int AUDIO_OBJECT_TYPE_AAC_SBR = 5;
/** Audio object type for Error Resilient Bit-Sliced Arithmetic Coding (ER BSAC). */
public static final int AUDIO_OBJECT_TYPE_AAC_ER_BSAC = 22;
/** Audio object type for Enhanced Low Delay (ELD). */
public static final int AUDIO_OBJECT_TYPE_AAC_ELD = 23;
/** Audio object type for Parametric Stereo (PS). */
public static final int AUDIO_OBJECT_TYPE_AAC_PS = 29;
/**
* Escape code for extended audio object types. When the 5-bit audio object type field holds this
* value, the actual type is 32 plus the value of the 6-bit field that follows.
*/
private static final int AUDIO_OBJECT_TYPE_ESCAPE = 31;
/** Audio object type for Extended High Efficiency (XHE). */
public static final int AUDIO_OBJECT_TYPE_AAC_XHE = 42;
/**
* Valid AAC Audio object types. One of {@link #AUDIO_OBJECT_TYPE_AAC_LC}, {@link
* #AUDIO_OBJECT_TYPE_AAC_SBR}, {@link #AUDIO_OBJECT_TYPE_AAC_ER_BSAC}, {@link
* #AUDIO_OBJECT_TYPE_AAC_ELD}, {@link #AUDIO_OBJECT_TYPE_AAC_PS} or {@link
* #AUDIO_OBJECT_TYPE_AAC_XHE}.
*
* <p>The annotation has source retention, so it is not retained in compiled classes.
*/
@Documented
@Retention(RetentionPolicy.SOURCE)
@IntDef({
AUDIO_OBJECT_TYPE_AAC_LC,
AUDIO_OBJECT_TYPE_AAC_SBR,
AUDIO_OBJECT_TYPE_AAC_ER_BSAC,
AUDIO_OBJECT_TYPE_AAC_ELD,
AUDIO_OBJECT_TYPE_AAC_PS,
AUDIO_OBJECT_TYPE_AAC_XHE
})
public @interface AacAudioObjectType {}
/**
 * Parses an AAC AudioSpecificConfig (ISO 14496-3 1.6.2.1) held in a byte array.
 *
 * <p>The bytes are wrapped in a {@link ParsableBitArray} and handed to the bit-array overload
 * without forcing the whole configuration to be read.
 *
 * @param audioSpecificConfig A byte array containing the AudioSpecificConfig to parse.
 * @return The parsed configuration.
 * @throws ParserException If the AudioSpecificConfig cannot be parsed because it is invalid or
 *     unsupported.
 */
public static Config parseAudioSpecificConfig(byte[] audioSpecificConfig) throws ParserException {
  ParsableBitArray configBits = new ParsableBitArray(audioSpecificConfig);
  return parseAudioSpecificConfig(configBits, /* forceReadToEnd= */ false);
}
/**
 * Parses an AAC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1, from a bit array.
 *
 * <p>The codecs string of the result is built from the first audio object type in the
 * configuration, even when an extension audio object type follows it.
 *
 * @param bitArray A {@link ParsableBitArray} containing the AudioSpecificConfig to parse. The
 *     position is advanced to the end of the AudioSpecificConfig.
 * @param forceReadToEnd Whether the entire AudioSpecificConfig should be read. Required for
 *     knowing the length of the configuration payload.
 * @return The parsed configuration.
 * @throws ParserException If the AudioSpecificConfig cannot be parsed because it is invalid or
 *     unsupported.
 */
public static Config parseAudioSpecificConfig(ParsableBitArray bitArray, boolean forceReadToEnd)
    throws ParserException {
  // Leading fields, in bitstream order: object type, sampling frequency, channel configuration.
  int objectType = getAudioObjectType(bitArray);
  int outputSampleRateHz = getSamplingFrequency(bitArray);
  int channelConfigIndex = bitArray.readBits(4);
  final String codecsString = CODECS_STRING_PREFIX + objectType;

  boolean hasExplicitExtension =
      objectType == AUDIO_OBJECT_TYPE_AAC_SBR || objectType == AUDIO_OBJECT_TYPE_AAC_PS;
  if (hasExplicitExtension) {
    // With explicitly signalled SBR or PS, the extension sampling frequency is reported as the
    // sample rate of the content. It equals the sample rate of the decoded output but may differ
    // from the rate read above.
    outputSampleRateHz = getSamplingFrequency(bitArray);
    objectType = getAudioObjectType(bitArray);
    if (objectType == AUDIO_OBJECT_TYPE_AAC_ER_BSAC) {
      // The extensionChannelConfiguration replaces the channel configuration read above.
      channelConfigIndex = bitArray.readBits(4);
    }
  }

  if (forceReadToEnd) {
    switch (objectType) {
      case 1:
      case 2:
      case 3:
      case 4:
      case 6:
      case 7:
        parseGaSpecificConfig(bitArray, objectType, channelConfigIndex);
        break;
      case 17:
      case 19:
      case 20:
      case 21:
      case 22:
      case 23:
        parseGaSpecificConfig(bitArray, objectType, channelConfigIndex);
        // For these object types a 2-bit epConfig follows the GA specific config.
        int epConfig = bitArray.readBits(2);
        if (epConfig == 2 || epConfig == 3) {
          throw ParserException.createForUnsupportedContainerFeature(
              "Unsupported epConfig: " + epConfig);
        }
        break;
      default:
        throw ParserException.createForUnsupportedContainerFeature(
            "Unsupported audio object type: " + objectType);
    }
  }

  // For supported containers, bits_to_decode() is always 0.
  int channelCount = AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[channelConfigIndex];
  if (channelCount == AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID) {
    throw ParserException.createForMalformedContainer(/* message= */ null, /* cause= */ null);
  }
  return new Config(outputSampleRateHz, channelCount, codecsString);
}
/**
 * Builds a simple AAC LC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
 *
 * <p>The sample rate must be one of the entries of the sampling rate table and the channel count
 * must be one of the valid entries of the channel count table. When several channel
 * configurations share the same channel count (for example 8 channels), the one with the highest
 * configuration index is used.
 *
 * @param sampleRate The sample rate in Hz.
 * @param channelCount The channel count.
 * @return The AudioSpecificConfig.
 * @throws IllegalArgumentException If the sample rate or the channel count has no corresponding
 *     index in the AudioSpecificConfig tables.
 */
public static byte[] buildAacLcAudioSpecificConfig(int sampleRate, int channelCount) {
  int sampleRateIndex = C.INDEX_UNSET;
  for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE.length; ++i) {
    if (sampleRate == AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[i]) {
      sampleRateIndex = i;
    }
  }
  int channelConfig = C.INDEX_UNSET;
  // The channel count table uses a placeholder value for configurations without a channel count.
  // A caller-supplied count equal to that placeholder must not be matched against those entries.
  if (channelCount != AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID) {
    for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE.length; ++i) {
      if (channelCount == AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[i]) {
        channelConfig = i;
      }
    }
  }
  // Validate the looked-up indices (not the raw sample rate): an unsupported sample rate leaves
  // sampleRateIndex unset and must be rejected instead of being encoded.
  if (sampleRateIndex == C.INDEX_UNSET || channelConfig == C.INDEX_UNSET) {
    throw new IllegalArgumentException(
        "Invalid sample rate or number of channels: " + sampleRate + ", " + channelCount);
  }
  return buildAudioSpecificConfig(AUDIO_OBJECT_TYPE_AAC_LC, sampleRateIndex, channelConfig);
}
/**
 * Builds a simple two-byte AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
 *
 * <p>The values are packed most-significant-bit first: 5 bits of audio object type, 4 bits of
 * sample rate index and 4 bits of channel configuration, followed by 3 zero bits. Each value is
 * masked to its field width.
 *
 * @param audioObjectType The audio object type.
 * @param sampleRateIndex The sample rate index.
 * @param channelConfig The channel configuration.
 * @return The AudioSpecificConfig.
 */
public static byte[] buildAudioSpecificConfig(
    int audioObjectType, int sampleRateIndex, int channelConfig) {
  // First byte: object type in the upper 5 bits, upper 3 bits of the sample rate index below.
  int objectTypeBits = (audioObjectType << 3) & 0xF8;
  int rateHighBits = (sampleRateIndex >> 1) & 0x07;
  // Second byte: lowest sample rate index bit on top, then the 4-bit channel configuration.
  int rateLowBit = (sampleRateIndex << 7) & 0x80;
  int channelBits = (channelConfig << 3) & 0x78;
  return new byte[] {(byte) (objectTypeBits | rateHighBits), (byte) (rateLowBit | channelBits)};
}
/**
 * Reads an AAC audio object type, as specified in 14496-3 (2005) Table 1.14.
 *
 * <p>The type is normally a 5-bit value. If that value is the escape code, the type is instead
 * 32 plus the 6-bit value that follows.
 *
 * @param bitArray The bit array containing the audio specific configuration.
 * @return The audio object type.
 */
private static int getAudioObjectType(ParsableBitArray bitArray) {
  int baseType = bitArray.readBits(5);
  if (baseType != AUDIO_OBJECT_TYPE_ESCAPE) {
    return baseType;
  }
  return 32 + bitArray.readBits(6);
}
/**
 * Reads an AAC sampling frequency (or extension sampling frequency) as specified in 14496-3
 * (2005) Table 1.13.
 *
 * <p>A 4-bit frequency index is read first. The escape index means that the frequency follows as
 * an explicit 24-bit value; any other index is looked up in the sampling rate table.
 *
 * @param bitArray The bit array containing the audio specific configuration.
 * @return The sampling frequency.
 * @throws ParserException If the frequency index is neither the escape index nor a valid index
 *     into the sampling rate table.
 */
private static int getSamplingFrequency(ParsableBitArray bitArray) throws ParserException {
  int frequencyIndex = bitArray.readBits(4);
  if (frequencyIndex == AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY) {
    return bitArray.readBits(24);
  }
  // Bound the lookup by the table itself so the check cannot drift from the table contents.
  if (frequencyIndex < AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE.length) {
    return AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[frequencyIndex];
  }
  throw ParserException.createForMalformedContainer(
      "Invalid AAC sampling frequency index: " + frequencyIndex, /* cause= */ null);
}
/**
 * Advances {@code bitArray} past a GA specific config, reading or skipping its fields in
 * bitstream order. No field value is returned to the caller.
 *
 * @param bitArray The bit array positioned at the start of the GA specific config.
 * @param audioObjectType The audio object type, which determines which optional fields exist.
 * @param channelConfiguration The channel configuration. A value of 0 is not supported.
 * @throws UnsupportedOperationException If {@code channelConfiguration} is 0.
 */
private static void parseGaSpecificConfig(
    ParsableBitArray bitArray, int audioObjectType, int channelConfiguration) {
  // frameLengthFlag.
  if (bitArray.readBit()) {
    Log.w(TAG, "Unexpected frameLengthFlag = 1");
  }
  // dependsOnCoreDecoder, followed by coreCoderDelay when set.
  if (bitArray.readBit()) {
    bitArray.skipBits(14);
  }
  final boolean hasExtension = bitArray.readBit();
  if (channelConfiguration == 0) {
    throw new UnsupportedOperationException(); // TODO: Implement programConfigElement();
  }
  switch (audioObjectType) {
    case 6:
    case 20:
      bitArray.skipBits(3); // layerNr.
      break;
    default:
      break;
  }
  if (!hasExtension) {
    return;
  }
  switch (audioObjectType) {
    case 22:
      bitArray.skipBits(16); // numOfSubFrame (5), layer_length(11).
      break;
    case 17:
    case 19:
    case 20:
    case 23:
      // aacSectionDataResilienceFlag, aacScalefactorDataResilienceFlag,
      // aacSpectralDataResilienceFlag.
      bitArray.skipBits(3);
      break;
    default:
      break;
  }
  bitArray.skipBits(1); // extensionFlag3.
}
/** Not instantiable: this class only exposes static members. */
private AacUtil() {}
}