From a155a39028328b691c52bf0c897f3541f612fc63 Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 5 Oct 2004 12:04:56 +0000 Subject: remove non native codec specific data move lang to the info packet git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13560 b3059339-0415-0410-9bf9-f77b7e298cf2 --- DOCS/tech/mncf.txt | 661 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 661 insertions(+) create mode 100644 DOCS/tech/mncf.txt (limited to 'DOCS') diff --git a/DOCS/tech/mncf.txt b/DOCS/tech/mncf.txt new file mode 100644 index 0000000000..5afb1045be --- /dev/null +++ b/DOCS/tech/mncf.txt @@ -0,0 +1,661 @@ + NUT Open Container Format DRAFT 20041005 (Michael's experimental fork) + ---------------------------------------- + + + + Intro: + +Features / goals: + (supported by the format, not necessary by a specific implementation) + +Simple + use the same encoding for nearly all fields + simple decoding, so slow cpus (and embedded systems) can handle it +Extendible + no limit for the possible values for all fields (using universal vlc) + allow adding of new headers in the future + allow adding more fields at the end of headers +Compact + ~0.2% overhead, for normal bitrates + index is <10kb per hour (1 keyframe every 3sec) + a usual header for a file is about 100bytes (audio + video headers together) + a packet header is about ~1-8 bytes +Error resistant + seeking / playback without an index + headers & index can be repeated + damaged files can be played back with minimal data lost and fast + resyncing times + + + + Definitions: + +MUST the specific part must be done to conform to this standard +SHOULD its recommanded to be done that way but its not strictly required + + + + Syntax: + + Type definitions: + +f(x) n fixed bits in big-endian order +u(x) unsigned number encoded in x bits in MSB first order + +v + value=0 + do{ + more_data u(1) + data u(7) + value= 128*value + data + }while(more_data) + +s + temp v + temp++ + if(temp&1) value= -(temp>>1) + else 
value= (temp>>1) + +b (binary data or string) + for(i=0; i0) tmp_timestamp s + if(tmp_fields>1) tmp_mul v + if(tmp_fields>2) tmp_stream v + if(tmp_fields>3) tmp_size v + else tmp_size=0 + if(tmp_fields>4) tmp_res v + else tmp_res=0 + if(tmp_fields>5) count v + else count= tmp_mul - tmp_size + for(j=6; j 0 + stream_id MUST be < stream_count + +stream_class + 0 video + 32 audio + 64 subtiles + Note the remaining values are reserved and MUST NOT be used + a demuxer MUST ignore streams with reserved classes + +fourcc + identification for the codec + example: "H264" + MUST contain 2 or 4 bytes, note, this might be increased in the future + if needed + +time_base_nom / time_base_denom = time_base + the number of timer ticks per second, this MUST be equal to the fps + if the fixed_fps is 1 + time_base_denom MUST not be 0 + time_base_nom and time_base_denom MUST be relative prime + time_base_nom MUST be < 2^31 + examples: + fps time_base_nom time_base_denom + 30 30 1 + 29.97 30000 1001 + 23.976 24000 1001 + sample_rate sample_rate_mul time_base_nom time_base_denom + 44100 1 44100 1 + 44100 64 11025 16 + 48000 1024 375 8 + Note: the advantage to using a large sample_rate_mul is that the + timestamps need fewer bits + +global_time_base_nom / global_time_base_denom = global_time_base + the number of timer ticks per second + global_time_base_denom MUST not be 0 + global_time_base_nom and global_time_base_denom MUST be relative prime + global_time_base_nom MUST be < 2^31 + +global_timestamp + timestamp in global_time_base units + when a global_timestamp is encountered the last_timestamp of all streams + is set to the following: + ln= global_time_base_denom*time_base_nom + sn= global_timestamp + d1= global_time_base_nom + d2= time_base_denom + last_timestamp= (ln/d1*sn + ln%d1*sn/d1)/d2 + Note, this calculation MUST be done with unsigned 64 bit integers, and + is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer + +msb_timestamp_shift + amount of bits in 
lsb_timestamp + MUST be <16 + +decode_delay + maximum time between input and output for a codec, used to generate dts + from pts + is 0 for streams without b frames, and 1 for streams with b frames, may + be larger for future codecs + +fixed_fps + 1 indicates that the fps is fixed + +codec_specific_data + private global data for a codec (could be huffman tables or ...) + +frame_code + the meaning of this byte is stored in the main header + the value 78 ('N') is forbidden to ensure that the byte is always + different from the first byte of any startcode + +flags[frame_code] + the bits of the flags from MSB to LSB are KD + if D is 1 then data_size_msb is coded, otherwise data_size_msb is 0 + K is the keyframe_type + 0-> no keyframe, + 1-> keyframe, + flags=4 can be used to mark illegal frame_code bytes + frame_code=78 must have flags=4 + * frames MUST not depend(1) upon frames prior to the last + frame_startcode + depend(1) means dependancy on the container level (NUT) not dependancy + on the codec level + +stream_id_plus1[frame_code] + must be <250 + if its 0 then the stream_id is coded in the frame + +data_size_mul[frame_code] + must be <16384 + +data_size_lsb[frame_code] + must be <16384 + +timestamp_delta[frame_code] + must be <16384 and >-16384 + +data_size + data_size= data_size_lsb + data_size_msb*data_size_mul; + +coded_timestamp + if coded_timestamp < (1< timestamp=0 + frame lsb_timestamp=3 -> timestamp=3 + frame lsb_timestamp=1 -> timestamp=1 + frame lsb_timestamp=2 -> timestamp=2 + ... 
+ keyframe msb_timestamp=257 -> timestamp=257 + frame lsb_timestamp=255->timestamp=255 + frame lsb_timestamp=0 -> timestamp=256 + frame lsb_timestamp=4 -> timestamp=260 + frame lsb_timestamp=2 -> timestamp=258 + frame lsb_timestamp=3 -> timestamp=259 + all timestamps of keyframes of a single stream MUST be monotone + +dts + dts are calculated by using a decode_delay+1 sized buffer for each + stream, into which the current pts is inserted and the element with + the smallest value is removed, this is then the current dts + this buffer is initialized with decode_delay elements, each set to -1 + all frames with dts == timestamp must be monotone, that means a frame + which occurs later in the stream must have a larger or equal dts + than an earlier frame + FIXME rename timestamp* to pts* ? + +width/height + MUST be set to the coded width/height + +sample_width/sample_height (aspect ratio) + sample_width is the horizontal distance between samples + sample_width and sample_height MUST be relatively prime if not zero + MUST be 0 if unknown + +colorspace_type + 0 unknown + 1 ITU Rec 624 / ITU Rec 601 Y range: 16..235 Cb/Cr range: 16..240 + 2 ITU Rec 709 Y range: 16..235 Cb/Cr range: 16..240 + 17 ITU Rec 624 / ITU Rec 601 Y range: 0..255 Cb/Cr range: 0..255 + 18 ITU Rec 709 Y range: 0..255 Cb/Cr range: 0..255 + +samplerate_nom / samplerate_denom = samplerate + the number of samples per second + +checksum + adler32 checksum + +index_timestamp + value of the timestamp in a sync-point relative to the last sync-point + +index_position + position in bytes of the first byte of a sync-point, relative to the + last sync-point + +id + the id of the type/name pair, so it's more compact + 0 means end + +type + for example: "UTF8" -> String or "JPEG" -> jpeg image + Note: nonstandard fields should be prefixed by "X-" + Note: MUST be less than 6 bytes long (might be increased to 64 later) + +name + the name of the info entry, valid names are + "TotalTime" total length of the stream in msecs + "StreamId" 
the stream(s) to which the info packet applies + "StartTimestamp" + "EndTimestamp" the time range in msecs to which the info applies + "SegmentId" a unique id for the streams + time specified + "Author" + "Description" + "Copyright" + "Encoder" the name & version of the software used for encoding + "Title" + "Cover" an image of the (cd,dvd,vhs,..) cover (preferably PNG or JPEG) + "Source" "DVD", "VCD", "CD", "MD", "FM radio", "VHS", "TV", + "LD" + Optional: appended PAL,NTSC,SECAM, ... in parentheses + "CaptureDevice" "BT878", "BT848", "webcam", ... (more exact names are fine too) + "CreationTime" "2003-01-20 20:13:15Z", ... + (ISO 8601 format, see http://www.cl.cam.ac.uk/~mgk25/iso-time.html) + Note: don't forget the timezone + "ReplayGain" + "Keywords" + "Language" ISO 639 and ISO 3166 for language/country code + something like "usen" (US english), can be 0 if unknown + and "multi" if several languages + see http://www.loc.gov/standards/iso639-2/englangn.html + and http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html the language code + "Disposition" "original", "dub" (translated), "comment", "lyrics", "karaoke" + Note: if someone needs some others, please tell us about them, so we can + add them to the official standard (if they are sane) + Note: nonstandard fields should be prefixed by "X-" + Note: MUST be less than 64 bytes long + +value + value of this name/type pair + +stuffing + 0x80 can be placed in front of any type v entry for stuffing + purposes + +info_table[][2]={ + {NULL , NULL }, // end + {NULL , NULL }, + {NULL , "UTF8"}, + {NULL , "v"}, + {NULL , "s"}, + {"StreamId" , "v"}, + {"SegmentId" , "v"}, + {"StartTimestamp" , "v"}, + {"EndTimestamp" , "v"}, + {"Author" , "UTF8"}, + {"Title" , "UTF8"}, + {"Language" , "UTF8"}, + {"Description" , "UTF8"}, + {"Copyright" , "UTF8"}, + {"Encoder" , "UTF8"}, + {"Keywords" , "UTF8"}, + {"Cover" , "JPEG"}, + {"Cover" , "PNG"}, + {"Disposition" , "UTF8"}, +}; + + Structure: + +the headers MUST be in 
exactly the following order (to simplify demuxer design) +main header +stream_header (id=0) +stream_header (id=1) +... +stream_header (id=n) + +headers may be repeated, but if they are then they MUST all be repeated together +and repeated headers MUST be identical +headers MAY only be repeated at the closest possible positions after 2^x where x is +an integer and the file end, so the headers may be repeated at 4102 if that's the +closest position after 2^12=4096 at which the headers can be placed + +headers MUST be placed at least at the beginning of the file and immediately before +the index or at the file end if there is no index +headers MUST be repeated at least twice (so they exist 3 times in a file) + +a demuxer MUST not demux a stream which contains more than one stream, or which +is wrapped in a structure to facilitate more than one stream or otherwise +duplicate the role of a container. any such file is to be considered invalid + +info packets which describe the whole file or individual streams/tracks must be +placed before any video/audio/... 
frames + + Index +every sync-point must be exactly once in the index +Note: in case of realtime streaming there is no end, so no index there either + + Info packets +the info_packet can be repeated, it can also contain different names & values +each time but only if also the time is different +Info packets can be used to describe the file or some part of it (chapters) + +info packets SHOULD be placed at the beginning of the file at least +for realtime streaming info packets will normally be transmitted when they apply +for example, the current song title & artist of the currently shown music video + + Unknown packets +MUST be ignored by the demuxer + + demuxer (non-normative) + +in the absence of a valid header at the beginning, players SHOULD search for backup +headers starting at offset 2^x for each x; players SHOULD end their search from a +particular offset when any startcode is found (including syncpoint) + + + Sample code (GPL, & untested) + +typedef BufferContext{ + uint8_t *buf; + uint8_t *buf_ptr; +}BufferContext; + +static inline uint64_t get_bytes(BufferContext *bc, int count){ + uint64_t val=0; + + assert(count>0 && count<9) + + for(i=0; i<count; i++){ + val <<=8; + val += *(bc->buf_ptr++); + } + + return val; +} + +static inline void put_bytes(BufferContext *bc, int count, uint64_t val){ + uint64_t val=0; + + assert(count>0 && count<9) + + for(i=count-1; i>=0; i--){ + *(bc->buf_ptr++)= val >> (8*i); + } + + return val; +} + +static inline uint64_t get_v(BufferContext *bc){ + uint64_t val= 0; + + for(; space_left(bc) > 0; ){ + int tmp= *(bc->buf_ptr++); + if(tmp&0x80) + val= (val<<7) + tmp - 0x80; + else + return (val<<7) + tmp; + } + + return -1; +} + +static inline int put_v(BufferContext *bc, uint64_t val){ + int i; + + if(space_left(bc) < 9) return -1; + + val &= 0x7FFFFFFFFFFFFFFFULL; // FIXME can only encode upto 63 bits currently + for(i=7; ; i+=7){ + if(val>>i == 0) break; + } + + for(i-=7; i>0; i-=7){ + *(bc->buf_ptr++)= 0x80 | (val>>i); + } + *(bc->buf_ptr++)= val&0x7F; + + return 0; +} + 
+static int64_t get_dts(int64_t pts, int64_t *pts_cache, int delay, int reset){ + if(reset) memset(pts_cache, -1, delay*sizeof(int64_t)); + + while(delay--){ + int64_t t= pts_cache[delay]; + if(t < pts){ + pts_cache[delay]= pts; + pts= t; + } + } + + return pts; +} + + Authors + +Folks from MPlayer Developers mailing list (http://www.mplayerhq.hu/). +Authors in ABC-order: (FIXME! Tell us if we left you out) + Beregszaszi, Alex (alex@fsn.hu) + Bunkus, Moritz (moritz@bunkus.org) + Diedrich, Tobias (td@sim.uni-hannover.de) + Felker, Rich (dalias@aerifal.cx) + Franz, Fabian (FabianFranz@gmx.de) + Gereoffy, Arpad (arpi@thot.banki.hu) + Hess, Andreas (jaska@gmx.net) + Niedermayer, Michael (michaelni@gmx.at) -- cgit v1.2.3