123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713 |
- # Found on a russian zope mailing list, and modified to fix bugs in parsing
- # the magic file and string making
- # -- Daniel Berlin <dberlin@dberlin.org>
- import sys, struct, time, re, exceptions, pprint, stat, os, pwd, grp
- _mew = 0
- # _magic='/tmp/magic'
- # _magic='/usr/share/magic.mime'
- _magic='/usr/share/magic.mime'
- mime = 1
- _ldate_adjust = lambda x: time.mktime( time.gmtime(x) )
- BUFFER_SIZE = 1024 * 128 # 128K should be enough...
- class MagicError(exceptions.Exception): pass
- def _handle(fmt='@x',adj=None): return fmt, struct.calcsize(fmt), adj
- KnownTypes = {
- # 'byte':_handle('@b'),
- 'byte':_handle('@B'),
- 'ubyte':_handle('@B'),
- 'string':('s',0,None),
- 'pstring':_handle('p'),
- # 'short':_handle('@h'),
- # 'beshort':_handle('>h'),
- # 'leshort':_handle('<h'),
- 'short':_handle('@H'),
- 'beshort':_handle('>H'),
- 'leshort':_handle('<H'),
- 'ushort':_handle('@H'),
- 'ubeshort':_handle('>H'),
- 'uleshort':_handle('<H'),
- 'long':_handle('@l'),
- 'belong':_handle('>l'),
- 'lelong':_handle('<l'),
- 'ulong':_handle('@L'),
- 'ubelong':_handle('>L'),
- 'ulelong':_handle('<L'),
- 'date':_handle('=l'),
- 'bedate':_handle('>l'),
- 'ledate':_handle('<l'),
- 'ldate':_handle('=l',_ldate_adjust),
- 'beldate':_handle('>l',_ldate_adjust),
- 'leldate':_handle('<l',_ldate_adjust),
- }
- _mew_cnt = 0
- def mew(x):
- global _mew_cnt
- if _mew :
- if x=='.' :
- _mew_cnt += 1
- if _mew_cnt % 64 == 0 : sys.stderr.write( '\n' )
- sys.stderr.write( '.' )
- else:
- sys.stderr.write( '\b'+x )
- def has_format(s):
- n = 0
- l = None
- for c in s :
- if c == '%' :
- if l == '%' : n -= 1
- else : n += 1
- l = c
- return n
- def read_asciiz(file,size=None,pos=None):
- s = []
- if pos :
- mew('s')
- file.seek( pos, 0 )
- mew('z')
- if size is not None :
- s = [file.read( size ).split('\0')[0]]
- else:
- while 1 :
- c = file.read(1)
- if (not c) or (ord(c)==0) or (c=='\n') : break
- s.append (c)
- mew('Z')
- return ''.join(s)
- def a2i(v,base=0):
- if v[-1:] in 'lL' : v = v[:-1]
- return int( v, base )
- _cmap = {
- '\\' : '\\',
- '0' : '\0',
- }
- for c in range(ord('a'),ord('z')+1) :
- try : e = eval('"\\%c"' % chr(c))
- except ValueError : pass
- else : _cmap[chr(c)] = e
- else:
- del c
- del e
- def make_string(s):
- return eval( '"'+s.replace('"','\\"')+'"')
- class MagicTestError(MagicError): pass
- class MagicTest:
- def __init__(self,offset,mtype,test,message,line=None,level=None):
- self.line, self.level = line, level
- self.mtype = mtype
- self.mtest = test
- self.subtests = []
- self.mask = None
- self.smod = None
- self.nmod = None
- self.offset, self.type, self.test, self.message = \
- offset,mtype,test,message
- if self.mtype == 'true' : return # XXX hack to enable level skips
- if test[-1:]=='\\' and test[-2:]!='\\\\' :
- self.test += 'n' # looks like someone wanted EOL to match?
- if mtype[:6]=='string' :
- if '/' in mtype : # for strings
- self.type, self.smod = \
- mtype[:mtype.find('/')], mtype[mtype.find('/')+1:]
- else:
- for nm in '&+-' :
- if nm in mtype : # for integer-based
- self.nmod, self.type, self.mask = (
- nm,
- mtype[:mtype.find(nm)],
- # convert mask to int, autodetect base
- int( mtype[mtype.find(nm)+1:], 0 )
- )
- break
- self.struct, self.size, self.cast = KnownTypes[ self.type ]
- def __str__(self):
- return '%s %s %s %s' % (
- self.offset, self.mtype, self.mtest, self.message
- )
- def __repr__(self):
- return 'MagicTest(%s,%s,%s,%s,line=%s,level=%s,subtests=\n%s%s)' % (
- `self.offset`, `self.mtype`, `self.mtest`, `self.message`,
- `self.line`, `self.level`,
- '\t'*self.level, pprint.pformat(self.subtests)
- )
- def run(self,file):
- result = ''
- do_close = 0
- try:
- if type(file) == type('x') :
- file = open( file, 'r', BUFFER_SIZE )
- do_close = 1
- # else:
- # saved_pos = file.tell()
- if self.mtype != 'true' :
- data = self.read(file)
- last = file.tell()
- else:
- data = last = None
- if self.check( data ) :
- result = self.message+' '
- if has_format( result ) : result %= data
- for test in self.subtests :
- m = test.run(file)
- if m is not None : result += m
- return make_string( result )
- finally:
- if do_close :
- file.close()
- # else:
- # file.seek( saved_pos, 0 )
- def get_mod_and_value(self):
- if self.type[-6:] == 'string' :
- # "something like\tthis\n"
- if self.test[0] in '=<>' :
- mod, value = self.test[0], make_string( self.test[1:] )
- else:
- mod, value = '=', make_string( self.test )
- else:
- if self.test[0] in '=<>&^' :
- mod, value = self.test[0], a2i(self.test[1:])
- elif self.test[0] == 'x':
- mod = self.test[0]
- value = 0
- else:
- mod, value = '=', a2i(self.test)
- return mod, value
- def read(self,file):
- mew( 's' )
- file.seek( self.offset(file), 0 ) # SEEK_SET
- mew( 'r' )
- try:
- data = rdata = None
- # XXX self.size might be 0 here...
- if self.size == 0 :
- # this is an ASCIIZ string...
- size = None
- if self.test != '>\\0' : # magic's hack for string read...
- value = self.get_mod_and_value()[1]
- size = (value=='\0') and None or len(value)
- rdata = data = read_asciiz( file, size=size )
- else:
- rdata = file.read( self.size )
- if not rdata or (len(rdata)!=self.size) : return None
- data = struct.unpack( self.struct, rdata )[0] # XXX hack??
- except:
- print >>sys.stderr, self
- print >>sys.stderr, '@%s struct=%s size=%d rdata=%s' % (
- self.offset, `self.struct`, self.size,`rdata`)
- raise
- mew( 'R' )
- if self.cast : data = self.cast( data )
- if self.mask :
- try:
- if self.nmod == '&' : data &= self.mask
- elif self.nmod == '+' : data += self.mask
- elif self.nmod == '-' : data -= self.mask
- else: raise MagicTestError(self.nmod)
- except:
- print >>sys.stderr,'data=%s nmod=%s mask=%s' % (
- `data`, `self.nmod`, `self.mask`
- )
- raise
- return data
- def check(self,data):
- mew('.')
- if self.mtype == 'true' :
- return '' # not None !
- mod, value = self.get_mod_and_value()
- if self.type[-6:] == 'string' :
- # "something like\tthis\n"
- if self.smod :
- xdata = data
- if 'b' in self.smod : # all blanks are optional
- xdata = ''.join( data.split() )
- value = ''.join( value.split() )
- if 'c' in self.smod : # all blanks are optional
- xdata = xdata.upper()
- value = value.upper()
- # if 'B' in self.smod : # compact blanks
- ### XXX sorry, i don't understand this :-(
- # data = ' '.join( data.split() )
- # if ' ' not in data : return None
- else:
- xdata = data
- try:
- if mod == '=' : result = data == value
- elif mod == '<' : result = data < value
- elif mod == '>' : result = data > value
- elif mod == '&' : result = data & value
- elif mod == '^' : result = (data & (~value)) == 0
- elif mod == 'x' : result = 1
- else : raise MagicTestError(self.test)
- if result :
- zdata, zval = `data`, `value`
- if self.mtype[-6:]!='string' :
- try: zdata, zval = hex(data), hex(value)
- except: zdata, zval = `data`, `value`
- if 0 : print >>sys.stderr, '%s @%s %s:%s %s %s => %s (%s)' % (
- '>'*self.level, self.offset,
- zdata, self.mtype, `mod`, zval, `result`,
- self.message
- )
- return result
- except:
- print >>sys.stderr,'mtype=%s data=%s mod=%s value=%s' % (
- `self.mtype`, `data`, `mod`, `value`
- )
- raise
- def add(self,mt):
- if not isinstance(mt,MagicTest) :
- raise MagicTestError((mt,'incorrect subtest type %s'%(type(mt),)))
- if mt.level == self.level+1 :
- self.subtests.append( mt )
- elif self.subtests :
- self.subtests[-1].add( mt )
- elif mt.level > self.level+1 :
- # it's possible to get level 3 just after level 1 !!! :-(
- level = self.level + 1
- while level < mt.level :
- xmt = MagicTest(None,'true','x','',line=self.line,level=level)
- self.add( xmt )
- level += 1
- else:
- self.add( mt ) # retry...
- else:
- raise MagicTestError((mt,'incorrect subtest level %s'%(`mt.level`,)))
- def last_test(self):
- return self.subtests[-1]
- #end class MagicTest
- class OffsetError(MagicError): pass
- class Offset:
- pos_format = {'b':'<B','B':'>B','s':'<H','S':'>H','l':'<I','L':'>I',}
- pattern0 = re.compile(r''' # mere offset
- ^
- &? # possible ampersand
- ( 0 # just zero
- | [1-9]{1,1}[0-9]* # decimal
- | 0[0-7]+ # octal
- | 0x[0-9a-f]+ # hex
- )
- $
- ''', re.X|re.I
- )
- pattern1 = re.compile(r''' # indirect offset
- ^\(
- (?P<base>&?0 # just zero
- |&?[1-9]{1,1}[0-9]* # decimal
- |&?0[0-7]* # octal
- |&?0x[0-9A-F]+ # hex
- )
- (?P<type>
- \. # this dot might be alone
- [BSL]? # one of this chars in either case
- )?
- (?P<sign>
- [-+]{0,1}
- )?
- (?P<off>0 # just zero
- |[1-9]{1,1}[0-9]* # decimal
- |0[0-7]* # octal
- |0x[0-9a-f]+ # hex
- )?
- \)$''', re.X|re.I
- )
- def __init__(self,s):
- self.source = s
- self.value = None
- self.relative = 0
- self.base = self.type = self.sign = self.offs = None
- m = Offset.pattern0.match( s )
- if m : # just a number
- if s[0] == '&' :
- self.relative, self.value = 1, int( s[1:], 0 )
- else:
- self.value = int( s, 0 )
- return
- m = Offset.pattern1.match( s )
- if m : # real indirect offset
- try:
- self.base = m.group('base')
- if self.base[0] == '&' :
- self.relative, self.base = 1, int( self.base[1:], 0 )
- else:
- self.base = int( self.base, 0 )
- if m.group('type') : self.type = m.group('type')[1:]
- self.sign = m.group('sign')
- if m.group('off') : self.offs = int( m.group('off'), 0 )
- if self.sign == '-' : self.offs = 0 - self.offs
- except:
- print >>sys.stderr, '$$', m.groupdict()
- raise
- return
- raise OffsetError(`s`)
- def __call__(self,file=None):
- if self.value is not None : return self.value
- pos = file.tell()
- try:
- if not self.relative : file.seek( self.offset, 0 )
- frmt = Offset.pos_format.get( self.type, 'I' )
- size = struct.calcsize( frmt )
- data = struct.unpack( frmt, file.read( size ) )
- if self.offs : data += self.offs
- return data
- finally:
- file.seek( pos, 0 )
- def __str__(self): return self.source
- def __repr__(self): return 'Offset(%s)' % `self.source`
- #end class Offset
- class MagicFileError(MagicError): pass
- class MagicFile:
- def __init__(self,filename=_magic):
- self.file = None
- self.tests = []
- self.total_tests = 0
- self.load( filename )
- self.ack_tests = None
- self.nak_tests = None
- def __del__(self):
- self.close()
- def load(self,filename=None):
- self.open( filename )
- self.parse()
- self.close()
- def open(self,filename=None):
- self.close()
- if filename is not None :
- self.filename = filename
- self.file = open( self.filename, 'r', BUFFER_SIZE )
- def close(self):
- if self.file :
- self.file.close()
- self.file = None
- def parse(self):
- line_no = 0
- for line in self.file.xreadlines() :
- line_no += 1
- if not line or line[0]=='#' : continue
- line = line.lstrip().rstrip('\r\n')
- if not line or line[0]=='#' : continue
- try:
- x = self.parse_line( line )
- if x is None :
- print >>sys.stderr, '#[%04d]#'%line_no, line
- continue
- except:
- print >>sys.stderr, '###[%04d]###'%line_no, line
- raise
- self.total_tests += 1
- level, offset, mtype, test, message = x
- new_test = MagicTest(offset,mtype,test,message,
- line=line_no,level=level)
- try:
- if level == 0 :
- self.tests.append( new_test )
- else:
- self.tests[-1].add( new_test )
- except:
- if 1 :
- print >>sys.stderr, 'total tests=%s' % (
- `self.total_tests`,
- )
- print >>sys.stderr, 'level=%s' % (
- `level`,
- )
- print >>sys.stderr, 'tests=%s' % (
- pprint.pformat(self.tests),
- )
- raise
- else:
- while self.tests[-1].level > 0 :
- self.tests.pop()
- def parse_line(self,line):
- # print >>sys.stderr, 'line=[%s]' % line
- if (not line) or line[0]=='#' : return None
- level = 0
- offset = mtype = test = message = ''
- mask = None
- # get optional level (count leading '>')
- while line and line[0]=='>' :
- line, level = line[1:], level+1
- # get offset
- while line and not line[0].isspace() :
- offset, line = offset+line[0], line[1:]
- try:
- offset = Offset(offset)
- except:
- print >>sys.stderr, 'line=[%s]' % line
- raise
- # skip spaces
- line = line.lstrip()
- # get type
- c = None
- while line :
- last_c, c, line = c, line[0], line[1:]
- if last_c!='\\' and c.isspace() :
- break # unescaped space - end of field
- else:
- mtype += c
- if last_c == '\\' :
- c = None # don't fuck my brain with sequential backslashes
- # skip spaces
- line = line.lstrip()
- # get test
- c = None
- while line :
- last_c, c, line = c, line[0], line[1:]
- if last_c!='\\' and c.isspace() :
- break # unescaped space - end of field
- else:
- test += c
- if last_c == '\\' :
- c = None # don't fuck my brain with sequential backslashes
- # skip spaces
- line = line.lstrip()
- # get message
- message = line
- if mime and line.find("\t") != -1:
- message=line[0:line.find("\t")]
- #
- # print '>>', level, offset, mtype, test, message
- return level, offset, mtype, test, message
- def detect(self,file):
- self.ack_tests = 0
- self.nak_tests = 0
- answers = []
- for test in self.tests :
- message = test.run( file )
- if message :
- self.ack_tests += 1
- answers.append( message )
- else:
- self.nak_tests += 1
- if answers :
- return '; '.join( answers )
- #end class MagicFile
- def username(uid):
- try:
- return pwd.getpwuid( uid )[0]
- except:
- return '#%s'%uid
- def groupname(gid):
- try:
- return grp.getgrgid( gid )[0]
- except:
- return '#%s'%gid
- def get_file_type(fname,follow):
- t = None
- if not follow :
- try:
- st = os.lstat( fname ) # stat that entry, don't follow links!
- except os.error, why :
- pass
- else:
- if stat.S_ISLNK(st[stat.ST_MODE]) :
- t = 'symbolic link'
- try:
- lnk = os.readlink( fname )
- except:
- t += ' (unreadable)'
- else:
- t += ' to '+lnk
- if t is None :
- try:
- st = os.stat( fname )
- except os.error, why :
- return "can't stat `%s' (%s)." % (why.filename,why.strerror)
- dmaj, dmin = (st.st_rdev>>8)&0x0FF, st.st_rdev&0x0FF
- if 0 : pass
- elif stat.S_ISSOCK(st.st_mode) : t = 'socket'
- elif stat.S_ISLNK (st.st_mode) : t = follow and 'symbolic link' or t
- elif stat.S_ISREG (st.st_mode) : t = 'file'
- elif stat.S_ISBLK (st.st_mode) : t = 'block special (%d/%d)'%(dmaj,dmin)
- elif stat.S_ISDIR (st.st_mode) : t = 'directory'
- elif stat.S_ISCHR (st.st_mode) : t = 'character special (%d/%d)'%(dmaj,dmin)
- elif stat.S_ISFIFO(st.st_mode) : t = 'pipe'
- else: t = '<unknown>'
- if st.st_mode & stat.S_ISUID :
- t = 'setuid(%d=%s) %s'%(st.st_uid,username(st.st_uid),t)
- if st.st_mode & stat.S_ISGID :
- t = 'setgid(%d=%s) %s'%(st.st_gid,groupname(st.st_gid),t)
- if st.st_mode & stat.S_ISVTX :
- t = 'sticky '+t
- return t
- HELP = '''%s [options] [files...]
- Options:
- -?, --help -- this help
- -m, --magic=<file> -- use this magic <file> instead of %s
- -f, --files=<namefile> -- read filenames for <namefile>
- * -C, --compile -- write "compiled" magic file
- -b, --brief -- don't prepend filenames to output lines
- + -c, --check -- check the magic file
- -i, --mime -- output MIME types
- * -k, --keep-going -- don't stop st the first match
- -n, --flush -- flush stdout after each line
- -v, --verson -- print version and exit
- * -z, --compressed -- try to look inside compressed files
- -L, --follow -- follow symlinks
- -s, --special -- don't skip special files
- * -- not implemented so far ;-)
- + -- implemented, but in another way...
- '''
- def main():
- import getopt
- global _magic
- try:
- brief = 0
- flush = 0
- follow= 0
- mime = 0
- check = 0
- special=0
- try:
- opts, args = getopt.getopt(
- sys.argv[1:],
- '?m:f:CbciknvzLs',
- ( 'help',
- 'magic=',
- 'names=',
- 'compile',
- 'brief',
- 'check',
- 'mime',
- 'keep-going',
- 'flush',
- 'version',
- 'compressed',
- 'follow',
- 'special',
- )
- )
- except getopt.error, why:
- print >>sys.stderr, sys.argv[0], why
- return 1
- else:
- files = None
- for o,v in opts :
- if o in ('-?','--help'):
- print HELP % (
- sys.argv[0],
- _magic,
- )
- return 0
- elif o in ('-f','--files='):
- files = v
- elif o in ('-m','--magic='):
- _magic = v[:]
- elif o in ('-C','--compile'):
- pass
- elif o in ('-b','--brief'):
- brief = 1
- elif o in ('-c','--check'):
- check = 1
- elif o in ('-i','--mime'):
- mime = 1
- if os.path.exists( _magic+'.mime' ) :
- _magic += '.mime'
- print >>sys.stderr,sys.argv[0]+':',\
- "Using regular magic file `%s'" % _magic
- elif o in ('-k','--keep-going'):
- pass
- elif o in ('-n','--flush'):
- flush = 1
- elif o in ('-v','--version'):
- print 'VERSION'
- return 0
- elif o in ('-z','--compressed'):
- pass
- elif o in ('-L','--follow'):
- follow = 1
- elif o in ('-s','--special'):
- special = 1
- else:
- if files :
- files = map(lambda x: x.strip(), v.split(','))
- if '-' in files and '-' in args :
- error( 1, 'cannot use STDIN simultaneously for file list and data' )
- for file in files :
- for name in (
- (file=='-')
- and sys.stdin
- or open(file,'r',BUFFER_SIZE)
- ).xreadlines():
- name = name.strip()
- if name not in args :
- args.append( name )
- try:
- if check : print >>sys.stderr, 'Loading magic database...'
- t0 = time.time()
- m = MagicFile(_magic)
- t1 = time.time()
- if check :
- print >>sys.stderr, \
- m.total_tests, 'tests loaded', \
- 'for', '%.2f' % (t1-t0), 'seconds'
- print >>sys.stderr, len(m.tests), 'tests at top level'
- return 0 # XXX "shortened" form ;-)
- mlen = max( map(len, args) )+1
- for arg in args :
- if not brief : print (arg + ':').ljust(mlen),
- ftype = get_file_type( arg, follow )
- if (special and ftype.find('special')>=0) \
- or ftype[-4:] == 'file' :
- t0 = time.time()
- try:
- t = m.detect( arg )
- except (IOError,os.error), why:
- t = "can't read `%s' (%s)" % (why.filename,why.strerror)
- if ftype[-4:] == 'file' : t = ftype[:-4] + t
- t1 = time.time()
- print t and t or 'data'
- if 0 : print \
- '#\t%d tests ok, %d tests failed for %.2f seconds'%\
- (m.ack_tests, m.nak_tests, t1-t0)
- else:
- print mime and 'application/x-not-regular-file' or ftype
- if flush : sys.stdout.flush()
- # print >>sys.stderr, 'DONE'
- except:
- if check : return 1
- raise
- else:
- return 0
- finally:
- pass
- if __name__ == '__main__' :
- sys.exit( main() )
- # vim:ai
- # EOF #
|