Hi, I'm using PyFlag to parse .pcap files. PyFlag displays the data part of the .pcap file in binary, and I was wondering what this means. How do I decode the 'data' part of the packet without using external libraries? What does this non-alphanumeric information mean? Is there a (Python) dictionary or documentation somewhere that I can use to parse this data?
Thanks again. Cheers, Astan. PS: I've attached the .pcap file that I'm using to test, and the Python file as well.
-- "Formulations of number theory: Complete, Consistent, Non-trivial. Choose two." Animal Logic http://www.animallogic.com Please think of the environment before printing this email. This email and any attachments may be confidential and/or privileged. If you are not the intended recipient of this email, you must not disclose or use the information contained in it. Please notify the sender immediately and delete this document if you have received it in error. We do not guarantee this email is error or virus free.
# ******************************************************
# Michael Cohen <[EMAIL PROTECTED]>
#
# ******************************************************
#  Version: FLAG $Version: 0.86RC1 Date: Thu Jan 31 01:21:19 EST 2008$
# ******************************************************
#
# * This program is free software; you can redistribute it and/or
# * modify it under the terms of the GNU General Public License
# * as published by the Free Software Foundation; either version 2
# * of the License, or (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# ******************************************************
""" A library for reading PCAP files.

PCAP format is really simple so this is a nobrainer. Good example of
using the file format library though. This shows how we can handle the
endianess issue simply.

The module is organised as a small declarative parsing framework:

 - Buffer is a window (offset, size) onto a seekable file object.
 - DataType subclasses read one value from a Buffer when constructed.
 - SimpleStruct reads a list of named fields one after another.
 - FileHeader and Packet describe the PCAP file header and the
   per-packet record header using that framework.
"""
import struct
import sys
import time

try:
    import cStringIO
    _BytesIO = cStringIO.StringIO
except ImportError:
    ## cStringIO is not available on every interpreter. io.BytesIO
    ## provides the same in-memory file interface for byte strings.
    import io
    _BytesIO = io.BytesIO

## This is the default size that will be read when not specified
DEFAULT_SIZE=600*1024*1024

class Buffer:
    """ This class looks very much like a string, but in fact uses a
    file object.

    The advantage here is that when we do string slicing, we are not
    duplicating strings all over the place (better performace). Also
    it is always possible to tell where a particular piece of data
    came from.
    """
    def __init__(self,fd,offset=0,size=None):
        """ Creates a window onto fd.

        fd is a seekable file like object. offset is the absolute
        position in fd where this buffer starts. If size is omitted it
        is set to the position of the end of fd (note that this is the
        absolute end position, it is not reduced by offset).
        """
        self.offset=offset
        self.fd=fd
        if size is not None:
            self.size=size
        else:
            ## Try to calculate the size by seeking the fd to the end
            saved_position = fd.tell()
            try:
                fd.seek(0,2)
            except Exception as e:
                ## Best effort: report the problem and use whatever
                ## position the fd is left at.
                print("%s: %r" % (e,fd))

            self.size=fd.tell()
            fd.seek(saved_position)

    def clone(self):
        """ Returns a new Buffer over the same fd, offset and size """
        return self.__class__(fd=self.fd, offset=self.offset, size=self.size)

    def __len__(self):
        return self.size

    def set_offset(self,offset):
        """ This sets the absolute offset.

        It is useful in files which specify an absolute offset into
        the file within some of the data structures. A new Buffer is
        returned, this one is not modified.
        """
        return self.__class__(fd=self.fd,offset=offset)

    def __getitem__(self,offset):
        """ Return a single char from the string.

        A slice object (which is how extended slices, and on some
        interpreters all slices, are delivered) is forwarded to
        __getslice__ so that buffer[a:b] always yields a Buffer.
        Slices with a step are rejected with a TypeError.
        """
        if isinstance(offset, slice):
            if offset.step not in (None, 1):
                raise TypeError("Buffer slices do not support a step")

            start, stop = offset.start, offset.stop
            ## Mirror what the interpreter does before calling
            ## __getslice__: missing bounds become 0 and the largest
            ## index, negative bounds are taken relative to the length.
            if start is None:
                start = 0
            elif start < 0:
                start += self.size

            if stop is None:
                stop = sys.maxsize
            elif stop < 0:
                stop += self.size

            return self.__getslice__(start, stop)

        self.fd.seek(offset+self.offset)
        return self.fd.read(1)

    ## FIXME: Python slicing will only pass uint_32 using the syntax
    def __getslice__(self,a=0,b=None):
        """ Returns another Buffer object which may be manipulated
        completely independently from this one.

        Note that the new buffer object references the same fd that we
        are based on. b is clamped to our own size. When b is omitted
        (or zero) the new buffer has no explicit size and works its
        size out from the fd.
        """
        if b:
            if b>self.size:
                b=self.size

            return self.__class__(fd=self.fd,offset=self.offset+a,size=b-a)
        else:
            return self.__class__(fd=self.fd,offset=self.offset+a)

    def __str__(self):
        """ Reads and returns the data this buffer covers.

        A negative size means the size is unknown, in which case up to
        DEFAULT_SIZE bytes are read. The result may be shorter than
        size if the fd runs out of data.
        """
        self.fd.seek(self.offset)
        if self.size>=0:
            data=self.fd.read(self.size)
        else:
            data=self.fd.read(DEFAULT_SIZE)

        return data

    def __nonzero__(self):
        ## A Buffer is always true, even when it is empty, so that
        ## "if buffer:" only tests whether a buffer was supplied.
        return True

    __bool__ = __nonzero__

    def search(self, bytes):
        """ Searches the buffer for the occurance of bytes.

        Returns the position relative to the start of this buffer, or
        -1 if bytes is not found.
        """
        data = self.__str__()
        return data.find(bytes)

#### Start of data definitions:
class DataType:
    """ Base class that reads a data type from the file."""
    ## Controls if this is visible in the GUI
    visible = False

    ## This is the SQL type which is most appropriate for storing the
    ## results of value()
    sql_type = "text"

    data=''

    def __init__(self,buffer,*args,**kwargs):
        """ This will force this class to read the data type from data
        at the specified offset.

        buffer may be a Buffer or a plain byte string (which is
        wrapped in a Buffer). If no buffer is given nothing is read
        and an empty Buffer is installed instead. The keyword
        arguments are kept in self.parameters.
        """
        ## bytes is the same type as str on Python 2
        if isinstance(buffer,bytes):
            fd = _BytesIO(buffer)
            self.buffer=Buffer(fd)
        else:
            self.buffer=buffer

        self.parameters = kwargs

        if self.buffer:
            self.data=self.read()
        else:
            self.buffer=Buffer(_BytesIO(b''))

    def size(self):
        """ This is the size of this data type - it returns the number
        of bytes we consume.
        """
        return 0

    def __str__(self):
        return "%s" % (self.data,)

    def read(self):
        """ Abstract method that returns the data type required to be
        stored internally.
        """
        return None

    def write(self,out):
        pass

    def __ne__(self,target):
        return not self.__eq__(target)

    def get_value(self):
        """ In the general case we return ourself as the opaque data
        type.
        """
        return self

    def set_value(self, data):
        self.data=data

    def form(self,prefix, query,result):
        pass

    def display(self, result):
        result.text(self.__str__(), wrap='full', sanitise='full', font='typewriter')

    def value(self):
        return self.__str__()

class RAW(DataType):
    """ This data type is simply a data buffer. """
    def __init__(self,buffer,*args,**kwargs):
        DataType.__init__(self,buffer,*args,**kwargs)
        ## Keep the buffer exactly as it was given and load all of it
        self.buffer = buffer
        self.data = buffer.__str__()

    def size(self):
        return len(self.data)

    def get_value(self):
        """ Returns a copy of the buffer holding the raw data """
        return self.buffer.clone()

    def __repr__(self):
        if not self.data:
            self.read()

        return ''.join([char.__str__() for char in self.data])

    def __str__(self):
        """ Returns the data with every character which is not
        alphanumeric or common punctuation replaced by a dot.
        """
        ## NOTE(review): the start of this punctuation list was
        ## replaced by an address obfuscator in the archived copy of
        ## this file. It is restored here as '!@#$%^' - confirm
        ## against the upstream PyFlag source.
        printable = '!@#$%^&*()_+-=[]\\{}|;\':",./<>?'
        tmp = []
        for char in self.data:
            char = "%s" % char
            if char.isalnum() or char in printable:
                tmp.append(char)
            else:
                tmp.append('.')

        return ''.join(tmp)

class BasicType(DataType):
    """ Base class for basic types that unpack understands.

    Subclasses provide fmt, a struct format string describing a
    single value.
    """
    sql_type = "int"

    def __init__(self,buffer,*args,**kwargs):
        """ Recognised keyword arguments:

        endianess: a string starting with 'l' (little) or 'b' (big)
          forces that byte order onto fmt. Any other value, or no
          value at all, leaves fmt alone.
        value: an initial value for data. It is replaced by the value
          read from buffer when a buffer is given.
        """
        endianess = kwargs.get('endianess')
        direction = None
        if endianess is not None:
            if endianess.startswith('l'):
                direction = "<"
            elif endianess.startswith('b'):
                direction = ">"

        ## Enforce the endianess. An unrecognised endianess used to
        ## fail here on an unbound variable; now it is simply ignored.
        if direction is not None:
            if self.fmt[0] in '<>=@':
                self.fmt = direction+self.fmt[1:]
            else:
                self.fmt = direction+self.fmt

        try:
            self.data = kwargs['value']
        except KeyError:
            pass

        DataType.__init__(self,buffer,*args,**kwargs)

    def size(self):
        return struct.calcsize(self.fmt)

    def read(self):
        """ Unpacks one value of format fmt from the start of the
        buffer. Raises IOError if the data can not be unpacked (for
        example because the buffer is too short).
        """
        try:
            length = struct.calcsize(self.fmt)
            if length>0:
                return struct.unpack(self.fmt,self.buffer[:length].__str__())[0]
            return ''
        except struct.error as e:
            raise IOError("%s. Tried to use format string %s"% (e, self.fmt))

    def write(self, output):
        """ Packs data using fmt and writes it to output. Raises
        IOError if data can not be packed.
        """
        try:
            data = struct.pack(self.fmt, self.data)
            output.write(data)
        except struct.error as e:
            raise IOError("%s" % e)

    def __int__(self):
        return int(self.data)

    def get_value(self):
        return self.data

    def set_value(self,v):
        self.data=v

    def __eq__(self,target):
        try:
            return self.data==target
        except Exception:
            ## Values which refuse to be compared are not equal
            return False

    def form(self,prefix, query,result):
        result.row("Size", self.size())

class WORD(BasicType):
    """ Reads a word (unsigned short int) from the data.

    The byte order is the native one unless the endianess parameter
    says otherwise.
    """
    fmt = '=H'
    visible = True
    sql_type = "int"

    def __str__(self):
        return "0x%X" % (self.data,)

class USHORT(WORD):
    pass

class LONG(BasicType):
    fmt='=l'
    visible = True

class ULONG(BasicType):
    fmt='=L'
    visible = True

    def __str__(self):
        return "%s (0x%X)" % (self.data,self.data)

class ULONG_CONSTANT(ULONG):
    """ This class enforces a condition raising an error otherwise.

    The required value is given by the 'expected' parameter.
    """
    def read(self):
        result = ULONG.read(self)
        if not result==self.parameters['expected']:
            raise RuntimeError("Expected value 0x%X, got 0x%X" %(
                self.parameters['expected'], result))

        return result

class USHORT_CONSTANT(USHORT):
    """ This class enforces a condition raising an error otherwise.

    The required value is given by the 'expected' parameter.
    """
    def read(self):
        result = USHORT.read(self)
        if not result==self.parameters['expected']:
            raise RuntimeError("Expected value 0x%X, got 0x%X" %(
                self.parameters['expected'], result))

        return result

class LEWORD(WORD):
    fmt = "<H"

class LELONG(LONG):
    fmt = "<l"

class LEULONG(ULONG):
    fmt = "<L"

class BEWORD(WORD):
    fmt = ">H"

class BELONG(LONG):
    fmt = ">l"

class BEULONG(ULONG):
    fmt = ">L"

class LONGLONG(BasicType):
    fmt='=q'
    visible = True

class DWORD(LONG):
    pass

class CHAR(BasicType):
    fmt='=c'
    visible = True

class BYTE(BasicType):
    fmt='b'
    visible = True

    def __str__(self):
        ## This is done to remove deprecation warnings:
        try:
            return "%0x" % (self.data,)
        except (TypeError, ValueError):
            ## data is not a number (subclasses store strings here)
            return self.data

class BYTE_CONSTANT(BYTE):
    """ This class enforces a condition raising an error otherwise.

    The required value is given by the 'expected' parameter.
    """
    def read(self):
        result = BYTE.read(self)
        if not result==self.parameters['expected']:
            raise RuntimeError("Expected value 0x%X, got 0x%X" %(
                self.parameters['expected'], result))

        return result

class UBYTE(BasicType):
    fmt='=B'
    visible = True

    def __str__(self):
        ## This is done to remove deprecation warnings:
        return "%02x" % (self.data,)

class SimpleStruct(DataType):
    """ A class representing a simple struct to read off disk """
    ## These are the fields that are required to define the
    ## struct. They may be terminated at any time for exmaple the
    ## array may stop after Name:
    ## ["Name","Type","Parameters","Description","Function" ]
    fields = []

    def __init__(self,buffer,*args,**kwargs):
        self.buffer = buffer
        self.parameters = kwargs
        ## Keep a reference to our original fields list
        self._fields = self.fields

        ## This may change the fields attribute
        self.init()

        self.data={}
        self.offsets={}
        DataType.__init__(self,buffer,*args,**kwargs)

    def init(self):
        """ Hook called before reading. Subclasses may set up
        self.fields here.
        """
        pass

    def add_element(self,result, name,element, *args):
        """ Adds an element to the dict as well as to the fields
        table. This function allows users to dynamically add new
        elements to the struct from the read() method.
        """
        ## we are about to modify our fields attribute. We should make
        ## a copy to ensure that we do not modify the class
        ## attribute. This is done so that fields can be filled in the
        ## class definition to make it more efficient.
        if self._fields==self.fields:
            self.fields=self.fields[:]

        result[name]=element
        self.fields.append([name, element.__class__])
        self.offsets[name]=element.buffer.offset-self.buffer.offset

    def read(self):
        """ Reads every field in turn and returns a dict mapping field
        names to the elements read.

        Each field receives our own parameters (this is how endianess
        reaches the basic types), updated with the field's own
        parameter dict if it has one. Parameter values which are
        callable are called with the dict of fields read so far. An
        'offset' parameter moves the read position and is not passed
        on. Reading stops early once the end of the buffer is reached.

        Errors raised while reading a field are re-raised as the same
        exception class, with the field name added to the message.
        """
        self.offset=0
        result={}

        for item in self.fields:
            try:
                name = item[0]
                element = item[1]
            except (IndexError, TypeError, KeyError):
                ## Incomplete field definition
                continue

            parameters = self.parameters.copy()
            try:
                parameters.update(item[2])
            except (IndexError, TypeError, ValueError):
                ## This field has no parameters of its own (missing or
                ## None). Keep the inherited ones: throwing them away
                ## here would stop endianess from ever reaching the
                ## field.
                pass

            ## Evaluate the parameters if needed:
            for k,v in list(parameters.items()):
                if callable(v):
                    parameters[k]=v(result)

            ## Handle offset specially:
            if 'offset' in parameters:
                self.offset = parameters['offset']
                ## Consume the offset to prevent it from propegating
                ## to the element (in case its a SimpleStruct too).
                del parameters['offset']

            try:
                result[name]=element(self.buffer[self.offset:],**parameters)
            except Exception as e:
                raise e.__class__("When parsing field %r of %s, %s" % (name, self.__class__,e))

            self.offsets[name]=self.offset
            self.offset+=result[name].size()

            if self.offset >= self.buffer.size:
                break

        return result

    def write(self,output):
        for item in self.fields:
            self.data[item[0]].write(output)

    def calculate_struct_size(self,struct):
        """ calculates the total size of struct by summing its
        individual sizes.

        struct is a dict of classes.
        """
        return sum([element.size() for element in struct.values()])

    def size(self):
        return self.calculate_struct_size(self.data)

    def __str__(self):
        """ Prints the array nicely """
        result='Struct %s:\n' % ("%s" %self.__class__).split('.')[-1]
        result+='-'*(len(result)-1)+"\n"

        for item in self.fields:
            try:
                desc = "%s(%s)" % (item[0],item[3])
            except (IndexError, TypeError):
                ## No description for this field
                desc = item[0]

            try:
                element=self.data[item[0]]
            except KeyError:
                continue

            tmp = "\n ".join((element.__str__()).splitlines())
            result+="%04X - %s: %s\n" % (
                element.buffer.offset,
                desc,tmp)

        return result

    def __getitem__(self,attr):
        return self.data[attr]

    def __setitem__(self,k,attr):
        self.data[k]=attr

    def form(self,prefix, query,result):
        result.row("Size", self.calculate_struct_size(self.data))

class POINTER(LONG):
    """ This represents a pointer to a struct within the file.

    Subclasses are expected to set target_class, the type
    instantiated at the pointed to location by get_value().
    """
    visible = True

    def __init__(self,buffer,*args,**kwargs):
        LONG.__init__(self,buffer,*args,**kwargs)
        self.relative_offset = kwargs.get('relative_offset', 0)

    def calc_offset(self):
        """ return a buffer object seeked to the correct offset """
        offset=self.relative_offset+self.data
        return self.buffer.set_offset(offset)

    def get_value(self):
        data = self.calc_offset()

        if data is None:
            return None

        return self.target_class(data)

    def __str__(self):
        result="->%s (0x%08X)" % (self.data,self.data)
        return result

class StructArray(SimpleStruct):
    """ An array of 'count' elements of type target_class, stored
    under the integer keys 0..count-1.
    """
    def __init__(self,buffer,*args,**kwargs):
        try:
            self.count=int(kwargs['count'])
        except (KeyError, TypeError, ValueError, AttributeError):
            ## No usable count means an empty array
            self.count=0

        self.fields = [ [i,self.target_class, kwargs] for i in range(self.count)]
        SimpleStruct.__init__(self,buffer,*args,**kwargs)

    def __str__(self):
        result = "Array %s:" % ("%s" %self.__class__).split('.')[-1]
        for i in range(self.count):
            result+="\nMember %s of %s:\n" % (i,self.count)
            result+="\n ".join(self.data[i].__str__().splitlines())

        return result

    def extend(self,target):
        """ Appends target as the new last member """
        self.data[self.count]=target
        self.count+=1

    def __eq__(self,target):
        for x in range(self.count):
            try:
                if not self.data[x]==target[x]:
                    return False
            except Exception:
                ## Missing or incomparable members are a mismatch
                return False

        return True

    def __iter__(self):
        self.index=0
        return self

    def next(self):
        try:
            result=self.data[self.index]
        except (KeyError, IndexError):
            raise StopIteration()

        self.index+=1
        return result

    __next__ = next

    def get_value(self):
        return [ self.data[x].get_value() for x in range(self.count) ]

class ARRAY(StructArray):
    def __str__(self):
        result = ','.join([self.data[a].__str__() for a in range(self.count) if a in self.data])
        return result

class BYTE_ARRAY(ARRAY):
    target_class=BYTE

class UBYTE_ARRAY(ARRAY):
    target_class=UBYTE

class WORD_ARRAY(ARRAY):
    target_class=WORD

class LONG_ARRAY(ARRAY):
    target_class = LONG

class ULONG_ARRAY(ARRAY):
    target_class = ULONG

class STRING(BYTE):
    """ A string of a fixed length.

    Either the 'value' parameter (whose length is then used) or the
    'length' parameter must be given, otherwise SystemError is raised.
    """
    visible = True

    def __init__(self,buffer,*args,**kwargs):
        try:
            self.data = kwargs['value']
            self.length = len(self.data)
            self.fmt = "%us" % self.length
        except KeyError:
            try:
                self.length = kwargs['length'].__int__()
                self.fmt = "%us" % self.length
            except Exception:
                raise SystemError("you must specify the length of a STRING")

        BYTE.__init__(self,buffer,*args,**kwargs)

    def __str__(self):
        return "%s" % self.data

    def substr(self,start,end):
        """ Truncates the string at a certain point """
        self.data=self.data[start:end]

    def set_value(self, value):
        self.data = value
        ## Update our format string to use the length:
        self.length = len(value)
        self.fmt = "%ss" % self.length

    def __len__(self):
        return self.length

    def size(self):
        return self.length

    def form(self,prefix, query,result):
        result.textfield("String length","%slength" % prefix)

    def display(self, result):
        result.text(self.__str__(), sanitise='full', font='typewriter')

class TERMINATED_STRING(DataType):
    """ This data structure represents a string which is terminated by
    a terminator.

    For efficiency we read large blocks and use string finds to locate
    the terminator
    """
    visible = True
    terminator='\x00'
    max_blocksize=1024*1024
    initial_blocksize=1024
    ## Do we include the terminator?
    inclusive = True

    def read(self):
        """ Returns the string including its terminator.

        The block searched is doubled until the terminator is found.
        Once the block would exceed max_blocksize the search is given
        up and the string is taken to be max_blocksize long.
        """
        blocksize=self.initial_blocksize
        tmp=''
        end=-1
        while end<0:
            tmp=self.buffer[0:blocksize].__str__()
            end=tmp.find(self.terminator)

            if end>=0:
                break

            blocksize*=2
            if blocksize>self.max_blocksize:
                end=self.max_blocksize
                break

        ## The size of this string includes the terminator
        self.raw_size=end+len(self.terminator)
        return self.buffer[0:self.raw_size].__str__()

    def size(self):
        return self.raw_size

    def get_value(self):
        if self.inclusive:
            return self.data
        else:
            return self.data[:-len(self.terminator)]

    def __eq__(self,target):
        return self.data==target

    def __getitem__(self,x):
        return self.data[x]

class BYTE_ENUM(UBYTE):
    """ A number which is displayed and compared by the name the
    types dict gives it.
    """
    types={}

    def __str__(self):
        try:
            return "%s" % (self.types[self.data])
        except KeyError:
            return "Unknown (0x%02X)" % self.data

    def __eq__(self,target):
        try:
            return target==self.types[self.data]
        except KeyError:
            return target==self.data

    def get_value(self):
        try:
            return self.types[self.data]
        except (KeyError,IndexError):
            return "Unknown (%s)" % self.data

class LONG_ENUM(BYTE_ENUM):
    fmt='=l'

class WORD_ENUM(BYTE_ENUM):
    fmt='=H'

class BitField(BYTE):
    ## This stores the masks
    masks = {}

    def __str__(self):
        result=[ v for k,v in self.masks.items() if k & self.data ]
        return ','.join(result)

class UCS16_STR(STRING):
    visible = True

    encoding = "utf_16_le"

    def read(self):
        result=STRING.read(self)

        ## This is the common encoding for windows system:
        try:
            return result.decode(self.encoding)
        except UnicodeDecodeError:
            if result=='\0':
                return ''
            else:
                return "%r" % result

    def __str__(self):
        ## Return up to the first null termination
        try:
            result = self.data.__str__()
            try:
                return result[:result.index("\0")]
            except ValueError:
                return result

        except UnicodeEncodeError:
            return "%r" % self.data

class CLSID(ULONG_ARRAY):
    """ A class id - common in windows """
    visible = True

    def __init__(self,buffer,*args,**kwargs):
        ## Class IDs are 4 uint_32 long
        kwargs['count']=4
        ULONG_ARRAY.__init__(self,buffer,*args,**kwargs)

    def __str__(self):
        result=[ "%0.8X" % i.get_value() for i in self ]
        return "{%s}" % '-'.join(result)

class TIMESTAMP(ULONG):
    """ A standard unix timestamp.

    Number of seconds since the epoch (1970-1-1)
    """
    visible = True

    def __str__(self):
        return time.strftime("%Y/%m/%d %H:%M:%S",time.localtime(self.data))

class WIN_FILETIME(SimpleStruct):
    """ A FileTime 8 byte time commonly see in windows.

    This represent the number of 100ns periods since 1601 - how did
    they come up with that???
    """
    visible = True
    sql_type = "int"

    def init(self):
        self.fields = [
            [ 'low', ULONG ],
            [ 'high', ULONG ]
            ]

    def to_unixtime(self):
        """ Returns the current time as a unix time """
        t=float(self['high'].get_value())* 2**32 +self['low'].get_value()
        return (t*1e-7 - 11644473600)

    def get_value(self):
        """ We just return the unix time here """
        return self.to_unixtime()

    def __str__(self):
        t = self.to_unixtime()
        try:
            return time.strftime("%Y/%m/%d %H:%M:%S",time.localtime(t))
        except Exception:
            ## The time is outside of what the platform can represent
            return "Invalid Timestamp %X:%X" % (int(self['low']),int(self['high']))

class WIN12_FILETIME(WIN_FILETIME):
    """ A 12 byte variant of above. Last LONG is just all zeros
    usually so we ignore it
    """
    visible = True

    def init(self):
        WIN_FILETIME.init(self)
        self.fields.append(['pad',ULONG])

class LPSTR(SimpleStruct):
    """ This is a string with a size before it """
    def __init__(self, buffer,*args,**kwargs):
        SimpleStruct.__init__(self, buffer, *args,**kwargs)
        try:
            ## This initialises the LPSTR from kwargs
            length = len(kwargs['value'])
            new_string = STRING(kwargs['value'], length=length)

            self.data = dict(data = new_string,
                             length = ULONG(None, value=length))
        except KeyError:
            pass

    def init(self):
        self.fields = [
            [ 'length', LONG],
            [ 'data', STRING, dict(length=lambda x: x['length']) ]
            ]

    def set_value(self, value):
        """ Update our length field automatically """
        data = self['data']
        data.set_value(value)
        self['length'].set_value(len(data))

    def __str__(self):
        return self['data'].__str__()

class IPAddress(STRING):
    """ Four bytes displayed as a dotted quad """
    def __init__(self, buffer,*args,**kwargs):
        kwargs['length'] = 4
        STRING.__init__(self, buffer, *args, **kwargs)

    def __str__(self):
        return '.'.join([ord(x).__str__() for x in self.data])

class FileHeader(SimpleStruct):
    """ The PCAP file header.

    Iterating over a FileHeader yields the Packet records which follow
    the header in the file.
    """
    fields = [
        ['magic',         ULONG],
        ['version_major', WORD],
        ['version_minor', WORD],
        ['thiszone',      ULONG, None, "gmt to local correction"],
        ['sigfigs',       ULONG, None, "accuracy of timestamps"],
        ['snaplen',       ULONG, None, "max length saved portion of each pkt"],
        ['linktype',      ULONG, None, "data link type (LINKTYPE_*)"],
        ]

    def read(self):
        """ Locates the magic number and reads the header using the
        byte order the magic was found in.

        Sets parameters['endianess'] ('l' or 'b'), start_of_file (the
        position of the magic) and start_of_data (the position just
        after the header, relative to the magic). Raises IOError if no
        magic is found.
        """
        ## Try to read the file with little endianess
        self.parameters['endianess']='l'

        ## Try to find the magic within the first 1000 bytes - There
        ## could be some crap at the start of the file. The little
        ## endian form is tried first, then the big endian one.
        tmp = self.buffer[0:1000]
        for endianess, byte_order in (('l', '<'), ('b', '>')):
            off = tmp.search(struct.pack(byte_order+"L",0xa1b2c3d4))
            if off>=0:
                self.parameters['endianess']=endianess
                self.offset = off
                self.buffer = self.buffer[off:]
                result=SimpleStruct.read(self)
                self.start_of_file = off
                self.start_of_data = self.offset
                return result

        result=SimpleStruct.read(self)
        ## Dont know the magic
        raise IOError('This is not a pcap magic (%s) at offset 0x%08X' % (result['magic'], self.buffer.offset))

    def __iter__(self):
        self.offset = self.start_of_data
        return self

    def next(self):
        ## Try to read the next packet and return it:
        try:
            b = self.buffer.__getslice__(self.offset)
            p = Packet(b, endianess=self.parameters['endianess'])
            self.offset+=p.size()
            return p
        except IOError:
            ## A packet which can not be read marks the end of the file
            raise StopIteration

    __next__ = next

class Packet(SimpleStruct):
    """ Each packet is preceeded by this. """
    fields = [
        ['ts_sec',  TIMESTAMP, None, "time stamp"],
        ['ts_usec', ULONG,     None, "Time in usecs"],
        ['caplen',  ULONG,     None, "length of portion present"],
        ['length',  ULONG,     None, "length this packet (off wire)"]
        ]

    def read(self):
        """ Reads the record header followed by caplen bytes of packet
        data, which are added as the RAW field 'data'.

        Raises IOError if caplen is larger than 64000 or if fewer than
        caplen bytes are available.
        """
        result=SimpleStruct.read(self)
        caplen = int(result['caplen'])
        if caplen>64000:
            raise IOError("packet too large at %s, maybe PCAP file is corrupted" % caplen)

        s=RAW(self.buffer[self.offset:self.offset+caplen])
        if s.size()!=caplen:
            raise IOError("Unable to read the last packet from the file (wanted %s, got %s). Is the file truncated?" % (result['caplen'], s.size()))

        self.offset+=caplen

        self.add_element(result, 'data', s)

        return result

    def payload(self):
        """ Returns the captured packet data as a string """
        return self.data['data'].get_value().__str__()

if __name__ == "__main__":
    fd=open(sys.argv[1],'rb')
    try:
        b=Buffer(fd=fd)

        pcap = FileHeader(b)
        print(pcap)
        import re
        for packet in pcap:
            print(packet)
    finally:
        fd.close()
pcap_py.pcap
Description: Binary data
-- http://mail.python.org/mailman/listinfo/python-list