#!/usr/bin/perl
#
# ICQ database history converter - licq.historyconvert
# 2002, Walter Chang
#
# Run with the ICQ database (.dat) file:
#   licq.historyconvert [options] <file>.dat
# Options:
#   -q   Quiet
#   -v   Verbose
#   -d   Dump to stdout in human-readable format
#   -9   ICQ99b compatibility mode
#
# Outputs <uin>.history files to the current directory.
# This is sorta tested against Windows ICQ 99b, 2000a, and 2000b.
# It converts mine just fine. YMMV.
#
# Known Issues:
#  - needs more testing
#  - Unix likes time in UTC, while Windows stores local time. I need
#    to correct the timestamp based on the user's timezone.
#  - "human-readable" timestamp isn't readable. Seconds since 1/1/1970.
#  - verbose isn't very verbose. Oh well.
#  - if you cat the result to the screen, and your messages contain
#    accent marks (French, etc), your terminal will look funny and
#    you'll need to reset the term. The data is fine, though.
#  - it does no sorting on history events. This does not affect licq;
#    it is a problem only if you assume history events are sorted!
#
# Implemented:
#   Message
#   File Request
#   URL
#   Auth Req
#   Auth Grant
#   Auth Denied
# Not implemented:
#   Chat Request
#   Reminder
#   Note
#   Server list
#   Objectionable words
#   My details
#   User information
#   Contact list (already done by licq.winconvert)
#
# Theory of Operation
#
# The Windows ICQ Client Database format
#
# It's bizarre. From hex editor observations, it looks like a linked list
# of semi-fixed-but-variable-length records with additional indexing
# structure. This is stored as a .dat file; part of the dynamic index is
# in the matching .idx file, and the rest of the data structure is woven
# through the dat file. I have been unable to determine how to generate a
# database file.
#
# Thankfully, reading one is easier. Each history event is immediately
# preceded by a 16-byte "fingerprint" which varies by version and can
# assume multiple values. This is followed by status, type, and uin.
#
# For normal messages and URL messages, this is followed by the "content"
# (the text or url, respectively), which consists of the string length
# (int16) followed by that many characters of text. Lastly, it includes
# direction (int32; can be 0 or 1) and timestamp (seconds since 1/1/1970).
# The content of a URL message is a description followed by the url,
# separated by a magic character (byte 0xFE).
#
# File requests have a file description (int16 length followed by the
# text), a direction (int32), timestamp (int32), the file name and size,
# and a "local path" field which we can safely discard.
#
# The remainder of messages are similar. The code should be sensible.
#
#
# Licq Client file formats
#
# This can be easily inferred by reading source.
# History files are named <uin>.history, where uin is the other person's
# uin. Thus, you'll have a history file for every user you've been in
# contact with. These history files are plaintext.
#
# Each event in the history file contains a header as follows:
# [ direction | command | secondary command | flag | timestamp ]
# The numeric values are in the licq source headers.
# This is followed by the content of the message, which is either
# a plain text message or line-delimited fields (file transfer, etc),
# with a colon before every line. Blank lines separate events.
#
# License: GPL, I suppose. Anything to help the switch.
use strict;
use warnings;

# ---------------------------------------------------------------------------
# Shared state
# ---------------------------------------------------------------------------

# Header fields of the event most recently located by nextKey().
my $stat;    # status (int32)
my $typ;     # event type (int16); see %cmds
my $uin;     # UIN (int32); also names the <uin>.history output file

# Command-line options, filled in by main().
my $verbose = 0;    # -v
my $quiet   = 0;    # -q
my $dbg     = 0;    # -d: dump to stdout instead of writing history files
my $icq99b  = 0;    # -9: only accept the first fingerprint in @sigs
my $fn;             # path of the database file to read

# Input handle, opened by readFile() and shared by all record readers.
my $in_fh;

# known event fingerprints
my @sigs = (
    "\xe0\x23\xa3\xdb\xdf\xb8\xd1\x11\x8a\x65\x00\x60\x08\x71\xa3\x91",
    "\xe2\x23\xa3\xdb\xdf\xb8\xd1\x11\x8a\x65\x00\x60\x08\x71\xa3\x91",
    "\x50\x3b\xc1\x5c\x5c\x95\xd3\x11\x8d\xd7\x00\x10\x4b\x06\x46\x2e",
    "\xa0\xc0\x0c\x2f\x5c\x95\xd3\x11\x8d\xd7\x00\x10\x4b\x06\x46\x2e",
);

# msgtype code -> english
# (Reference table only; the conversion code does not look anything up here.)
my %cmds = (
    "1"  => "Message",
    "2"  => "Chat",
    "3"  => "File",
    "4"  => "URL",
    "6"  => "Authorization Request",
    "7"  => "Authorization Refused",
    "8"  => "Authorization Granted",
    "9"  => "System Message",
    "12" => "Added to List",
    "13" => "Web Panel message",
    "14" => "Email Pager message",
    "19" => "Contact List",
    "26" => "User Info",
);

# Event types nextRecord() converts; every other type is skipped.
my %convertible = map { $_ => 1 } (1, 3, 4, 6, 7, 8);

# Run the conversion only when executed as a script, so the helper subs can
# be loaded (e.g. by a test) without side effects.
main(@ARGV) unless caller;

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

# Parses the command line, prints the usage text and exits with status 0
# when no database file was named, otherwise converts the file.
# The last non-option argument is taken as the database file.
sub main {
    my @args = @_;

    for my $arg (@args) {
        if ($arg eq "-q") {
            # The original only cleared $verbose here, while readFile()
            # tests $quiet, so -q never silenced anything.
            $quiet   = 1;
            $verbose = 0;
        }
        elsif ($arg eq "-v") {
            $verbose = 1;
            $quiet   = 0;
        }
        elsif ($arg eq "-d") {
            $dbg = 1;
        }
        elsif ($arg eq "-9") {
            $icq99b = 1;
        }
        else {
            $fn = $arg;
        }
    }

    if (!defined $fn) {
        print STDERR
            "\nsyntax: licq.historyconvert [options] .dat\n",
            "Extracts history out of an ICQ 99b/2000 database\n",
            "Options\n",
            " -q quiet\n",
            " -v verbose\n",
            " -d dump to screen in human-readable form\n",
            " -9 ICQ 99b compatibility mode\n",
            "\n";
        exit 0;
    }

    readFile();
    return;
}

# Opens $fn in binary mode, converts every event found in it and closes it.
# Dies when the file cannot be opened. Progress messages go to stdout and
# are suppressed by -q.
sub readFile {
    open $in_fh, '<', $fn or die "can't open file '$fn': $!\n";
    binmode $in_fh;
    print "Opened $fn\n" if !$quiet;

    print "reading...\n" if $verbose;
    nextRecord();
    print "Done\n" if !$quiet;

    close $in_fh;
    print "\n" if $verbose;
    return;
}

# ---------------------------------------------------------------------------
# Low-level readers (all operate on $in_fh)
# ---------------------------------------------------------------------------

# Reads exactly $n bytes. Returns the bytes, or undef (empty list in list
# context) when the file ends first.
sub _read_bytes {
    my ($n) = @_;
    my $raw = '';
    my $got = read $in_fh, $raw, $n;
    return if !defined $got || $got != $n;
    return $raw;
}

# Reads a little-endian unsigned 16-bit integer; undef on a short read.
sub _read_u16 {
    my $raw = _read_bytes(2);
    return if !defined $raw;
    return unpack 'v', $raw;
}

# Reads a little-endian unsigned 32-bit integer; undef on a short read.
sub _read_u32 {
    my $raw = _read_bytes(4);
    return if !defined $raw;
    return unpack 'V', $raw;
}

# Reads an int16 length followed by that many bytes; undef on a short read.
sub _read_lstring {
    my $len = _read_u16();
    return if !defined $len;
    return _read_bytes($len);
}

# Reads the part every converted event starts with after its header:
# text, 4 skipped bytes, direction, 2 skipped bytes, timestamp.
# Returns ($text, $dir, $timestamp), or an empty list when the file ends
# inside the record.
sub _read_event_body {
    my $text = _read_lstring();
    _read_bytes(4);    # field not used by this converter
    my $dir = _read_u32();
    _read_bytes(2);    # field not used by this converter
    my $timestamp = _read_u32();

    # Once one read hits end of file all later reads fail too, so checking
    # the last field is enough to detect a truncated record.
    return if !defined $timestamp;
    return ($text, $dir, $timestamp);
}

# Splits $text on the 0xFE separator and returns exactly $count fields,
# padding missing ones with empty strings.
sub _split_fields {
    my ($text, $count) = @_;
    my @fields = split /\xFE/, $text, $count + 1;
    push @fields, '' while @fields < $count;
    return @fields[0 .. $count - 1];
}

# ---------------------------------------------------------------------------
# Record conversion
# ---------------------------------------------------------------------------

# Converts every event from the current file position to end of file.
# Always returns 0.
sub nextRecord {
    # The original looped via "do { ... } while false;", which only worked
    # because the bareword "false" is a true string.
    while (1) {
        nextKey();
        return 0 if eof($in_fh);
        next if !$convertible{$typ};

        my @body = _read_event_body();
        return 0 if !@body;    # file ended inside the record
        my ($text, $dir, $timestamp) = @body;

        if ($typ == 1) {       # Message
            pMessage($dir, $uin, $timestamp, $text);
        }
        elsif ($typ == 4) {    # URL: description, then url
            my ($desc, $url) = _split_fields($text, 2);
            pUrl($dir, $uin, $timestamp, $desc, $url);
        }
        elsif ($typ == 3) {    # File Request; $text is the description
            _read_bytes(19);   # field not used by this converter
            my $filename = _read_lstring();
            my $fsize    = _read_u32();
            return 0 if !defined $fsize;

            # The "Local Path" field (int16 length + text) that follows
            # appears useless, and causes problems when reading 99b
            # databases, so it is deliberately not read.
            pFile($dir, $uin, $timestamp, $filename, $fsize, $text);
        }
        elsif ($typ == 6) {    # Authorization Request
            # NOTE(review): the original never read a timestamp for types
            # 6/7/8 and reused the one from the previous record. Reading the
            # same trailer as a message assumes these records share that
            # layout -- confirm against a real database.
            my ($alias, $fname, $lname, $email, undef, $reason)
                = _split_fields($text, 6);
            pAuthReq($dir, $uin, $timestamp,
                     $alias, $fname, $lname, $email, $reason);
        }
        else {                 # 7 = Authorization Refused, 8 = Granted
            # See the NOTE(review) on type 6 about the timestamp.
            pAuthResponse($dir, $uin, $timestamp, $typ, $text);
        }
    }
}

# Emits a plain message: dumped to stdout with -d, otherwise appended to
# <uin>.history as command 0001.
sub pMessage {
    my ($dir, $uin, $tstamp, $msg) = @_;

    if ($dbg) {
        print "\n\nMessage ", getDirText($dir), $uin,
              "\nTimestamp: $tstamp\n",
              "Text:\n$msg\n";
        return;
    }

    my $header = fmtHeader(getDir($dir), "0001", "2030", "0000", $tstamp);
    printLicq($uin, $header, split(/\n/, $msg));
    return;
}

# Emits a URL event (command 0004): the url line first, then the
# description lines.
sub pUrl {
    my ($dir, $uin, $tstamp, $desc, $url) = @_;

    if ($dbg) {
        print "\n\nURL ", getDirText($dir), $uin,
              "\nTimestamp: $tstamp\n",
              "URL: $url\n",
              "Description: $desc\n";
        return;
    }

    my $header = fmtHeader(getDir($dir), "0004", "2030", "0000", $tstamp);
    printLicq($uin, $header, $url, split(/\n/, $desc));
    return;
}

# Emits a file request (command 0003): file name, file size, then the
# description lines.
sub pFile {
    my ($dir, $uin, $tstamp, $fname, $fsize, $desc) = @_;

    if ($dbg) {
        print "\n\nFile Request ", getDirText($dir), $uin,
              "\nTimestamp: $tstamp\n",
              "File name: $fname\n",
              "File size: $fsize\n",
              "Description: $desc\n";
        return;
    }

    my $header = fmtHeader(getDir($dir), "0003", "2030", "0000", $tstamp);
    printLicq($uin, $header, $fname, $fsize, split(/\n/, $desc));
    return;
}

# Emits an authorization request (command 0006): uin, alias, first name,
# last name, email, then the reason lines.
sub pAuthReq {
    my ($dir, $uin, $tstamp, $alias, $fname, $lname, $email, $res) = @_;

    if ($dbg) {
        print "\n\nAuthorization Request ", getDirText($dir), $uin,
              "\nTimestamp: $tstamp\n",
              "Alias: $alias\n",
              "First name: $fname \t\t Last Name: $lname\n",
              "email: $email\n",
              "Reason: $res\n";
        return;
    }

    my $header = fmtHeader(getDir($dir), "0006", "2030", "0000", $tstamp);

    # The original listed the misspelled (and therefore empty) $lnamee here,
    # so the last name never reached the history file.
    printLicq($uin, $header,
              $uin, $alias, $fname, $lname, $email, split(/\n/, $res));
    return;
}

# Emits an authorization refusal ($typ 7) or grant ($typ 8); the command
# written is "000" followed by $typ. Output is the uin, then the reason
# lines.
sub pAuthResponse {
    my ($dir, $uin, $tstamp, $typ, $res) = @_;

    if ($dbg) {
        print "\n\nAuthorization Refused " if $typ == 7;
        print "\n\nAuthorization Granted " if $typ == 8;
        print getDirText($dir), $uin,
              "\nTimestamp: $tstamp\n",
              "Reason: $res\n";
        return;
    }

    my $cmd    = "000" . $typ;
    my $header = fmtHeader(getDir($dir), $cmd, "2030", "0000", $tstamp);
    printLicq($uin, $header, $uin, split(/\n/, $res));
    return;
}

# ---------------------------------------------------------------------------
# Formatting and output
# ---------------------------------------------------------------------------

# Maps the stored direction to the history-file flag: 0 is "R" (received),
# anything else (including a missing value) is "S" (sent).
sub getDir {
    my ($dir) = @_;
    return (defined $dir && $dir == 0) ? "R" : "S";
}

# Same mapping as getDir(), as the phrase used by the -d dump.
sub getDirText {
    my ($dir) = @_;
    return (defined $dir && $dir == 0) ? "recieved from " : "sent to ";
}

# Builds the history header line
# "[ direction | command | secondary command | flag | timestamp ]".
sub fmtHeader {
    my ($dir, $cmd, $scmd, $flg, $tstamp) = @_;
    return "[ $dir | $cmd | $scmd | $flg | $tstamp ]";
}

# Appends one event to <uin>.history in the current directory: a blank
# line, the header, then each content line prefixed with a colon.
# Dies when the file cannot be opened or written.
sub printLicq {
    my ($uin, $hdr, @content) = @_;
    my $path = $uin . ".history";

    open my $out, '>>', $path or die "can't append to '$path': $!\n";
    print {$out} "\n$hdr\n";
    for my $line (@content) {
        print {$out} ":", (defined $line ? $line : ''), "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close $out or die "can't write '$path': $!\n";
    return;
}

# ---------------------------------------------------------------------------
# Fingerprint scanning
# ---------------------------------------------------------------------------

# Returns true when $s equals a known event fingerprint. In ICQ 99b mode
# only the first fingerprint is accepted.
sub matchp {
    my ($s) = @_;
    my @candidates = $icq99b ? ($sigs[0]) : @sigs;

    for my $sig (@candidates) {
        return 1 if $s eq $sig;
    }
    return 0;
}

# Seeks to the end of the next fingerprint, then reads the event header into
# $stat, $typ and $uin (left undefined when the file ends first).
# Always returns 1; callers detect the end of the data with eof().
sub nextKey {
    # Sliding window over the last 16 bytes read.
    my $window = '';
    while (!matchp($window) && !eof($in_fh)) {
        my $byte = '';
        read $in_fh, $byte, 1;
        $window = substr($window, 1) if length($window) >= 16;
        $window .= $byte;
    }

    # should be at end of fingerprint
    _read_bytes(2);          # field not used by this converter
    $stat = _read_u32();     # status (long)
    $typ  = _read_u16();     # type (word)
    $uin  = _read_u32();     # UIN (long)
    return 1;
}

# True value so the file can also be loaded with require (e.g. by a test).
1;