#!/bin/python

import random, sys, math

# Python 2/3 compatibility: pick the lazy range type available on this
# interpreter so large record counts are not materialised needlessly.
try:
    _lazy_range = xrange
except NameError:
    _lazy_range = range

_USAGE = 'usage: ./generateData.py filename num_records num_tags min_records max_records [max_inter]'
_DEFAULT_MAX_INTER = 100


def _parse_args(argv):
    """Parse and validate the command line.

    argv -- full argument vector, program name at index 0.

    Returns the tuple
    (filename, n_records, n_tags, min_rec, max_rec, max_inter).

    Prints the usage string and exits with status 1 when fewer than five
    positional arguments are given; exits with an error message when the
    numeric parameters cannot be satisfied by the generator.
    """
    if len(argv) < 6:
        print(_USAGE)
        sys.exit(1)

    filename = argv[1]
    n_records, n_tags, min_rec, max_rec = [int(arg) for arg in argv[2:6]]
    # max_inter is optional, so it lives at index 6 only when argv has more
    # than six entries (testing "> 5" would always index past the end).
    max_inter = int(argv[6]) if len(argv) > 6 else _DEFAULT_MAX_INTER

    # Reject parameter combinations up front instead of letting
    # random.randint / random.sample fail after the file is half written.
    if min_rec < 1:
        sys.exit('error: min_records must be at least 1')
    if max_inter < 1:
        sys.exit('error: max_inter must be at least 1')
    if min_rec > max_rec:
        sys.exit('error: min_records must not exceed max_records')
    if max_rec > n_records:
        sys.exit('error: max_records must not exceed num_records')

    return filename, n_records, n_tags, min_rec, max_rec, max_inter


def _iter_tag_lines(n_records, n_tags, min_rec, max_rec, max_inter):
    """Yield one output line per tag: 'tag_id count rec rec ...\\n'.

    A random set of between 1 and min(min_rec, max_inter) records is drawn
    once and included in every tag; each tag is then filled up to its own
    random size (min_rec..max_rec) from the remaining records. Tag ids and
    record ids are written 1-based, record ids in ascending order.
    """
    n_inter = random.randint(1, min(min_rec, max_inter))
    inter = random.sample(_lazy_range(n_records), n_inter)

    # Records that are not part of the shared set; sampling from this pool
    # guarantees no duplicates within a tag.
    pool = list(set(_lazy_range(n_records)) - set(inter))

    for tag in _lazy_range(n_tags):
        n = random.randint(min_rec, max_rec)
        values = random.sample(pool, n - n_inter)
        values.extend(inter)
        records = ' '.join(str(value + 1) for value in sorted(values))
        yield '%d %d %s\n' % (tag + 1, n, records)


def main(argv=None):
    """Generate a random tag/record data file.

    argv -- argument vector (defaults to sys.argv):
            filename num_records num_tags min_records max_records [max_inter]

    The file starts with a 'num_tags num_records' header line followed by
    one line per tag as produced by _iter_tag_lines.
    """
    if argv is None:
        argv = sys.argv
    filename, n_records, n_tags, min_rec, max_rec, max_inter = _parse_args(argv)

    # The context manager closes the file even if generation fails.
    with open(filename, 'w') as f:
        f.write('%d %d\n' % (n_tags, n_records))
        f.writelines(_iter_tag_lines(n_records, n_tags, min_rec, max_rec, max_inter))


if __name__ == '__main__':
    main()
  
