quantize-constituency-model.ipynb

/session/quantization/quantize-constituency-model.ipynb

https://github.com/huseinzol05/Malaya · Jupyter · 471 lines · 471 code · 0 blank · 0 comment · 0 complexity · 620ff2d6942eb1c7423a043c77a10a75 MD5 · raw file

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "S3_PATH_CONSTITUENCY = {\n",
    "    'bert': {\n",
    "        'model': 'v38/constituency/bert-base.pb',\n",
    "        'dictionary': 'v38/constituency/vocab-bert-base.json',\n",
    "        'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n",
    "        'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n",
    "    },\n",
    "    'tiny-bert': {\n",
    "        'model': 'v38/constituency/tiny-bert.pb',\n",
    "        'dictionary': 'v38/constituency/vocab-tiny-bert.json',\n",
    "        'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n",
    "        'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n",
    "    },\n",
    "#     'albert': {\n",
    "#         'model': 'v38/constituency/albert-base.pb',\n",
    "#         'dictionary': 'v38/constituency/vocab-albert-base.json',\n",
    "#         'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n",
    "#         'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n",
    "#     },\n",
    "#     'tiny-albert': {\n",
    "#         'model': 'v38/constituency/albert-tiny.pb',\n",
    "#         'dictionary': 'v38/constituency/vocab-albert-tiny.json',\n",
    "#         'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n",
    "#         'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n",
    "#     },\n",
    "#     'xlnet': {\n",
    "#         'model': 'v40/constituency/xlnet-base.pb',\n",
    "#         'quantized': 'v40/constituency/xlnet-base.pb.quantized',\n",
    "#         'dictionary': 'v40/constituency/vocab-xlnet-base.json',\n",
    "#         'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n",
    "#         'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n",
    "#     },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bert\n",
      "tiny-bert\n"
     ]
    }
   ],
   "source": [
    "for k in S3_PATH_CONSTITUENCY.keys():\n",
    "    if k not in ['multinomial']:\n",
    "        print(k)\n",
    "        os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_CONSTITUENCY[k]['model']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.tools.graph_transforms import TransformGraph\n",
    "from glob import glob\n",
    "tf.set_random_seed(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['tiny-bert.pb',\n",
       " 'albert-tiny-similarity.pb',\n",
       " 'bert-base.pb',\n",
       " 'xlnet-base-similarity.pb',\n",
       " 'albert-base-similarity.pb',\n",
       " 'bert-base-similarity.pb',\n",
       " 'alxlnet-base-similarity.pb',\n",
       " 'tiny-bert-similarity.pb']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbs = glob('*.pb')\n",
    "pbs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(<tf.Tensor 'import/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/intermediate/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/einsum/Reshape_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/query/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/query/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/key/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/key/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/value/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/self/value/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/einsum/Reshape_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/intermediate/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/einsum/Reshape_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/query/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/query/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/key/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/key/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/value/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/self/value/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/einsum/Reshape_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/intermediate/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/einsum/Reshape_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/query/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/query/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/key/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/key/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/value/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/self/value/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/einsum/Reshape_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/intermediate/einsum/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/einsum/MatMul:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/einsum/Reshape_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/add:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0' shape=(?, ?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/Reshape:0' shape=(?, 312) dtype=float32>,),\n",
       " (<tf.Tensor 'import/GatherV2:0' shape=(?, 312) dtype=float32>,)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with tf.gfile.GFile('tiny-bert.pb', \"rb\") as f:\n",
    "    graph_def = tf.GraphDef()\n",
    "    graph_def.ParseFromString(f.read())\n",
    "\n",
    "with tf.Graph().as_default() as graph:\n",
    "    tf.import_graph_def(graph_def)\n",
    "\n",
    "op = graph.get_operations()\n",
    "x = []\n",
    "for i in op:\n",
    "    try:\n",
    "        if i.values()[0].shape[-1] == 312:\n",
    "        #if 'batchnorm/add_1' in i.values()[0].name:\n",
    "            x.append(i.values())\n",
    "    except:\n",
    "        pass\n",
    "    \n",
    "x[-100:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "mapping = {'albert-base.pb': 'import/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n",
    "          'albert-tiny.pb': 'import/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n",
    "          'bert-base.pb': 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',\n",
    "          'tiny-bert.pb': 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',\n",
    "          'xlnet-base.pb': 'import/model/transformer/layer_11/ff/LayerNorm/batchnorm/add_1:0'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping[pbs[0]].replace('import/','').replace(':0','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "transforms = ['add_default_attributes',\n",
    "             'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n",
    "             'fold_batch_norms',\n",
    "             'fold_old_batch_norms',\n",
    "             'quantize_weights(fallback_min=-10, fallback_max=10)',\n",
    "             'strip_unused_nodes',\n",
    "             'sort_by_execution_order']\n",
    "\n",
    "for pb in pbs:\n",
    "    input_graph_def = tf.GraphDef()\n",
    "    with tf.gfile.FastGFile(pb, 'rb') as f:\n",
    "        input_graph_def.ParseFromString(f.read())\n",
    "        \n",
    "    a = [mapping[pb].replace('import/','').replace(':0','')]\n",
    "    \n",
    "    transformed_graph_def = TransformGraph(input_graph_def, \n",
    "                                           ['input_ids', 'word_end_mask'],\n",
    "                                           ['charts', 'tags'] + a, transforms)\n",
    "    \n",
    "    with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n",
    "        f.write(transformed_graph_def.SerializeToString())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_graph(frozen_graph_filename, **kwargs):\n",
    "    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n",
    "        graph_def = tf.GraphDef()\n",
    "        graph_def.ParseFromString(f.read())\n",
    "\n",
    "    # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n",
    "    # to fix import T5\n",
    "    for node in graph_def.node:\n",
    "        if node.op == 'RefSwitch':\n",
    "            node.op = 'Switch'\n",
    "            for index in xrange(len(node.input)):\n",
    "                if 'moving_' in node.input[index]:\n",
    "                    node.input[index] = node.input[index] + '/read'\n",
    "        elif node.op == 'AssignSub':\n",
    "            node.op = 'Sub'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "        elif node.op == 'AssignAdd':\n",
    "            node.op = 'Add'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "        elif node.op == 'Assign':\n",
    "            node.op = 'Identity'\n",
    "            if 'use_locking' in node.attr:\n",
    "                del node.attr['use_locking']\n",
    "            if 'validate_shape' in node.attr:\n",
    "                del node.attr['validate_shape']\n",
    "            if len(node.input) == 2:\n",
    "                node.input[0] = node.input[1]\n",
    "                del node.input[1]\n",
    "\n",
    "    with tf.Graph().as_default() as graph:\n",
    "        tf.import_graph_def(graph_def)\n",
    "    return graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['albert-base.pb.quantized',\n",
       " 'xlnet-base.pb.quantized',\n",
       " 'albert-tiny.pb.quantized',\n",
       " 'tiny-bert.pb.quantized',\n",
       " 'bert-base.pb.quantized']"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quantized = glob('*.pb.quantized')\n",
    "quantized"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n",
    "#     graph_def_file='test.pb',\n",
    "#     input_arrays=['Placeholder', 'Placeholder_1'],\n",
    "#     input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n",
    "#     output_arrays=['logits'],\n",
    "# )\n",
    "# # converter.allow_custom_ops=True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n",
    "# converter.target_spec.supported_types = [tf.float16]\n",
    "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
    "# converter.experimental_new_converter = True\n",
    "# tflite_model = converter.convert()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
    "#                                        tf.lite.OpsSet.SELECT_TF_OPS]\n",
    "# converter.target_spec.supported_types = [tf.float16]\n",
    "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
    "# tflite_model = converter.convert()\n",
    "\n",
    "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n",
    "#     f.write(tflite_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
    "#                                        tf.lite.OpsSet.SELECT_TF_OPS]\n",
    "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
    "# tflite_model = converter.convert()\n",
    "\n",
    "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n",
    "#     f.write(tflite_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n",
    "# interpreter.allocate_tensors()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}