What is DialoGPT?


This article introduces what DialoGPT is and walks through adding an inference path to its code. Many people run into exactly this situation in practice, so let the editor walk you through how to handle it. I hope you read it carefully and get something out of it!

Introduction

Large-scale pretraining for dialogue

DialoGPT is a dialogue-generation model based on GPT-2, pretrained at large scale on Reddit conversation data.
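For a quick feel of what the model does, here is a minimal interactive sketch using the Hugging Face transformers API and the public microsoft/DialoGPT-medium checkpoint. This is separate from the repo modifications described below and assumes transformers and torch are installed.

# Minimal DialoGPT chat sketch via Hugging Face transformers
# (assumption: transformers and torch are installed; this is not part of
#  the repo code modified in the rest of this article)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

history = None
for turn in ["Does money buy happiness?", "What is the best way to buy happiness?"]:
    # append the end-of-text token so the model knows the user turn is over
    new_ids = tokenizer.encode(turn + tokenizer.eos_token, return_tensors="pt")
    input_ids = torch.cat([history, new_ids], dim=-1) if history is not None else new_ids
    history = model.generate(input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
    reply = tokenizer.decode(history[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    print("bot:", reply)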

The following assumes the DialoGPT environment has already been set up.

Add an inference function to eval_utils.py

def inference_model_results(model, tokenizer, inference_dataloader, args):
    # use the same signature as eval_model_generation
    logger.info('compute eval model loss, using eval mode, '
                'please change it back to train after calling this function')
    model.eval()
    tot_sample = []
    with torch.no_grad():
        for step, batch in enumerate(inference_dataloader):
            batch = tuple(t.to(args.device) for t in batch)
            input_ids, position_ids, token_ids, label_ids, src_len, _ = batch
            if args.no_token_id:
                token_ids = None
            n_sample = input_ids.shape[0]
            logits = model.inference(input_ids, position_ids, token_ids)

            def decode(batch_data, tokenizer, input_flag):
                results = []
                batch_data = batch_data.cpu().data.numpy()
                for one_logits in batch_data:  # [sentence_len, vocabulary_size]
                    if not input_flag:
                        # model output: greedily pick the highest-scoring token at each position
                        word_ids = np.argmax(one_logits, axis=1)
                    else:
                        # input side: the rows are already token ids
                        word_ids = one_logits
                    words = []
                    for id in word_ids:
                        if tokenizer.decoder[id] != "<|endoftext|>":  # stop at the end-of-text token
                            words.append(tokenizer.decoder[id])
                        else:
                            break
                    output_words = []
                    for word in words:
                        # GPT-2 BPE marks a leading space with "Ġ"; strip it before joining
                        output_words.append(word[1:]) if word.startswith("Ġ") else output_words.append(word)
                    results.append(" ".join(output_words))
                return results

            posts = decode(input_ids, tokenizer, True)
            inferences = decode(logits, tokenizer, False)
            tot_sample.append(n_sample)
            logger.info("model inference results")
            for index in range(len(posts)):
                print("post:", posts[index])
                print("inference:", inferences[index])
            # print(inferences)
            break
    # todo
    return None
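The decode helper above performs greedy decoding: at every position it keeps the single highest-scoring vocabulary entry. A toy sketch of that step, with made-up numbers and a hypothetical 4-token vocabulary:

import numpy as np

# one_logits has shape [sentence_len, vocabulary_size]; the values below are made up
one_logits = np.array([[0.1, 2.3, 0.5, 0.2],
                       [1.7, 0.2, 0.1, 0.4],
                       [0.3, 0.1, 0.2, 3.0]])
word_ids = np.argmax(one_logits, axis=1)  # -> array([1, 0, 3])
# each id would then be looked up in tokenizer.decoder to recover the BPE string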

Add an inference method to the GPT2LMHeadModel(GPT2PreTrainedModel) class in modeling_gpt2.py

def inference(self, input_ids, position_ids=None, token_type_ids=None, past=None):
    # run the transformer and the language-model head only; no loss is computed
    hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
    lm_logits = self.lm_head(hidden_states)
    return lm_logits
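Unlike the existing forward method, which also computes a language-modeling loss when labels are passed, inference only returns the raw logits. A shape sketch (batch and sequence sizes are illustrative; 50257 is the standard GPT-2 vocabulary size):

# input_ids, position_ids, token_ids: [batch_size, seq_len], e.g. [8, 128]
logits = model.inference(input_ids, position_ids, token_ids)  # [batch_size, seq_len, 50257]
next_token_ids = logits.argmax(dim=-1)                        # greedy token choice per position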

Write a custom inference_LSP.py file

File content

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
 * @Desc: train GPT2 from scratch/ fine tuning.
          Modified based on Huggingface GPT-2 implementation
'''

import json
import os
import sys
import argparse
import logging
import time
import tqdm
import datetime

import torch
import numpy as np

from os.path import join
from torch.distributed import get_rank, get_world_size

from lsp_model import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, Adam
from gpt2_training.train_utils import load_model, boolean_string, set_lr, get_eval_list_same_length
from gpt2_training.eval_utils import eval_model_loss, inference_model_results
from data_loader import BucketingDataLoader, DynamicBatchingLoader, DistributedBucketingDataLoader
from gpt2_training.distributed import all_reduce_and_rescale_tensors, all_gather_list

os.environ['CUDA_VISIBLE_DEVICES'] = "0"

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
logger = logging.getLogger(__name__)

INF = 100000000
CACHE_EMPTY_STEP = 10000
EVAL_STEP = 10000

#
# Prepare Parser
#

parser = argparse.ArgumentParser()
parser.add_argument('--model_name_or_path', type=str, required=True,
                    help='pretrained model name or path to local checkpoint')
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--max_seq_length", type=int, default=128)

parser.add_argument("--init_checkpoint", type=str, required=True)
parser.add_argument("--inference_input_file", type=str, required=True)
parser.add_argument("--inference_batch_size", type=int, default=8)
parser.add_argument("--num_optim_steps", type=int, default=1000000,
                    help="new API specifies num update steps")

parser.add_argument("--fp16", type=boolean_string, default=True)
parser.add_argument("--normalize_data", type=boolean_string, default=True)
parser.add_argument("--loss_scale", type=float, default=0)
parser.add_argument("--no_token_id", type=boolean_string, default=True)
parser.add_argument("--log_dir", type=str, required=True)

# distributed
parser.add_argument('--local_rank', type=int, default=-1,
                    help='for torch.distributed')
parser.add_argument('--config', help='JSON config file')

# do normal parsing
args = parser.parse_args()

if args.config is not None:
    # override argparse defaults by config JSON
    opts = json.load(open(args.config))
    for k, v in opts.items():
        if isinstance(v, str):
            # PHILLY ENV special cases
            if 'PHILLY_JOB_DIRECTORY' in v:
                v = v.replace('PHILLY_JOB_DIRECTORY',
                              os.environ['PHILLY_JOB_DIRECTORY'])
            elif 'PHILLY_LOG_DIRECTORY' in v:
                v = v.replace('PHILLY_LOG_DIRECTORY',
                              os.environ['PHILLY_LOG_DIRECTORY'])
        setattr(args, k, v)

    # command line should override config JSON
    argv = sys.argv[1:]
    overrides, _ = parser.parse_known_args(argv)
    for k, v in vars(overrides).items():
        if f'--{k}' in argv:
            setattr(args, k, v)
    setattr(args, 'local_rank', overrides.local_rank)

if args.local_rank == -1:
    logger.info('CUDA available? {}'.format(str(torch.cuda.is_available())))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    args.device, args.n_gpu = device, n_gpu
else:
    # distributed training
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    # Initializes the distributed backend which will take care of
    # synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')
    n_gpu = torch.distributed.get_world_size()
    args.device, args.n_gpu = device, 1

logger.info("device: {} n_gpu: {}, distributed training: {}, "
            "16-bits training: {}".format(
                device, n_gpu, bool(args.local_rank != -1), args.fp16))

timestamp = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
log_dir = args.log_dir

logger.info('Input Argument Information')
args_dict = vars(args)
for a in args_dict:
    logger.info('%-28s  %s' % (a, args_dict[a]))

#
# Prepare Data Set
#

print("Prepare Data")
enc = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

config = GPT2Config.from_json_file(
    join(args.model_name_or_path, 'config.json'))

inference_dataloader_loss = DynamicBatchingLoader(
    args.inference_input_file, enc, args.normalize_data,
    args.inference_batch_size, args.max_seq_length)

inference_dataloader_gen = get_eval_list_same_length(
    args.inference_input_file, enc, args.inference_batch_size, True)

# eval_dataloader_loss = DynamicBatchingLoader(
#     args.eval_input_file, enc, args.normalize_data,
#     args.eval_batch_size, args.max_seq_length)
#
# eval_dataloader_gen = get_eval_list_same_length(
#     args.eval_input_file, enc, args.eval_batch_size, True)

#
# Prepare Model
#

print("Prepare Model")
logger.info("Prepare Model")
model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                   args, verbose=True)

if args.local_rank != -1:
    # when from scratch make sure initial models are the same
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(
        params, float(torch.distributed.get_world_size()))

no_decay = ['bias', 'ln']   # no decay for bias and LayerNorm (ln)

#
# Inference!
#

print("Model inference")
logger.info("Model inference")
inference_logger = open(join(log_dir, 'inference_log.txt'), 'a', buffering=1)

epoch = 0
if args.local_rank != -1:
    n_gpu = 1

# todo modify loss out.
results = inference_model_results(model, enc, inference_dataloader_loss, args)
# todo output format
# print('{}, {}'.format(epoch + 1, global_step + 1, step + 1, eval_loss, eval_ppl), file=inference_logger)
logger.info("inference_final_results:")
if results is None:
    logger.info("current results are None")
else:
    logger.info(results)
inference_logger.close()


Inference

python inference_LSP.py --model_name_or_path ./models/medium/ --init_checkpoint ./12_5_self_output/GPT2.1e-05.8.3gpu.2019-12-04225327/GP2-pretrain-step-50000.pkl --inference_input_file ./selfdata/attack_chatbot.tsv --log_dir inference_logs_dir/

validset.tsv:

python inference_LSP.py --model_name_or_path ./models/medium/ --init_checkpoint ./12_5_self_output/GPT2.1e-05.8.3gpu.2019-12-04225327/GP2-pretrain-step-50000.pkl --inference_input_file ./selfdata/validset.tsv --log_dir inference_logs_dir/

The released fine-tuned checkpoint ./models/medium/medium_ft.pkl can also be passed as --init_checkpoint.

That's all for "What is DialoGPT". Thank you for reading. If you want to learn more about the topic, you can follow this site for more practical articles.
