This article introduces what DialoGPT is and walks through running inference with it. Many people run into trouble with this in practice, so let the editor lead you through how to handle these situations. I hope you read it carefully and come away with something useful!
Introduction
Large-scale pretraining for dialogue
DialoGPT is a dialogue-generation pre-trained model based on GPT-2, trained on Reddit conversation data.
Assume that the environment has been set up
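As a quick check that the environment works, the released DialoGPT checkpoint can also be loaded through the Hugging Face transformers library to generate a single reply. The snippet below is only a minimal sketch of that idea; it assumes transformers is installed and uses the microsoft/DialoGPT-medium checkpoint, whereas the rest of this article works with the DialoGPT repository's own lsp_model code instead.
# Minimal sanity check (assumes `transformers` is installed); the article itself
# uses the DialoGPT repo's own lsp_model / inference scripts below.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
# Encode one user turn followed by the end-of-text token DialoGPT uses to separate turns.
input_ids = tokenizer.encode("Does money buy happiness?" + tokenizer.eos_token,
                             return_tensors="pt")
# Generate a reply and decode only the newly produced tokens.
reply_ids = model.generate(input_ids, max_length=100,
                           pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(reply_ids[0, input_ids.shape[-1]:], skip_special_tokens=True))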
Add an inference function to eval_utils.py
# assumes eval_utils.py already imports torch and numpy as np and defines logger
def inference_model_results(model, tokenizer, inference_dataloader, args):
    # use the same signature as eval_model_generation
    logger.info('compute eval model loss, using eval mode, '
                'please change it back to train after calling this function')
    model.eval()
    tot_sample = []
    with torch.no_grad():
        for step, batch in enumerate(inference_dataloader):
            batch = tuple(t.to(args.device) for t in batch)
            input_ids, position_ids, token_ids, label_ids, src_len, _ = batch
            if args.no_token_id:
                token_ids = None
            n_sample = input_ids.shape[0]
            logits = model.inference(input_ids, position_ids, token_ids)

            def decode(batch_data, tokenizer, input_flag):
                results = []
                batch_data = batch_data.cpu().data.numpy()
                for one_logits in batch_data:  # [sentence_len, vocabulary_size]
                    if not input_flag:
                        # greedy decoding: pick the highest-scoring token at each position
                        word_ids = np.argmax(one_logits, axis=1)
                    else:
                        # the inputs are already token ids
                        word_ids = one_logits
                    words = []
                    for id in word_ids:
                        # stop at the end-of-text token
                        if tokenizer.decoder[id] != "<|endoftext|>":
                            words.append(tokenizer.decoder[id])
                        else:
                            break
                    output_words = []
                    for word in words:
                        # GPT-2 BPE marks a word-initial space with a leading "Ġ"; drop it
                        output_words.append(word[1:]) if word.startswith("Ġ") else output_words.append(word)
                    results.append(" ".join(output_words))
                return results

            posts = decode(input_ids, tokenizer, True)
            inferences = decode(logits, tokenizer, False)
            tot_sample.append(n_sample)
            logger.info("model inference results")
            for index in range(len(posts)):
                print("post:", posts[index])
                print("inference:", inferences[index])
                # print(inferences)
            break  # only the first batch is shown
    # todo
    return None
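The decode helper above relies on two GPT-2 tokenizer details: tokenizer.decoder maps a token id back to its raw BPE string, and a token that starts a new word carries a leading "Ġ" marker, which is why word[1:] strips the first character before the tokens are joined. The following is just a small illustration of that behaviour, assuming the standard GPT-2 vocabulary and using the transformers GPT2Tokenizer as a stand-in for the repository's own tokenizer.
from transformers import GPT2Tokenizer  # stand-in for the repo's lsp_model GPT2Tokenizer
tok = GPT2Tokenizer.from_pretrained("gpt2")
ids = tok.encode("hello there friend")
# decoder maps id -> raw BPE token; a leading "Ġ" means the token begins a new word
print([tok.decoder[i] for i in ids])   # e.g. ['hello', 'Ġthere', 'Ġfriend']
# stripping the marker and joining with spaces recovers readable text,
# mirroring what decode() above does with the argmax'd logits
print(" ".join(t[1:] if t.startswith("Ġ") else t for t in (tok.decoder[i] for i in ids)))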
Add an inference function to class GPT2LMHeadModel(GPT2PreTrainedModel) in modeling_gpt2.py
def inference(self, input_ids, position_ids=None, token_type_ids=None, past=None):
    hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
    lm_logits = self.lm_head(hidden_states)
    return lm_logits
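Unlike the existing forward method, which also computes a language-modeling loss when labels are passed, this inference method only runs the transformer and the LM head and returns raw logits of shape [batch, seq_len, vocab_size]; inference_model_results above then takes an argmax over the vocabulary dimension. A rough usage sketch, where the tensors are assumed to come from a DynamicBatchingLoader batch exactly as in that function:
# illustrative only: input_ids, position_ids and token_ids come from the batch
with torch.no_grad():
    lm_logits = model.inference(input_ids, position_ids, token_ids)  # [batch, seq_len, vocab_size]
    predicted_ids = lm_logits.argmax(dim=-1)  # greedy token choice at every position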
Customize the inference_LSP.py file
File content
#  Copyright (c) Microsoft Corporation.
#  Licensed under the MIT license.
'''
* @Desc: train GPT2 from scratch/ fine tuning.
  Modified based on Huggingface GPT-2 implementation
'''

import json
import os
import sys
import argparse
import logging
import time
import tqdm
import datetime

import torch
import numpy as np

from os.path import join
from torch.distributed import get_rank, get_world_size

from lsp_model import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, Adam
from gpt2_training.train_utils import load_model, boolean_string, set_lr, get_eval_list_same_length
from gpt2_training.eval_utils import eval_model_loss, inference_model_results
from data_loader import BucketingDataLoader, DynamicBatchingLoader, DistributedBucketingDataLoader
from gpt2_training.distributed import all_reduce_and_rescale_tensors, all_gather_list

os.environ['CUDA_VISIBLE_DEVICES'] = "0"

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
logger = logging.getLogger(__name__)

INF = 100000000
CACHE_EMPTY_STEP = 10000
EVAL_STEP = 10000
#
# Prepare Parser
#
parser = argparse.ArgumentParser()
parser.add_argument('--model_name_or_path', type=str, required=True,
                    help='pretrained model name or path to local checkpoint')
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--max_seq_length", type=int, default=128)
parser.add_argument("--init_checkpoint", type=str, required=True)
parser.add_argument("--inference_input_file", type=str, required=True)
parser.add_argument("--inference_batch_size", type=int, default=8)
parser.add_argument("--num_optim_steps", type=int, default=1000000,
                    help="new API specifies num update steps")
parser.add_argument("--fp16", type=boolean_string, default=True)
parser.add_argument("--normalize_data", type=boolean_string, default=True)
parser.add_argument("--loss_scale", type=float, default=0)
parser.add_argument("--no_token_id", type=boolean_string, default=True)
parser.add_argument("--log_dir", type=str, required=True)
# distributed
parser.add_argument('--local_rank', type=int, default=-1,
                    help='for torch.distributed')
parser.add_argument('--config', help='JSON config file')

# do normal parsing
args = parser.parse_args()

if args.config is not None:
    # override argparse defaults by config JSON
    opts = json.load(open(args.config))
    for k, v in opts.items():
        if isinstance(v, str):
            # PHILLY ENV special cases
            if 'PHILLY_JOB_DIRECTORY' in v:
                v = v.replace('PHILLY_JOB_DIRECTORY',
                              os.environ['PHILLY_JOB_DIRECTORY'])
            elif 'PHILLY_LOG_DIRECTORY' in v:
                v = v.replace('PHILLY_LOG_DIRECTORY',
                              os.environ['PHILLY_LOG_DIRECTORY'])
        setattr(args, k, v)

    # command line should override config JSON
    argv = sys.argv[1:]
    overrides, _ = parser.parse_known_args(argv)
    for k, v in vars(overrides).items():
        if f'--{k}' in argv:
            setattr(args, k, v)
    setattr(args, 'local_rank', overrides.local_rank)
if args.local_rank == -1:
    logger.info('CUDA available? {}'.format(str(torch.cuda.is_available())))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    args.device, args.n_gpu = device, n_gpu
else:
    # distributed training
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    # Initializes the distributed backend which will take care of
    # sychronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')
    n_gpu = torch.distributed.get_world_size()
    args.device, args.n_gpu = device, 1

logger.info("device: {} n_gpu: {}, distributed training: {}, "
            "16-bits training: {}".format(
                device, n_gpu, bool(args.local_rank != -1), args.fp16))

timestamp = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
log_dir = args.log_dir

logger.info('Input Argument Information')
args_dict = vars(args)
for a in args_dict:
    logger.info('%-28s  %s' % (a, args_dict[a]))
#
# Prepare Data Set
#
print("Prepare Data")
enc = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

config = GPT2Config.from_json_file(
    join(args.model_name_or_path, 'config.json'))

inference_dataloader_loss = DynamicBatchingLoader(
    args.inference_input_file, enc, args.normalize_data,
    args.inference_batch_size, args.max_seq_length)

inference_dataloader_gen = get_eval_list_same_length(
    args.inference_input_file, enc, args.inference_batch_size, True)

# eval_dataloader_loss = DynamicBatchingLoader(
#     args.eval_input_file, enc, args.normalize_data,
#     args.eval_batch_size, args.max_seq_length)
#
# eval_dataloader_gen = get_eval_list_same_length(
#     args.eval_input_file, enc, args.eval_batch_size, True)
#
# Prepare Model
#
print("Prepare Model")
logger.info("Prepare Model")
model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                   args, verbose=True)

if args.local_rank != -1:
    # when from scratch make sure initial models are the same
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(params, float(torch.distributed.get_world_size()))

no_decay = ['bias', 'ln']   # no decay for bias and LayerNorm (ln)
#
# Inference!
#
print("Model inference")
logger.info("Model inference")

inference_logger = open(join(log_dir, 'inference_log.txt'), 'a', buffering=1)
epoch = 0

if args.local_rank != -1:
    n_gpu = 1

# todo modify loss out.
results = inference_model_results(model, enc, inference_dataloader_loss, args)
# todo output format
# print('{},{},{},{},{}'.format(epoch + 1, global_step + 1, step + 1, eval_loss, eval_ppl), file=inference_logger)
logger.info("inference_final_results:")
if results is None:
    logger.info("current results are None")
else:
    logger.info(results)

inference_logger.close()
Inference
python inference_LSP.py --model_name_or_path ./models/medium/ --init_checkpoint ./12_5_self_output/GPT2.1e-05.8.3gpu.2019-12-04225327/GP2-pretrain-step-50000.pkl --inference_input_file ./selfdata/attack_chatbot.tsv --log_dir inference_logs_dir/
validset.tsv:
--model_name_or_path ./models/medium/ --init_checkpoint ./12_5_self_output/GPT2.1e-05.8.3gpu.2019-12-04225327/GP2-pretrain-step-50000.pkl --inference_input_file ./selfdata/validset.tsv --log_dir inference_logs_dir/
./models/medium/medium_ft.pkl
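The path above is the released fine-tuned medium checkpoint (medium_ft.pkl). Presumably it can be passed as --init_checkpoint in place of your own fine-tuned .pkl, following the same pattern as the commands above; a hypothetical invocation would look like this:
python inference_LSP.py --model_name_or_path ./models/medium/ --init_checkpoint ./models/medium/medium_ft.pkl --inference_input_file ./selfdata/validset.tsv --log_dir inference_logs_dir/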
That's all for "what DialoGPT is". Thank you for reading. If you want to learn more about the industry, you can follow the site, where the editor will keep publishing more high-quality practical articles for you!