diff mbox series

[v8,09/12] target/hexagon: import lexer for idef-parser

Message ID 20220209170312.30662-10-anjo@rev.ng (mailing list archive)
State New, archived
Headers show
Series target/hexagon: introduce idef-parser | expand

Commit Message

Anton Johansson Feb. 9, 2022, 5:03 p.m. UTC
From: Paolo Montesel <babush@rev.ng>

Signed-off-by: Alessandro Di Federico <ale@rev.ng>
Signed-off-by: Paolo Montesel <babush@rev.ng>
Signed-off-by: Anton Johansson <anjo@rev.ng>
---
 target/hexagon/idef-parser/idef-parser.h   | 254 +++++++++
 target/hexagon/idef-parser/idef-parser.lex | 566 +++++++++++++++++++++
 target/hexagon/meson.build                 |   4 +
 3 files changed, 824 insertions(+)
 create mode 100644 target/hexagon/idef-parser/idef-parser.h
 create mode 100644 target/hexagon/idef-parser/idef-parser.lex

Comments

Taylor Simpson March 21, 2022, 6:40 p.m. UTC | #1
> -----Original Message-----
> From: Anton Johansson <anjo@rev.ng>
> Sent: Wednesday, February 9, 2022 11:03 AM
> To: qemu-devel@nongnu.org
> Cc: ale@rev.ng; Taylor Simpson <tsimpson@quicinc.com>; Brian Cain
> <bcain@quicinc.com>; Michael Lambert <mlambert@quicinc.com>;
> babush@rev.ng; nizzo@rev.ng; richard.henderson@linaro.org
> Subject: [PATCH v8 09/12] target/hexagon: import lexer for idef-parser
> 
> From: Paolo Montesel <babush@rev.ng>
> 
> Signed-off-by: Alessandro Di Federico <ale@rev.ng>
> Signed-off-by: Paolo Montesel <babush@rev.ng>
> Signed-off-by: Anton Johansson <anjo@rev.ng>
> ---
>  target/hexagon/idef-parser/idef-parser.h   | 254 +++++++++
>  target/hexagon/idef-parser/idef-parser.lex | 566
> +++++++++++++++++++++
>  target/hexagon/meson.build                 |   4 +
>  3 files changed, 824 insertions(+)
>  create mode 100644 target/hexagon/idef-parser/idef-parser.h
>  create mode 100644 target/hexagon/idef-parser/idef-parser.lex

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
diff mbox series

Patch

diff --git a/target/hexagon/idef-parser/idef-parser.h b/target/hexagon/idef-parser/idef-parser.h
new file mode 100644
index 0000000000..106eb3ec98
--- /dev/null
+++ b/target/hexagon/idef-parser/idef-parser.h
@@ -0,0 +1,254 @@ 
+/*
+ *  Copyright(c) 2019-2021 rev.ng Labs Srl. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IDEF_PARSER_H
+#define IDEF_PARSER_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <glib.h>
+
+#define TCGV_NAME_SIZE 7
+#define MAX_WRITTEN_REGS 32
+#define OFFSET_STR_LEN 32
+#define ALLOC_LIST_LEN 32
+#define ALLOC_NAME_SIZE 32
+#define INIT_LIST_LEN 32
+#define OUT_BUF_LEN (1024 * 1024)       /* 1 Mi; unit presumably bytes (TODO) */
+#define SIGNATURE_BUF_LEN (128 * 1024)  /* 128 Ki; unit presumably bytes (TODO) */
+#define HEADER_BUF_LEN (128 * 1024)     /* 128 Ki; unit presumably bytes (TODO) */
+
+/* Variadic printf-style macros appending to the GString fields of (c) */
+#define EMIT(c, ...)                                                           \
+    do {                                                                       \
+        g_string_append_printf((c)->out_str, __VA_ARGS__);                     \
+    } while (0)
+
+#define EMIT_SIG(c, ...)                                                       \
+    do {                                                                       \
+        g_string_append_printf((c)->signature_str, __VA_ARGS__);               \
+    } while (0)
+
+#define EMIT_HEAD(c, ...)                                                      \
+    do {                                                                       \
+        g_string_append_printf((c)->header_str, __VA_ARGS__);                  \
+    } while (0)
+
+/**
+ * Type of register, assigned to the HexReg.type field
+ */
+typedef enum { GENERAL_PURPOSE, CONTROL, MODIFIER, DOTNEW } HexRegType;
+
+typedef enum { UNKNOWN_SIGNEDNESS, SIGNED, UNSIGNED } HexSignedness;
+
+/**
+ * Semantic record of the REG tokens, identifying registers
+ */
+typedef struct HexReg {
+    uint8_t id;                 /**< Id: token letter or HEX_REG_* constant   */
+    HexRegType type;            /**< Type of the register                     */
+    unsigned bit_width;         /**< Bit width of the reg, 32 or 64 bits      */
+} HexReg;
+
+/**
+ * Data structure, identifying a TCGv temporary value
+ */
+typedef struct HexTmp {
+    unsigned index;             /**< Index of the TCGv temporary value        */
+} HexTmp;
+
+/**
+ * Enum of the possible immediates; an immediate is a value which is known
+ * at tinycode generation time, e.g. an integer value, not a TCGv
+ */
+enum ImmUnionTag {
+    I,
+    VARIABLE,                   /**< HexImm.id holds the identifier           */
+    VALUE,                      /**< HexImm.value holds the constant          */
+    QEMU_TMP,                   /**< HexImm.index holds the index             */
+    IMM_PC,
+    IMM_NPC,
+    IMM_CONSTEXT,
+};
+
+/**
+ * Semantic record of the IMM token, identifying an immediate constant
+ */
+typedef struct HexImm {
+    union {
+        char id;                /**< Identifier, used when type is VARIABLE   */
+        uint64_t value;         /**< Immediate value, used when type is VALUE */
+        uint64_t index;         /**< Index, used when type is QEMU_TMP        */
+    };
+    enum ImmUnionTag type;      /**< Kind of immediate; selects union member  */
+} HexImm;
+
+/**
+ * Semantic record of the PRED token, identifying a predicate
+ */
+typedef struct HexPred {
+    char id;                    /**< Identifier of the predicate              */
+} HexPred;
+
+/**
+ * Semantic record of the SAT token, identifying the saturate operator
+ */
+typedef struct HexSat {
+    bool set_overflow;          /**< Should the sat. op. set overflow?        */
+    HexSignedness signedness;   /**< Signedness of the sat. op.               */
+} HexSat;
+
+/**
+ * Semantic record of the CAST token, identifying the cast operator
+ */
+typedef struct HexCast {
+    unsigned bit_width;         /**< Bit width of the cast operator           */
+    HexSignedness signedness;   /**< Signedness of the cast operator          */
+} HexCast;
+
+/**
+ * Semantic record of the EXTRACT token, identifying the extract operator
+ */
+typedef struct HexExtract {
+    unsigned bit_width;         /**< Bit width of the extract operator        */
+    unsigned storage_bit_width; /**< Actual bit width of the extract operator */
+    HexSignedness signedness;   /**< Signedness of the extract operator       */
+} HexExtract;
+
+/**
+ * Semantic record of the MPY token, identifying the fMPY multiplication
+ * operator
+ */
+typedef struct HexMpy {
+    unsigned first_bit_width;        /**< Bit width of 1st operand of fMPY    */
+    unsigned second_bit_width;       /**< Bit width of 2nd operand of fMPY    */
+    HexSignedness first_signedness;  /**< Signedness of 1st operand of fMPY   */
+    HexSignedness second_signedness; /**< Signedness of 2nd operand of fMPY   */
+} HexMpy;
+
+/**
+ * Semantic record of the VARID token, identifying declared variables
+ * of the input language
+ */
+typedef struct HexVar {
+    GString *name;              /**< Name of the VARID variable               */
+} HexVar;
+
+/**
+ * Data structure uniquely identifying a declared VARID variable, used for
+ * keeping track of declared variables, so that any variable is declared only
+ * once, and its properties are propagated through all the subsequent instances
+ * of that variable
+ */
+typedef struct Var {
+    GString *name;              /**< Name of the VARID variable               */
+    uint8_t bit_width;          /**< Bit width of the VARID variable          */
+    HexSignedness signedness;   /**< Signedness of the VARID variable         */
+} Var;
+
+/**
+ * Enum of the possible rvalue types, used in the HexValue.type field
+ */
+typedef enum RvalueUnionTag {
+    REGISTER, REGISTER_ARG, TEMP, IMMEDIATE, PREDICATE, VARID
+} RvalueUnionTag;
+
+/**
+ * Semantic record of the rvalue token, identifying any numeric value,
+ * immediate or register based. The rvalue tokens are combined together
+ * through the use of several operators, to encode expressions
+ */
+typedef struct HexValue {
+    union {
+        HexReg reg;             /**< rvalue of register type                  */
+        HexTmp tmp;             /**< rvalue of temporary type                 */
+        HexImm imm;             /**< rvalue of immediate type                 */
+        HexPred pred;           /**< rvalue of predicate type                 */
+        HexVar var;             /**< rvalue of declared variable type         */
+    };
+    RvalueUnionTag type;        /**< Type of the rvalue                       */
+    unsigned bit_width;         /**< Bit width of the rvalue                  */
+    HexSignedness signedness;   /**< Signedness of the rvalue                 */
+    bool is_dotnew;             /**< rvalue of predicate type is dotnew?      */
+    bool is_manual;             /**< Opt out of automatic freeing of params   */
+} HexValue;
+
+/**
+ * State of ternary operator
+ */
+typedef enum TernaryState { IN_LEFT, IN_RIGHT } TernaryState;
+
+/**
+ * Data structure used to handle side effects inside ternary operators
+ */
+typedef struct Ternary {
+    TernaryState state;         /**< IN_LEFT or IN_RIGHT                      */
+    HexValue cond;              /**< Presumably the condition - TODO confirm  */
+} Ternary;
+
+/**
+ * Operator type, used for referencing the correct operator when calling the
+ * gen_bin_op() function, which in turn will generate the correct code to
+ * execute the operation between the two rvalues
+ */
+typedef enum OpType {
+    ADD_OP, SUB_OP, MUL_OP, ASL_OP, ASR_OP, LSR_OP, ANDB_OP, ORB_OP,
+    XORB_OP, ANDL_OP, MINI_OP, MAXI_OP, MOD_OP
+} OpType;
+
+/**
+ * Data structure including instruction specific information, to be cleared
+ * out after the compilation of each instruction
+ */
+typedef struct Inst {
+    GString *name;              /**< Name of the compiled instruction         */
+    char *code_begin;           /**< Beginning of instruction input code      */
+    char *code_end;             /**< End of instruction input code            */
+    unsigned tmp_count;         /**< Index of the last declared TCGv temp     */
+    unsigned qemu_tmp_count;    /**< Index of the last declared int temp      */
+    unsigned if_count;          /**< Index of the last declared if label      */
+    unsigned error_count;       /**< Number of generated errors               */
+    GArray *allocated;          /**< Allocated declared VARID vars            */
+    GArray *init_list;          /**< List of initialized registers            */
+    GArray *strings;            /**< Strings allocated by the instruction     */
+} Inst;
+
+/**
+ * Data structure representing the whole translation context, which in a
+ * reentrant flex/bison parser just like ours is passed between the scanner
+ * and the parser, holding all the necessary information to perform the
+ * parsing, this data structure survives between the compilation of different
+ * instructions
+ * EMIT, EMIT_SIG and EMIT_HEAD append to out_str, signature_str, header_str
+ */
+typedef struct Context {
+    void *scanner;              /**< Reentrant parser state pointer           */
+    char *input_buffer;         /**< Buffer containing the input code         */
+    GString *out_str;           /**< String containing the output code        */
+    GString *signature_str;     /**< String containing the signatures code    */
+    GString *header_str;        /**< String containing the header code        */
+    FILE *defines_file;         /**< FILE * of the generated header           */
+    FILE *output_file;          /**< FILE * of the C output file              */
+    FILE *enabled_file;         /**< FILE * of the list of enabled inst       */
+    GArray *ternary;            /**< Array to track nesting of ternary ops    */
+    unsigned total_insn;        /**< Number of instructions in input file     */
+    unsigned implemented_insn;  /**< Instructions compiled without errors     */
+    Inst inst;                  /**< Parsing data of the current inst         */
+} Context;
+
+#endif /* IDEF_PARSER_H */
diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex
new file mode 100644
index 0000000000..7cdfb22d80
--- /dev/null
+++ b/target/hexagon/idef-parser/idef-parser.lex
@@ -0,0 +1,566 @@ 
+%option noyywrap noinput nounput
+%option 8bit reentrant bison-bridge
+%option warn nodefault
+%option bison-locations
+
+%{
+/*
+ *  Copyright(c) 2019-2021 rev.ng Labs Srl. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdbool.h>
+
+#include "hex_regs.h"
+
+#include "idef-parser.h"
+#include "idef-parser.tab.h"
+
+/* Keep track of scanner position (columns only) for error message printout */
+#define YY_USER_ACTION                                         \
+    yylloc->first_column = yylloc->last_column;                \
+    /* yyleng is the length of the text matched by the rule */ \
+    yylloc->last_column += yyleng;
+
+/* Global error counter, separate from the Inst.error_count field */
+int error_count;
+
+%}
+
+/* Definitions: named patterns; a reference is written {NAME} */
+DIGIT                    [0-9]
+LOWER_ID                 [a-z]
+UPPER_ID                 [A-Z]
+ID                       {LOWER_ID}|{UPPER_ID}
+INST_NAME                [A-Z]+[0-9]_([A-Za-z]|[0-9]|_)+
+HEX_DIGIT                [0-9a-fA-F]
+REG_ID_32                e|s|d|t|u|v|x|y
+REG_ID_64                ee|ss|dd|tt|uu|vv|xx|yy
+SYS_ID_32                s|d
+SYS_ID_64                ss|dd
+PRED_ID                  d|s|t|u|v|e|x
+IMM_ID                   r|s|S|u|U
+VAR_ID                   [a-zA-Z_][a-zA-Z0-9_]*
+SIGN_ID                  s|u
+STRING_LIT               \"(\\.|[^"\\])*\"
+
+/* Tokens */
+%%
+
+[ \t\f\v]+                { /* Ignore whitespaces. */ }
+[\n\r]+                   { /* Ignore newlines; column count is not reset. */ }
+^#.*$                     { /* Ignore linemarkers (lines starting with #). */ }
+
+{INST_NAME}               { yylval->string = g_string_new(yytext);
+                            return INAME; /* string: new GString of yytext */ }
+"fFLOAT"                 |
+"fUNFLOAT"               |
+"fDOUBLE"                |
+"fUNDOUBLE"              |
+"0.0"                    |
+"0x1.0p52"               |
+"0x1.0p-52"              { return FAIL; /* shared by all seven patterns */ }
+"in"                     { return IN; }
+"R"{REG_ID_32}"V" { /* 32-bit general purpose register argument */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = yytext[1];
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+"R"{REG_ID_32}"N" { /* 32-bit register argument, DOTNEW form */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = DOTNEW;
+                           yylval->rvalue.reg.id = yytext[1];
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = true;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+"R"{REG_ID_64}"V" { /* 64-bit general purpose register argument */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = yytext[1]; /* 1st letter */
+                           yylval->rvalue.reg.bit_width = 64;
+                           yylval->rvalue.bit_width = 64;
+                           yylval->rvalue.is_dotnew = false;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+"R"{REG_ID_64}"N" { /* 64-bit register argument, DOTNEW form */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = DOTNEW;
+                           yylval->rvalue.reg.id = yytext[1]; /* 1st letter */
+                           yylval->rvalue.reg.bit_width = 64;
+                           yylval->rvalue.bit_width = 64;
+                           yylval->rvalue.is_dotnew = true;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+"MuV"                    { yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = MODIFIER;
+                           yylval->rvalue.reg.id = 'u';
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false; /* not DOTNEW */
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; /* 32-bit MODIFIER register u */ }
+"C"{REG_ID_32}"V" { /* 32-bit CONTROL register argument */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = yytext[1];
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+"C"{REG_ID_64}"V" { /* 64-bit CONTROL register argument */
+                           yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = yytext[1]; /* 1st letter */
+                           yylval->rvalue.reg.bit_width = 64;
+                           yylval->rvalue.bit_width = 64;
+                           yylval->rvalue.is_dotnew = false;
+                           yylval->rvalue.signedness = SIGNED;
+                           return REG; }
+{IMM_ID}"iV" { /* immediate of type VARIABLE, named by its first letter */
+                           yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.signedness = SIGNED;
+                           yylval->rvalue.imm.type = VARIABLE;
+                           yylval->rvalue.imm.id = yytext[0];
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false;
+                           return IMM; }
+"P"{PRED_ID}"V" { /* predicate argument */
+                           yylval->rvalue.type = PREDICATE;
+                           yylval->rvalue.pred.id = yytext[1];
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false;
+                           yylval->rvalue.signedness = SIGNED;
+                           return PRED; }
+"P"{PRED_ID}"N" { /* predicate argument, dot-new form */
+                           yylval->rvalue.type = PREDICATE;
+                           yylval->rvalue.pred.id = yytext[1];
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = true;
+                           yylval->rvalue.signedness = SIGNED;
+                           return PRED; }
+"IV1DEAD()"              |
+"fPAUSE(uiV);"           { return ';'; /* both lex as a lone semicolon */ }
+"+="                     { return INC; }
+"-="                     { return DEC; }
+"++"                     { return PLUSPLUS; }
+"&="                     { return ANDA; }
+"|="                     { return ORA; }
+"^="                     { return XORA; }
+"<<"                     { return ASL; }
+">>"                     { return ASR; }
+">>>"                    { return LSR; /* longest match: wins over ASR */ }
+"=="                     { return EQ; }
+"!="                     { return NEQ; }
+"<="                     { return LTE; }
+">="                     { return GTE; }
+"&&"                     { return ANDL; }
+"else"                   { return ELSE; }
+"for"                    { return FOR; }
+"fREAD_IREG"             { return ICIRC; }
+"fPART1"                 { return PART1; }
+"if"                     { return IF; }
+"fFRAME_SCRAMBLE"        { return FSCR; }
+"fFRAME_UNSCRAMBLE"      { return FSCR; /* same token as fFRAME_SCRAMBLE */ }
+"fFRAMECHECK"            { return FCHK; }
+"Constant_extended"      { return CONSTEXT; }
+"fCL1_"{DIGIT}           { return LOCNT; /* the digit is not stored */ }
+"fbrev"                  { return BREV; }
+"fSXTN"                  { return SXT; }
+"fZXTN"                  { return ZXT; }
+"fDF_MAX"                |
+"fSF_MAX"                |
+"fMAX"                   { return MAX; }
+"fDF_MIN"                |
+"fSF_MIN"                |
+"fMIN"                   { return MIN; }
+"fABS"                   { return ABS; }
+"fRNDN"                  { return ROUND; }
+"fCRND"                  { return CROUND; }
+"fCRNDN"                 { return CROUND; /* same token as fCRND */ }
+"fPM_CIRI"               { return CIRCADD; }
+"fPM_CIRR"               { return CIRCADD; /* same token as fPM_CIRI */ }
+"fCOUNTONES_"{DIGIT}     { return COUNTONES; /* the digit is not stored */ }
+"fSATN"                  { yylval->sat.set_overflow = true;
+                           yylval->sat.signedness = SIGNED;
+                           return SAT; }
+"fVSATN"                 { yylval->sat.set_overflow = false;
+                           yylval->sat.signedness = SIGNED;
+                           return SAT; /* like fSATN, but set_overflow off */ }
+"fSATUN"                 { yylval->sat.set_overflow = true;
+                           yylval->sat.signedness = UNSIGNED;
+                           return SAT; }
+"fCONSTLL"               { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; }
+"fCONSTULL"              { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; }
+"fSE32_64"               { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; /* same record as fCONSTLL */ }
+"fCAST4_4u"              { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; }
+"fCAST4_8s"              { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; }
+"fCAST4_8u"              { return CAST4_8U; /* own token; yylval untouched */ }
+"fCAST4u"                { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; }
+"fNEWREG"                |
+"fCAST4_4s"              |
+"fCAST4s"                { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; }
+"fCAST8_8u"              { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; }
+"fCAST8u"                { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; /* identical to fCAST8_8u */ }
+"fCAST8_8s"              |
+"fCAST8s"                { yylval->cast.bit_width = 64;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; }
+"fGETBIT"                { yylval->extract.bit_width = 1;
+                           yylval->extract.storage_bit_width = 1;
+                           yylval->extract.signedness = UNSIGNED;
+                           return EXTRACT; }
+"fGETBYTE"               { yylval->extract.bit_width = 8;
+                           yylval->extract.storage_bit_width = 8;
+                           yylval->extract.signedness = SIGNED;
+                           return EXTRACT; }
+"fGETUBYTE"              { yylval->extract.bit_width = 8;
+                           yylval->extract.storage_bit_width = 8;
+                           yylval->extract.signedness = UNSIGNED;
+                           return EXTRACT; }
+"fGETHALF"               { yylval->extract.bit_width = 16;
+                           yylval->extract.storage_bit_width = 16;
+                           yylval->extract.signedness = SIGNED;
+                           return EXTRACT; }
+"fGETUHALF"              { yylval->extract.bit_width = 16;
+                           yylval->extract.storage_bit_width = 16;
+                           yylval->extract.signedness = UNSIGNED;
+                           return EXTRACT; }
+"fGETWORD"               { yylval->extract.bit_width = 32;
+                           yylval->extract.storage_bit_width = 64;
+                           yylval->extract.signedness = SIGNED;
+                           return EXTRACT; /* 32 bits, storage width 64 */ }
+"fGETUWORD"              { yylval->extract.bit_width = 32;
+                           yylval->extract.storage_bit_width = 64;
+                           yylval->extract.signedness = UNSIGNED;
+                           return EXTRACT; /* 32 bits, storage width 64 */ }
+"fEXTRACTU_BITS"         { return EXTBITS; }
+"fEXTRACTU_RANGE"        { return EXTRANGE; }
+"fSETBIT"                { yylval->cast.bit_width = 1;
+                           yylval->cast.signedness = SIGNED;
+                           return DEPOSIT; /* width is in yylval->cast */ }
+"fSETBYTE"               { yylval->cast.bit_width = 8;
+                           yylval->cast.signedness = SIGNED;
+                           return DEPOSIT; }
+"fSETHALF"               { yylval->cast.bit_width = 16;
+                           yylval->cast.signedness = SIGNED;
+                           return SETHALF; /* not DEPOSIT, unlike siblings */ }
+"fSETWORD"               { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = SIGNED;
+                           return DEPOSIT; }
+"fINSERT_BITS"           { return INSBITS; }
+"fSETBITS"               { return SETBITS; }
+"fMPY8UU"                { yylval->mpy.first_bit_width = 8;
+                           yylval->mpy.second_bit_width = 8;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; /* widths and signs as in the name */ }
+"fMPY8US"                { yylval->mpy.first_bit_width = 8;
+                           yylval->mpy.second_bit_width = 8;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; }
+"fMPY8SU"                { yylval->mpy.first_bit_width = 8;
+                           yylval->mpy.second_bit_width = 8;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; }
+"fMPY8SS"                { yylval->mpy.first_bit_width = 8;
+                           yylval->mpy.second_bit_width = 8;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; }
+"fMPY16UU"               { yylval->mpy.first_bit_width = 16;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; }
+"fMPY16US"               { yylval->mpy.first_bit_width = 16;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; }
+"fMPY16SU"               { yylval->mpy.first_bit_width = 16;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; }
+"fMPY16SS"               { yylval->mpy.first_bit_width = 16;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; }
+"fMPY32UU"               { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 32;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; }
+"fMPY32US"               { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 32;
+                           yylval->mpy.first_signedness = UNSIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; }
+"fMPY32SU"               { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 32;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; }
+"fSFMPY"                 |
+"fMPY32SS"               { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 32;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; /* also the action for fSFMPY */ }
+"fMPY3216SS"             { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = SIGNED;
+                           return MPY; /* 32-bit by 16-bit operands */ }
+"fMPY3216SU"             { yylval->mpy.first_bit_width = 32;
+                           yylval->mpy.second_bit_width = 16;
+                           yylval->mpy.first_signedness = SIGNED;
+                           yylval->mpy.second_signedness = UNSIGNED;
+                           return MPY; /* 32-bit by 16-bit operands */ }
+"fNEWREG_ST"             |
+"fIMMEXT"                |
+"fMUST_IMMEXT"           |
+"fPASS"                  |
+"fECHO"                  { return IDENTITY; /* shared by five patterns */ }
+"(size8"[us]"_t)"        { yylval->cast.bit_width = 64;
+                           if (yytext[6] == 'u') { /* the [us] character */
+                               yylval->cast.signedness = UNSIGNED;
+                           } else {
+                               yylval->cast.signedness = SIGNED;
+                           }
+                           return CAST; }
+"(int)"                  { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = SIGNED;
+                           return CAST; }
+"(unsigned int)"         { yylval->cast.bit_width = 32;
+                           yylval->cast.signedness = UNSIGNED;
+                           return CAST; }
+"fREAD_PC()"             |
+"PC"                     { return PC; }
+"fREAD_NPC()"            |
+"NPC"                    { return NPC; }
+"fGET_LPCFG"             |
+"USR.LPCFG"              { return LPCFG; }
+"LOAD_CANCEL(EA)"        |
+"STORE_CANCEL(EA)"       |
+"CANCEL"                 { return CANCEL; /* shared by three patterns */ }
+"N"{LOWER_ID}            { yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = yytext[1]; /* char after N */
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"N"{LOWER_ID}"N"         { yylval->rvalue.type = REGISTER_ARG;
+                           yylval->rvalue.reg.type = DOTNEW; /* trailing "N" */
+                           yylval->rvalue.reg.id = yytext[1]; /* char after N */
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_SP()"             |
+"SP"                     { yylval->rvalue.type = REGISTER; /* fixed register */
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = HEX_REG_SP;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_FP()"             |
+"FP"                     { yylval->rvalue.type = REGISTER; /* fixed register */
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = HEX_REG_FP;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_LR()"             |
+"LR"                     { yylval->rvalue.type = REGISTER; /* fixed register */
+                           yylval->rvalue.reg.type = GENERAL_PURPOSE;
+                           yylval->rvalue.reg.id = HEX_REG_LR;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_GP()"             |
+"GP"                     { yylval->rvalue.type = REGISTER; /* fixed register */
+                           yylval->rvalue.reg.type = CONTROL; /* not a GPR */
+                           yylval->rvalue.reg.id = HEX_REG_GP;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_LC"[01]           { yylval->rvalue.type = REGISTER; /* [01]: yytext[8] */
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = HEX_REG_LC0 /* stride of 2 */
+                                                 + (yytext[8] - '0') * 2;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"LC"[01]                 { yylval->rvalue.type = REGISTER; /* [01]: yytext[2] */
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = HEX_REG_LC0 /* stride of 2 */
+                                                 + (yytext[2] - '0') * 2;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"fREAD_SA"[01]           { yylval->rvalue.type = REGISTER; /* [01]: yytext[8] */
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = HEX_REG_SA0 /* stride of 2 */
+                                                 + (yytext[8] - '0') * 2;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"SA"[01]                 { yylval->rvalue.type = REGISTER; /* [01]: yytext[2] */
+                           yylval->rvalue.reg.type = CONTROL;
+                           yylval->rvalue.reg.id = HEX_REG_SA0 /* stride of 2 */
+                                                 + (yytext[2] - '0') * 2;
+                           yylval->rvalue.reg.bit_width = 32;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = UNSIGNED;
+                           return REG; }
+"MuN"                    { /* literal "MuN" keyword */ return MUN; }
+"fREAD_P0()"             { yylval->rvalue.type = PREDICATE; /* reads P0 */
+                           yylval->rvalue.pred.id = '0';
+                           yylval->rvalue.is_dotnew = false; /* not .new */
+                           yylval->rvalue.bit_width = 32; return PRED; }
+[pP]{DIGIT}              { yylval->rvalue.type = PREDICATE;
+                           yylval->rvalue.pred.id = yytext[1]; /* ASCII digit */
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = false; /* no n/N suffix */
+                           return PRED; }
+[pP]{DIGIT}[nN]          { yylval->rvalue.type = PREDICATE;
+                           yylval->rvalue.pred.id = yytext[1]; /* ASCII digit */
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.is_dotnew = true; /* n/N suffix */
+                           return PRED; }
+"fLSBNEW"                { /* keyword -> token */ return LSBNEW; }
+"N"                      { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.imm.type = VARIABLE;
+                           yylval->rvalue.imm.id = 'N'; /* named immediate */
+                           /* NOTE(review): signedness unset */ return IMM; }
+"i"                      { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = SIGNED;
+                           yylval->rvalue.imm.type = I; /* dedicated imm kind */
+                           return IMM; }
+{SIGN_ID}                { /* A leading 'u' selects unsigned; every other */
+                           /* spelling matched by SIGN_ID is signed.      */
+                           yylval->signedness = (yytext[0] == 'u')
+                                              ? UNSIGNED
+                                              : SIGNED;
+                           return SIGN;
+                         }
+"fSF_BIAS()"             { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 32;
+                           yylval->rvalue.signedness = SIGNED;
+                           yylval->rvalue.imm.type = VALUE;
+                           yylval->rvalue.imm.value = 127;
+                           /* presumably float32 exponent bias */ return IMM; }
+"0x"{HEX_DIGIT}+         |
+{DIGIT}+                 { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 32; /* no suffix */
+                           yylval->rvalue.signedness = SIGNED;
+                           yylval->rvalue.imm.type = VALUE;
+                           yylval->rvalue.imm.value = strtoull(yytext, NULL, 0);
+                           /* base 0 above: radix from prefix */ return IMM; }
+"0x"{HEX_DIGIT}+"LL"     |
+{DIGIT}+"LL"             { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 64; /* "LL" suffix */
+                           yylval->rvalue.signedness = SIGNED;
+                           yylval->rvalue.imm.type = VALUE;
+                           yylval->rvalue.imm.value = strtoull(yytext, NULL, 0);
+                           /* strtoull stops at the "LL" */ return IMM; }
+"0x"{HEX_DIGIT}+"ULL"    |
+{DIGIT}+"ULL"            { yylval->rvalue.type = IMMEDIATE;
+                           yylval->rvalue.bit_width = 64; /* "ULL" suffix */
+                           yylval->rvalue.signedness = UNSIGNED;
+                           yylval->rvalue.imm.type = VALUE;
+                           yylval->rvalue.imm.value = strtoull(yytext, NULL, 0);
+                           /* strtoull stops at the "ULL" */ return IMM; }
+"fLOAD"                  { /* one token per macro name */ return LOAD; }
+"fSTORE"                 { return STORE; }
+"fROTL"                  { return ROTL; }
+"fSET_OVERFLOW"          { /* token is spelled SETOVF */ return SETOVF; }
+"fCARRY_FROM_ADD"        { return CARRY_FROM_ADD; }
+"fADDSAT64"              { return ADDSAT64; }
+"size"[1248][us]"_t"     { /*
+                            * sizeN[us]_t type names: yytext[4] is the byte
+                            * count digit, yytext[5] the signedness letter.
+                            */
+                           const unsigned int n_bytes = yytext[4] - '0';
+                           yylval->rvalue.signedness = (yytext[5] == 'u')
+                                                     ? UNSIGNED
+                                                     : SIGNED;
+                           yylval->rvalue.bit_width = 8 * n_bytes; /* bits */
+                           return TYPE_SIZE_T; }
+"size16"[us]"_t"         { /*
+                            * 16-byte variant: the two-digit size moves the
+                            * sign letter to yytext[6]; the width is 128 bits.
+                            */
+                           yylval->rvalue.signedness = (yytext[6] == 'u')
+                                                     ? UNSIGNED : SIGNED;
+                           yylval->rvalue.bit_width = 128;
+                           return TYPE_SIZE_T; }
+"signed"                 { /* C type keyword */ return TYPE_SIGNED; }
+"unsigned"               { /* C type keyword */ return TYPE_UNSIGNED; }
+"long"                   { /* C type keyword */ return TYPE_LONG; }
+"int"                    { /* C type keyword */ return TYPE_INT; }
+"const"                  { /* Emit no token: the qualifier is dropped */ }
+{VAR_ID}                 { /* Variable name, we adopt the C names convention */
+                           yylval->rvalue.type = VARID;
+                           yylval->rvalue.var.name = g_string_new(yytext);
+                           /* Default to an unknown signedness and 0 width. */
+                           yylval->rvalue.bit_width = 0;
+                           yylval->rvalue.signedness = UNKNOWN_SIGNEDNESS;
+                           /* var.name is heap-allocated here */ return VAR; }
+"fatal("{STRING_LIT}")"  { /* Emit no token: the whole call is discarded */ }
+"fHINTJR(RsV)"           { /* Emit no token: the whole call is discarded */ }
+.                        { /* fallback: char as token */ return yytext[0]; }
+
+%%
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index 5945098cc4..63f13e1d21 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -196,4 +196,8 @@  preprocessed_idef_parser_input_generated = custom_target(
     command: [idef_parser_dir / 'prepare', '@INPUT@', '-I' + idef_parser_dir, '-o', '@OUTPUT@'],
 )
 
+flex = generator(find_program('flex'),  # per-input flex rule
+                 output: ['@BASENAME@.yy.c', '@BASENAME@.yy.h'],  # C source + header
+                 arguments: ['-o', '@OUTPUT0@', '--header-file=@OUTPUT1@', '@INPUT@'])
+
 target_arch += {'hexagon': hexagon_ss}