0

I'm trying to use syntax parameters in order to inject new syntax where I need it to be injected. The result of this is then used in other syntax. However, it's not working as I expect it to. Here's a minimal working example:

#lang racket

(require (for-syntax racket/contract))
(require racket/stxparam)


;; A list for holding the instructions
(define instructions-db
  '())

;===================================
; MACRO FOR DEFINING AN INSTRUCTION
;===================================

(provide define-instruction)
(define-syntax (define-instruction stx)
  (syntax-case stx ()
    [(_ id (attrs ...))
     ;; Insert instruction into database
     #'(set! instructions-db (append instructions-db '(id (attrs ...))))]))


;=============================================================
; MACRO TO MIMIC 'FOR' BUT TO BE USED WITH DEFINE-INSTRUCTION
;=============================================================

(begin-for-syntax
  ; Gets the head of each list within the given list of syntax lists. If any of
  ; the lists are empty, an empty list is returned.
  (define/contract (stx-heads ls)
    ((listof (syntax/c list?)) . -> . (listof (syntax/c any/c)))
    (let loop ([ls ls]
               [hs '()])
      (if (null? ls)
          hs
          (let ([l (syntax-e (car ls))])
            (if (null? l)
                '()
                (loop (cdr ls) (append hs (list (car l)))))))))

  ; Gets the tail of each list within the given list of syntax lists. If any of
  ; the lists are empty, an empty list is returned.
  (define/contract (stx-tails ls)
    ((listof (syntax/c list?)) . -> . (listof (syntax/c list?)))
    (let loop ([ls ls]
               [ts '()])
      (if (null? ls)
          ts
          (let* ([stx-l (car ls)]
                 [l (syntax-e stx-l)])
            (if (null? l)
                '()
                (loop (cdr ls) (append ts (list
                                           (datum->syntax stx-l
                                                          (cdr l)
                                                          stx-l
                                                          stx-l)))))))))

  (define (define-instruction-stx? stx)
    (if (syntax? stx)
        (let ([e (syntax-e stx)])
          (and (pair? e)
               (syntax? (car e))
               (equal? (syntax-e (car e)) 'define-instruction)))
        #f))

  ;; Given a syntax object, an identifier, and a replacement value, construct a
  ;; new syntax object where any occurrence of the identifier is substituted for
  ;; the value.
  (define (stx-id-substitute id replacement stx)
    (let loop ([e stx])
      (cond [(and (identifier? e)
                  (bound-identifier=? e id))
             replacement]
            [(syntax? e)
             (datum->syntax e (loop (syntax-e e)) e e)]
            [(pair? e)
             (cons (loop (car e)) (loop (cdr e)))]
            [else e])))

  ;; Given a 'define-instruction' syntax object, extends its ID with the given
  ;; string. If any other object is given, it is left intact and returned.
  (define (extend-id-of-define-instruction-stx suffix stx)
    (if (define-instruction-stx? stx)
        (let* ([e (syntax-e stx)]
               [stx-construct (car e)]
               [stx-id (cadr e)]
               [new-stx-id
                (datum->syntax stx-id
                               (string->symbol
                                (format "~a~a"
                                        (symbol->string (syntax-e stx-id))
                                        suffix))
                               stx-id
                               stx-id)]
               [stx-attrs (caddr e)])
          (datum->syntax stx
                         `(,stx-construct ,new-stx-id ,stx-attrs)
                         stx
                         stx))
        stx))

  ;; Given a list of variable-value pairs and define-instruction body, construct
  ;; a new body where all varible occurrences have been replaced with its
  ;; values.
  (define (instr-for-body-args-sub var-val-pairs stx-body)
    (let loop ([var-val-pairs var-val-pairs]
               [stx-body stx-body])
      (if (null? var-val-pairs)
          stx-body
          (let* ([var-val-p (car var-val-pairs)]
                 [var (car var-val-p)]
                 [val (cdr var-val-p)]
                 [new-stx-body (stx-id-substitute var val stx-body)]
                 [rest-var-val-pairs (cdr var-val-pairs)])
            (loop rest-var-val-pairs new-stx-body)))))

  ;; Given a list of variable-value pairs and define-instruction body, construct
  ;; a new body where all varible occurrences have been replaced with its
  ;; values. Also, an index is appended to the identifier of the new
  ;; define-instruction body.
  (define (instr-for-body-args var-val-pairs instr-index stx-body0)
    (let* ([stx-body1 (instr-for-body-args-sub var-val-pairs stx-body0)]
           [stx-body2 (let loop ([e stx-body1])
                        (cond [(define-instruction-stx? e)
                               (extend-id-of-define-instruction-stx
                                (format ":~a" instr-index)
                                e)]
                              [(syntax? e)
                               (datum->syntax e (loop (syntax-e e)) e e)]
                              [(pair? e)
                               (cons (loop (car e)) (loop (cdr e)))]
                              [else e]))])
      stx-body2))

  ;; Given a list of iteration arguments and an define-instruction body,
  ;; construct a list of define-instruction bodies.
  (define (instr-for-body stx-args stx-body)
    (let ([stx-vars (stx-heads (syntax-e stx-args))])
      (let loop ([stx-val-lists (stx-heads (stx-tails (syntax-e stx-args)))]
                 [instr-index 0])
        (if (null? stx-val-lists)
            '() ;; No more values to iterate over
            (let ([stx-vals (stx-heads stx-val-lists)])
              (if (null? stx-vals)
                  '() ;; At least one arg list has no more values
                  (let ([stx-arg-val-pairs (map cons stx-vars stx-vals)])
                    (cons (instr-for-body-args stx-arg-val-pairs
                                               instr-index
                                               stx-body)
                          (loop (stx-tails stx-val-lists)
                                (+ instr-index 1)))))))))))

(provide instr-for)
(define-syntax (instr-for stx)
  (syntax-case stx ()
    [(_ args body ...)
     (with-syntax ([(replaced-body ...)
                    (foldl
                     (lambda (stx-body replaced-stx-bodies)
                       (append (instr-for-body #'args stx-body)
                               replaced-stx-bodies))
                     '()
                     (syntax-e #'(body ...)))])
                  #'(begin replaced-body ...))]))


;===============================================
; MACROS TO SIMPLIFY DEFINITION OF INSTRUCTIONS
;===============================================

(define-syntax-parameter mem-op-addr
  (lambda (stx)
    (raise-syntax-error
     (syntax-e stx)
     "can only be used inside define-modrm-mem-op-instruction")))

(provide define-complex-addr-mode-instructions)
(define-syntax (define-complex-addr-mode-instructions stx)
  (syntax-case stx ()
    [(_ id (attrs ...))
     #'(begin
         (instr-for ([addr (#'reg1
                            #'[inttoptr 32 offset 32]
                            #'[inttoptr 32 (add 32 rbase rindex) 32]
                            #'[inttoptr 32 (add 32
                            #'                  rbase
                            #'                  (add 32 rindex offset))
                            #'          32])])
           (let ([_addr (syntax->datum addr)])
             (syntax-parameterize ([mem-op-addr
                                    (make-rename-transformer #'_addr)])
               (define-instruction id (attrs ...))))))]))

This code is used where instructions are defined and put into a database. The semantics of the instructions of that database are then later used to generate code.

Say now that I want to declare an instruction. This is done as follows:

(define-instruction ADD:0
  ((semantics (add 8 reg0 reg1))))

(displayln instructions-db)

which produces:

(ADD:0 ((semantics (add 8 reg0 reg1))

To handle different bit widths, we can either do:

(define-instruction ADD:0
  ((semantics (add 8 reg0 reg1))))
(define-instruction ADD:1
  ((semantics (add 16 reg0 reg1))))
(define-instruction ADD:2
  ((semantics (add 32 reg0 reg1))))

(displayln instructions-db)

or simply use my instr-for macro:

(instr-for ([i (8 16 32)])
  (define-instruction ADD
    ((semantics (add i reg0 reg1)))))

(displayln instructions-db)

which gives the same result as above:

([ADD:0 ((semantics (add 8 reg0 reg1)))]
 [ADD:1 ((semantics (add 16 reg0 reg1)))]
 [ADD:2 ((semantics (add 32 reg0 reg1)))])

Now, some instructions have complex addressing modes which appear across multiple instructions. For example:

; some ADD instructions
(define-instruction ADD:0
  ((semantics
    (add 32 reg0 (load-mem 32 reg1)))))
(define-instruction ADD:1
  ((semantics
    (add 32 reg0 (load-mem 32 [inttoptr 32 offset 32])))))
(define-instruction ADD:2
  ((semantics
    (add 32 reg0 (load-mem 32 [inttoptr 32 (add 32 rbase rindex) 32])))))
(define-instruction ADD:3
  ((semantics
    (add 32 reg0 (load-mem 32 [inttoptr 32 (add 32
                                                rbase
                                                (add 32 rindex offset))
                                        32])))))

; some SUB instructions, with the same addressing modes
(define-instruction SUB:0
  ((semantics
    (sub 32 reg0 (load-mem 32 reg1)))))
(define-instruction SUB:1
  ((semantics
    (sub 32 reg0 (load-mem 32 [inttoptr 32 offset 32])))))
(define-instruction SUB:2
  ((semantics
    (sub 32 reg0 (load-mem 32 [inttoptr 32 (add 32 rbase rindex) 32])))))
(define-instruction SUB:3
  ((semantics
    (sub 32 reg0 (load-mem 32 [inttoptr 32 (add 32
                                                rbase
                                                (add 32 rindex offset))
                                        32])))))

To avoid copy-pasting, I have defined a new macro define-complex-addr-mode-instructions to allows us to declare the same instructions as above simply with:

(define-complex-addr-mode-instructions ADD
  ((semantics (add 32 reg0 (load-mem 32 mem-op-addr)))))
(define-complex-addr-mode-instructions SUB
  ((semantics (add 32 reg0 (load-mem 32 mem-op-addr)))))

(displayln instructions-db)

However, this produces:

([ADD:0 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [ADD:1 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [ADD:2 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [ADD:3 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [SUB:0 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [SUB:1 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [SUB:2 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))]
 [SUB:3 ((semantics (add 32 reg0 (load-mem 32 mem-op-addr))))])

Reading Greg Hendershott's guide on macros, I tried to implement define-complex-addr-mode-instructions using syntax parameters as it's apparently bad to try to do this using syntax->datum. Have I misunderstood how syntax parameters work, or is this a case where I need to use datum->syntax? I noticed that it works if I replace the bound-identifier=? in instr-for to free-identifier=?, but I suspect that's not the proper way to do it.

gablin
  • 4,678
  • 6
  • 33
  • 47
  • 1
    It looks to me like the answer to your question is, in fact, the same as the answer to [your previous question](https://stackoverflow.com/q/50024716/465378), despite the increase in complexity: `define-instruction` still puts its inputs under `quote` in its expansion, and no expansion happens under `quote`. I’d reconsider whether or not `quote` is really the right choice there. It seems like you should really be doing some more processing on those inputs, and you could likely do it in a way that signals to the macroexpander (directly or indirectly) that expansion may recur into those spots. – Alexis King Apr 26 '18 at 18:45
  • Yeah, I realized that too on the way home from work yesterday after having posted this question. Truth be told, I didn't write the code for `define-instruction` so I didn't fully understand how it processes its arguments, but it seems that it wraps everything inside `quote` and then parses it element by element. Since the arguments are not executed and hence not parsed as syntax by Racket, it's now obvious why `syntax-parameterize` doesn't work in this case. The easiest solution for me is thus to do something along the lines of `instr-for`. Thanks for your help! – gablin Apr 27 '18 at 07:14
  • @AlexisKing Could you please turn your comment into an answer so I can mark this question as answered? – gablin Apr 27 '18 at 08:12
  • I disagree that the solution you want is something like `instr-for` that walks its subforms and clobbers them. That’s almost always the wrong thing to do. Let the macroexpander work for you instead of fighting it. Wherever possible, allow expanding into expressions that permit further expansion. Consider using `local-expand` if you really need to force recursive expansion for some reason or another. Yield to the macroexpander and let it do its work. – Alexis King Apr 27 '18 at 15:05
  • Does that mean the macro expander _can_ walk through quoted statements? If so, how? Evidently the approach above is not the way to do it. – gablin Apr 30 '18 at 08:37
  • No, things under `quote` are not expanded [by definition](https://stackoverflow.com/a/34984553/465378). But I would posit that it’s highly likely `quote` is not actually what you want. Why are you using `quote` in the first place? Why not expand to equivalent expressions instead that *can* be traversed by the expander? – Alexis King May 01 '18 at 21:58
  • The problem is that I don't want the data inside the `semantics` field to be evaluated; it simply contains a description (that is, a list of symbols) of what the instruction does. At a later stage, another function takes the semantics as output and generates code corresponding to the description. Is there some other way of storing a list of symbols without using `quote`? – gablin May 02 '18 at 10:50

0 Answers0